enh(stream): create update stream trans.
This commit is contained in:
parent
f3c5f20ee2
commit
d34b9af054
|
@ -32,8 +32,6 @@
|
||||||
#define MND_STREAM_VER_NUMBER 3
|
#define MND_STREAM_VER_NUMBER 3
|
||||||
#define MND_STREAM_RESERVE_SIZE 64
|
#define MND_STREAM_RESERVE_SIZE 64
|
||||||
#define MND_STREAM_MAX_NUM 60
|
#define MND_STREAM_MAX_NUM 60
|
||||||
#define MND_STREAM_HB_INTERVAL 100 // 100 sec
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct SNodeEntry {
|
typedef struct SNodeEntry {
|
||||||
int32_t nodeId;
|
int32_t nodeId;
|
||||||
|
@ -44,6 +42,7 @@ typedef struct SNodeEntry {
|
||||||
typedef struct SStreamVnodeRevertIndex {
|
typedef struct SStreamVnodeRevertIndex {
|
||||||
SArray* pDBList;
|
SArray* pDBList;
|
||||||
SArray* pNodeEntryList;
|
SArray* pNodeEntryList;
|
||||||
|
int64_t ts; // snapshot ts
|
||||||
} SStreamVnodeRevertIndex;
|
} SStreamVnodeRevertIndex;
|
||||||
|
|
||||||
static int32_t mndNodeCheckSentinel = 0;
|
static int32_t mndNodeCheckSentinel = 0;
|
||||||
|
@ -98,7 +97,7 @@ int32_t mndInitStream(SMnode *pMnode) {
|
||||||
mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp);
|
mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp);
|
||||||
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr);
|
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr);
|
||||||
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint);
|
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint);
|
||||||
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb);
|
// mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb);
|
||||||
mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp);
|
mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp);
|
||||||
|
|
||||||
mndSetMsgHandle(pMnode, TDMT_MND_PAUSE_STREAM, mndProcessPauseStreamReq);
|
mndSetMsgHandle(pMnode, TDMT_MND_PAUSE_STREAM, mndProcessPauseStreamReq);
|
||||||
|
@ -863,7 +862,7 @@ static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) {
|
||||||
|
|
||||||
SRpcMsg rpcMsg = {
|
SRpcMsg rpcMsg = {
|
||||||
.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = sizeof(SMStreamDoCheckpointMsg)};
|
.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = sizeof(SMStreamDoCheckpointMsg)};
|
||||||
tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
|
// tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1740,6 +1739,7 @@ typedef struct SVgroupChangeInfo {
|
||||||
} SVgroupChangeInfo;
|
} SVgroupChangeInfo;
|
||||||
|
|
||||||
static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg* pMsg, const SVgroupChangeInfo* pInfo) {
|
static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg* pMsg, const SVgroupChangeInfo* pInfo) {
|
||||||
|
pMsg->pNodeList = taosArrayInit(taosArrayGetSize(pInfo->pUpdateNodeList), sizeof(SNodeUpdateInfo));
|
||||||
taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList);
|
taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1821,7 +1821,6 @@ static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgr
|
||||||
}
|
}
|
||||||
|
|
||||||
mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid);
|
mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid);
|
||||||
ASSERT(0);
|
|
||||||
|
|
||||||
mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
|
mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
|
||||||
if (mndTransCheckConflict(pMnode, pTrans) != 0) {
|
if (mndTransCheckConflict(pMnode, pTrans) != 0) {
|
||||||
|
@ -1856,7 +1855,25 @@ static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgr
|
||||||
}
|
}
|
||||||
|
|
||||||
taosWUnLockLatch(&pStream->lock);
|
taosWUnLockLatch(&pStream->lock);
|
||||||
return mndPersistTransLog(pStream, pTrans);
|
|
||||||
|
int32_t code = mndPersistTransLog(pStream, pTrans);
|
||||||
|
if (code != TSDB_CODE_SUCCESS) {
|
||||||
|
sdbRelease(pMnode->pSdb, pStream);
|
||||||
|
mndTransDrop(pTrans);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mndTransPrepare(pMnode, pTrans) != 0) {
|
||||||
|
mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr());
|
||||||
|
sdbRelease(pMnode->pSdb, pStream);
|
||||||
|
mndTransDrop(pTrans);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
sdbRelease(pMnode->pSdb, pStream);
|
||||||
|
mndTransDrop(pTrans);
|
||||||
|
|
||||||
|
return TSDB_CODE_ACTION_IN_PROGRESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo. 1. multiple change, 2. replica change problem
|
// todo. 1. multiple change, 2. replica change problem
|
||||||
|
@ -1943,6 +1960,7 @@ int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo* pChangeInfo) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mDebug("stream:0x%"PRIx64" involved node changed, create update trans", pStream->uid);
|
||||||
int32_t code = createStreamUpdateTrans(pMnode, pStream, pChangeInfo);
|
int32_t code = createStreamUpdateTrans(pMnode, pStream, pChangeInfo);
|
||||||
if (code != TSDB_CODE_SUCCESS) {
|
if (code != TSDB_CODE_SUCCESS) {
|
||||||
// todo
|
// todo
|
||||||
|
@ -1952,7 +1970,54 @@ int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo* pChangeInfo) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SArray* doExtractNodeList(SMnode *pMnode) {
|
||||||
|
SSdb *pSdb = pMnode->pSdb;
|
||||||
|
SStreamObj *pStream = NULL;
|
||||||
|
void *pIter = NULL;
|
||||||
|
|
||||||
|
SHashObj* pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
|
||||||
|
while (1) {
|
||||||
|
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
|
||||||
|
if (pIter == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
taosWLockLatch(&pStream->lock);
|
||||||
|
int32_t numOfLevels = taosArrayGetSize(pStream->tasks);
|
||||||
|
|
||||||
|
for (int32_t j = 0; j < numOfLevels; ++j) {
|
||||||
|
SArray *pLevel = taosArrayGetP(pStream->tasks, j);
|
||||||
|
|
||||||
|
int32_t numOfTasks = taosArrayGetSize(pLevel);
|
||||||
|
for (int32_t k = 0; k < numOfTasks; ++k) {
|
||||||
|
SStreamTask *pTask = taosArrayGetP(pLevel, k);
|
||||||
|
SNodeEntry entry = {0};
|
||||||
|
epsetAssign(&entry.epset, &pTask->info.epSet);
|
||||||
|
entry.nodeId = pTask->info.nodeId;
|
||||||
|
entry.hbTimestamp = -1;
|
||||||
|
|
||||||
|
taosHashPut(pHash, &entry.nodeId, sizeof(entry.nodeId), &entry, sizeof(entry));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
taosWUnLockLatch(&pStream->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
SArray* plist = taosArrayInit(taosHashGetSize(pHash), sizeof(SNodeEntry));
|
||||||
|
|
||||||
|
// convert to list
|
||||||
|
pIter = NULL;
|
||||||
|
while((pIter = taosHashIterate(pHash, pIter)) != NULL) {
|
||||||
|
SNodeEntry* pEntry = (SNodeEntry*) pIter;
|
||||||
|
taosArrayPush(plist, pEntry);
|
||||||
|
}
|
||||||
|
taosHashCleanup(pHash);
|
||||||
|
|
||||||
|
return plist;
|
||||||
|
}
|
||||||
|
|
||||||
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
|
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
|
||||||
|
return 0;
|
||||||
int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1);
|
int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1);
|
||||||
if (old != 0) {
|
if (old != 0) {
|
||||||
mDebug("still in checking node change");
|
mDebug("still in checking node change");
|
||||||
|
@ -1962,98 +2027,111 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
|
||||||
mDebug("start to do node change checking");
|
mDebug("start to do node change checking");
|
||||||
|
|
||||||
SMnode *pMnode = pMsg->info.node;
|
SMnode *pMnode = pMsg->info.node;
|
||||||
|
if (execNodeList.pNodeEntryList == NULL) {
|
||||||
|
execNodeList.pNodeEntryList = doExtractNodeList(pMnode);
|
||||||
|
}
|
||||||
|
|
||||||
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode);
|
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode);
|
||||||
|
int64_t ts = taosGetTimestampSec();
|
||||||
|
|
||||||
SVgroupChangeInfo changeInfo = mndFindChangedVgroupInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot);
|
SVgroupChangeInfo changeInfo = mndFindChangedVgroupInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot);
|
||||||
|
|
||||||
if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) {
|
if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) {
|
||||||
mndProcessVgroupChange(pMnode, &changeInfo);
|
mndProcessVgroupChange(pMnode, &changeInfo);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
taosArrayDestroy(changeInfo.pUpdateNodeList);
|
||||||
|
taosHashCleanup(changeInfo.pDBMap);
|
||||||
|
|
||||||
|
// keep the new vnode snapshot
|
||||||
|
taosArrayDestroy(execNodeList.pNodeEntryList);
|
||||||
|
execNodeList.pNodeEntryList = pNodeSnapshot;
|
||||||
|
execNodeList.ts = ts;
|
||||||
|
|
||||||
mDebug("end to do node change checking");
|
mDebug("end to do node change checking");
|
||||||
|
atomic_store_32(&mndNodeCheckSentinel, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: this process should be executed by the write queue worker of the mnode
|
// todo: this process should be executed by the write queue worker of the mnode
|
||||||
int32_t mndProcessStreamHb(SRpcMsg *pReq) {
|
//int32_t mndProcessStreamHb(SRpcMsg *pReq) {
|
||||||
SMnode *pMnode = pReq->info.node;
|
// SMnode *pMnode = pReq->info.node;
|
||||||
SSdb *pSdb = pMnode->pSdb;
|
// SSdb *pSdb = pMnode->pSdb;
|
||||||
SStreamHbMsg req = {0};
|
// SStreamHbMsg req = {0};
|
||||||
int32_t code = TSDB_CODE_SUCCESS;
|
// int32_t code = TSDB_CODE_SUCCESS;
|
||||||
|
//
|
||||||
SDecoder decoder = {0};
|
// SDecoder decoder = {0};
|
||||||
tDecoderInit(&decoder, (uint8_t *)pReq->pCont, pReq->contLen);
|
// tDecoderInit(&decoder, (uint8_t *)pReq->pCont, pReq->contLen);
|
||||||
|
//
|
||||||
if (tStartDecode(&decoder) < 0) return -1;
|
// if (tStartDecode(&decoder) < 0) return -1;
|
||||||
|
//
|
||||||
if (tDecodeStreamHbMsg(&decoder, &req) < 0) {
|
// if (tDecodeStreamHbMsg(&decoder, &req) < 0) {
|
||||||
terrno = TSDB_CODE_INVALID_MSG;
|
// terrno = TSDB_CODE_INVALID_MSG;
|
||||||
return -1;
|
// return -1;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
int64_t now = taosGetTimestampSec();
|
// int64_t now = taosGetTimestampSec();
|
||||||
mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks);
|
// mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks);
|
||||||
|
//
|
||||||
// timeout list
|
// // timeout list
|
||||||
bool nodeChanged = false;
|
// bool nodeChanged = false;
|
||||||
SArray* pList = taosArrayInit(4, sizeof(int32_t));
|
// SArray* pList = taosArrayInit(4, sizeof(int32_t));
|
||||||
|
//
|
||||||
// record the timeout node
|
// // record the timeout node
|
||||||
for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) {
|
// for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) {
|
||||||
SNodeEntry* pEntry = taosArrayGet(execNodeList.pNodeEntryList, i);
|
// SNodeEntry* pEntry = taosArrayGet(execNodeList.pNodeEntryList, i);
|
||||||
int64_t duration = now - pEntry->hbTimestamp;
|
// int64_t duration = now - pEntry->hbTimestamp;
|
||||||
if (duration > MND_STREAM_HB_INTERVAL) { // execNode timeout, try next
|
// if (duration > MND_STREAM_HB_INTERVAL) { // execNode timeout, try next
|
||||||
taosArrayPush(pList, &pEntry);
|
// taosArrayPush(pList, &pEntry);
|
||||||
mWarn("nodeId:%d stream node timeout, since last hb:%"PRId64"s", pEntry->nodeId, duration);
|
// mWarn("nodeId:%d stream node timeout, since last hb:%"PRId64"s", pEntry->nodeId, duration);
|
||||||
continue;
|
// continue;
|
||||||
}
|
|
||||||
|
|
||||||
if (pEntry->nodeId != req.vgId) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
pEntry->hbTimestamp = now;
|
|
||||||
|
|
||||||
// check epset to identify whether the node has been transferred to other dnodes.
|
|
||||||
// node the epset is changed, which means the node transfer has occurred for this node.
|
|
||||||
// if (!isEpsetEqual(&pEntry->epset, &req.epset)) {
|
|
||||||
// nodeChanged = true;
|
|
||||||
// break;
|
|
||||||
// }
|
// }
|
||||||
}
|
//
|
||||||
|
// if (pEntry->nodeId != req.vgId) {
|
||||||
// todo handle the node timeout case. Once the vnode is off-line, we should check the dnode status from mnode,
|
// continue;
|
||||||
// to identify whether the dnode is truely offline or not.
|
// }
|
||||||
|
//
|
||||||
// handle the node changed case
|
// pEntry->hbTimestamp = now;
|
||||||
if (!nodeChanged) {
|
//
|
||||||
return TSDB_CODE_SUCCESS;
|
// // check epset to identify whether the node has been transferred to other dnodes.
|
||||||
}
|
// // node the epset is changed, which means the node transfer has occurred for this node.
|
||||||
|
//// if (!isEpsetEqual(&pEntry->epset, &req.epset)) {
|
||||||
int32_t nodeId = req.vgId;
|
//// nodeChanged = true;
|
||||||
|
//// break;
|
||||||
{// check all streams that involved this vnode should update the epset info
|
//// }
|
||||||
SStreamObj *pStream = NULL;
|
// }
|
||||||
void *pIter = NULL;
|
//
|
||||||
while (1) {
|
// // todo handle the node timeout case. Once the vnode is off-line, we should check the dnode status from mnode,
|
||||||
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
|
// // to identify whether the dnode is truely offline or not.
|
||||||
if (pIter == NULL) {
|
//
|
||||||
break;
|
// // handle the node changed case
|
||||||
}
|
// if (!nodeChanged) {
|
||||||
|
// return TSDB_CODE_SUCCESS;
|
||||||
// update the related upstream and downstream tasks, todo remove this, no need this function
|
// }
|
||||||
taosWLockLatch(&pStream->lock);
|
//
|
||||||
// streamTaskUpdateEpInfo(pStream->tasks, req.vgId, &req.epset);
|
// int32_t nodeId = req.vgId;
|
||||||
// streamTaskUpdateEpInfo(pStream->pHTasksList, req.vgId, &req.epset);
|
//
|
||||||
taosWUnLockLatch(&pStream->lock);
|
// {// check all streams that involved this vnode should update the epset info
|
||||||
|
// SStreamObj *pStream = NULL;
|
||||||
// code = createStreamUpdateTrans(pMnode, pStream, nodeId, );
|
// void *pIter = NULL;
|
||||||
// if (code != TSDB_CODE_SUCCESS) {
|
// while (1) {
|
||||||
// todo
|
// pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
|
||||||
|
// if (pIter == NULL) {
|
||||||
|
// break;
|
||||||
// }
|
// }
|
||||||
}
|
//
|
||||||
}
|
// // update the related upstream and downstream tasks, todo remove this, no need this function
|
||||||
|
// taosWLockLatch(&pStream->lock);
|
||||||
return TSDB_CODE_SUCCESS;
|
//// streamTaskUpdateEpInfo(pStream->tasks, req.vgId, &req.epset);
|
||||||
}
|
//// streamTaskUpdateEpInfo(pStream->pHTasksList, req.vgId, &req.epset);
|
||||||
|
// taosWUnLockLatch(&pStream->lock);
|
||||||
|
//
|
||||||
|
//// code = createStreamUpdateTrans(pMnode, pStream, nodeId, );
|
||||||
|
//// if (code != TSDB_CODE_SUCCESS) {
|
||||||
|
//// todo
|
||||||
|
//// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return TSDB_CODE_SUCCESS;
|
||||||
|
//}
|
||||||
|
|
|
@ -658,6 +658,7 @@ int32_t streamTryExec(SStreamTask* pTask) {
|
||||||
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus),
|
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus),
|
||||||
pTask->status.schedStatus);
|
pTask->status.schedStatus);
|
||||||
|
|
||||||
|
// the inputQ is empty due to the checkpoint process, so we need to scan data from WAL here.
|
||||||
if ((!streamTaskShouldStop(&pTask->status)) && (!streamTaskShouldPause(&pTask->status))) {
|
if ((!streamTaskShouldStop(&pTask->status)) && (!streamTaskShouldPause(&pTask->status))) {
|
||||||
streamSchedExec(pTask);
|
streamSchedExec(pTask);
|
||||||
}
|
}
|
||||||
|
|
|
@ -547,6 +547,7 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void metaHbToMnode(void* param, void* tmrId) {
|
void metaHbToMnode(void* param, void* tmrId) {
|
||||||
|
#if 0
|
||||||
SStreamMeta* pMeta = param;
|
SStreamMeta* pMeta = param;
|
||||||
SStreamHbMsg hbMsg = {0};
|
SStreamHbMsg hbMsg = {0};
|
||||||
|
|
||||||
|
@ -592,4 +593,5 @@ void metaHbToMnode(void* param, void* tmrId) {
|
||||||
|
|
||||||
// next hb will be issued in 20sec.
|
// next hb will be issued in 20sec.
|
||||||
taosTmrReset(metaHbToMnode, 20000, pMeta, streamEnv.timer, pMeta->hbTmr);
|
taosTmrReset(metaHbToMnode, 20000, pMeta, streamEnv.timer, pMeta->hbTmr);
|
||||||
|
#endif
|
||||||
}
|
}
|
Loading…
Reference in New Issue