diff --git a/.gitignore b/.gitignore index 5f1e24109d..65e03e1933 100644 --- a/.gitignore +++ b/.gitignore @@ -127,4 +127,5 @@ tools/THANKS tools/NEWS tools/COPYING tools/BUGS -tools/taos-tools \ No newline at end of file +tools/taos-tools +tools/taosws-rs diff --git a/include/common/ttypes.h b/include/common/ttypes.h index eace9d7dd6..9ff315fc2e 100644 --- a/include/common/ttypes.h +++ b/include/common/ttypes.h @@ -340,7 +340,7 @@ typedef struct tDataTypeDescriptor { } tDataTypeDescriptor; extern tDataTypeDescriptor tDataTypes[TSDB_DATA_TYPE_MAX]; -bool isValidDataType(int32_t type); +bool isValidDataType(int32_t type); void assignVal(char *val, const char *src, int32_t len, int32_t type); void operateVal(void *dst, void *s1, void *s2, int32_t optr, int32_t type); diff --git a/include/dnode/mnode/mnode.h b/include/dnode/mnode/mnode.h index cdb1642a5c..c9e47c25b7 100644 --- a/include/dnode/mnode/mnode.h +++ b/include/dnode/mnode/mnode.h @@ -99,6 +99,7 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad); */ int32_t mndProcessRpcMsg(SRpcMsg *pMsg); int32_t mndProcessSyncMsg(SRpcMsg *pMsg); +int32_t mndProcessSyncCtrlMsg(SRpcMsg *pMsg); int32_t mndPreProcessQueryMsg(SRpcMsg *pMsg); void mndPostProcessQueryMsg(SRpcMsg *pMsg); diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 0f8220f19c..ff14e637d0 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -35,7 +35,12 @@ extern bool gRaftDetailLog; #define SYNC_MAX_PROGRESS_WAIT_MS 4000 #define SYNC_MAX_START_TIME_RANGE_MS (1000 * 20) #define SYNC_MAX_RECV_TIME_RANGE_MS 1200 +#define SYNC_DEL_WAL_MS (1000 * 60) #define SYNC_ADD_QUORUM_COUNT 3 +#define SYNC_MNODE_LOG_RETENTION 10000 +#define SYNC_VNODE_LOG_RETENTION 500 + +#define SYNC_APPEND_ENTRIES_TIMEOUT_MS 10000 #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 @@ -157,32 +162,15 @@ typedef struct SSyncLogStore { SLRUCache* pCache; void* data; - // append one log entry - int32_t (*appendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); - - // get one log entry, user need to free pEntry->pCont - SSyncRaftEntry* (*getEntry)(struct SSyncLogStore* pLogStore, SyncIndex index); - - // truncate log with index, entries after the given index (>=index) will be deleted - int32_t (*truncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); - - // return index of last entry - SyncIndex (*getLastIndex)(struct SSyncLogStore* pLogStore); - - // return term of last entry - SyncTerm (*getLastTerm)(struct SSyncLogStore* pLogStore); - - // update log store commit index with "index" - int32_t (*updateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index); - - // return commit index of log - SyncIndex (*getCommitIndex)(struct SSyncLogStore* pLogStore); + int32_t (*syncLogUpdateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index); + SyncIndex (*syncLogCommitIndex)(struct SSyncLogStore* pLogStore); SyncIndex (*syncLogBeginIndex)(struct SSyncLogStore* pLogStore); SyncIndex (*syncLogEndIndex)(struct SSyncLogStore* pLogStore); - bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore); + int32_t (*syncLogEntryCount)(struct SSyncLogStore* pLogStore); int32_t (*syncLogRestoreFromSnapshot)(struct SSyncLogStore* pLogStore, SyncIndex index); + bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore); bool (*syncLogExist)(struct SSyncLogStore* pLogStore, SyncIndex index); SyncIndex (*syncLogWriteIndex)(struct SSyncLogStore* pLogStore); @@ -207,6 +195,7 @@ typedef struct SSyncInfo { SMsgCb* msgcb; int32_t (*FpSendMsg)(const SEpSet* pEpSet, SRpcMsg* pMsg); int32_t (*FpEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg); + int32_t (*FpEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg); } SSyncInfo; int32_t syncInit(); @@ -217,7 +206,6 @@ void syncStop(int64_t rid); int32_t syncSetStandby(int64_t rid); ESyncState syncGetMyRole(int64_t rid); bool syncIsReady(int64_t rid); -bool syncIsReadyForRead(int64_t rid); const char* syncGetMyRoleStr(int64_t rid); bool syncRestoreFinish(int64_t rid); SyncTerm syncGetMyTerm(int64_t rid); @@ -227,7 +215,7 @@ SyncGroupId syncGetVgId(int64_t rid); void syncGetEpSet(int64_t rid, SEpSet* pEpSet); void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet); int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak); -int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize); +// int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize); bool syncEnvIsStart(); const char* syncStr(ESyncState state); bool syncIsRestoreFinish(int64_t rid); @@ -241,6 +229,9 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg int32_t syncLeaderTransfer(int64_t rid); int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader); +int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex); +int32_t syncEndSnapshot(int64_t rid); + #ifdef __cplusplus } #endif diff --git a/include/libs/sync/syncTools.h b/include/libs/sync/syncTools.h index b2c743831a..d5c015bfb2 100644 --- a/include/libs/sync/syncTools.h +++ b/include/libs/sync/syncTools.h @@ -157,6 +157,8 @@ typedef enum ESyncTimeoutType { SYNC_TIMEOUT_HEARTBEAT, } ESyncTimeoutType; +const char* syncTimerTypeStr(enum ESyncTimeoutType timerType); + typedef struct SyncTimeout { uint32_t bytes; int32_t vgId; @@ -423,6 +425,7 @@ typedef struct SyncAppendEntriesReply { SyncTerm privateTerm; bool success; SyncIndex matchIndex; + SyncIndex lastSendIndex; int64_t startTime; } SyncAppendEntriesReply; @@ -456,6 +459,8 @@ typedef struct SyncHeartbeat { SyncTerm term; SyncIndex commitIndex; SyncTerm privateTerm; + SyncTerm minMatchIndex; + } SyncHeartbeat; SyncHeartbeat* syncHeartbeatBuild(int32_t vgId); @@ -676,24 +681,17 @@ void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg); // on message ---------------------- int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg); int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg); -int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg); -int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex); -int32_t syncNodeOnClientRequestBatchCb(SSyncNode* ths, SyncClientRequestBatch* pMsg); -int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg); -int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); -int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg); -int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg); -int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg); -int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); -int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg); -int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); - -int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg); -int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); - -int32_t syncNodeOnSnapshotSendCb(SSyncNode* ths, SyncSnapshotSend* pMsg); -int32_t syncNodeOnSnapshotRspCb(SSyncNode* ths, SyncSnapshotRsp* pMsg); +int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg); +int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg); +int32_t syncNodeOnClientRequest(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex); +int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg); +int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg); +int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg); +int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnSnapshot(SSyncNode* ths, SyncSnapshotSend* pMsg); +int32_t syncNodeOnSnapshotReply(SSyncNode* ths, SyncSnapshotRsp* pMsg); int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg); int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg); @@ -707,8 +705,8 @@ typedef int32_t (*FpOnRequestVoteReplyCb)(SSyncNode* ths, SyncRequestVoteReply* typedef int32_t (*FpOnAppendEntriesCb)(SSyncNode* ths, SyncAppendEntries* pMsg); typedef int32_t (*FpOnAppendEntriesReplyCb)(SSyncNode* ths, SyncAppendEntriesReply* pMsg); typedef int32_t (*FpOnTimeoutCb)(SSyncNode* pSyncNode, SyncTimeout* pMsg); -typedef int32_t (*FpOnSnapshotSendCb)(SSyncNode* ths, SyncSnapshotSend* pMsg); -typedef int32_t (*FpOnSnapshotRspCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg); +typedef int32_t (*FpOnSnapshotCb)(SSyncNode* ths, SyncSnapshotSend* pMsg); +typedef int32_t (*FpOnSnapshotReplyCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg); // option ---------------------------------- bool syncNodeSnapshotEnable(SSyncNode* pSyncNode); diff --git a/packaging/tools/post.sh b/packaging/tools/post.sh index 708a93c4fa..37307f0543 100755 --- a/packaging/tools/post.sh +++ b/packaging/tools/post.sh @@ -118,6 +118,7 @@ function kill_taosd() { } function install_include() { + ${csudo}mkdir -p ${inc_link_dir} ${csudo}rm -f ${inc_link_dir}/taos.h ${inc_link_dir}/taosdef.h ${inc_link_dir}/taoserror.h ${inc_link_dir}/taosudf.h || : [ -f ${inc_link_dir}/taosws.h ] && ${csudo}rm -f ${inc_link_dir}/taosws.h ||: diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index beff06700b..0bf254e455 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1897,7 +1897,7 @@ int32_t setResultDataPtr(SReqResultInfo* pResultInfo, TAOS_FIELD* pFields, int32 // check fields for (int32_t i = 0; i < numOfCols; ++i) { - int16_t type = *(int16_t*)p; + int8_t type = *(int8_t*)p; p += sizeof(int8_t); int32_t bytes = *(int32_t*)p; diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 945562ef36..e586018c19 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -46,38 +46,38 @@ int32_t genericRspCallback(void* param, SDataBuf* pMsg, int32_t code) { int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { SRequestObj* pRequest = param; + if (code != TSDB_CODE_SUCCESS) { - taosMemoryFree(pMsg->pEpSet); - taosMemoryFree(pMsg->pData); setErrno(pRequest, code); tsem_post(&pRequest->body.rspSem); - return code; + goto End; } STscObj* pTscObj = pRequest->pTscObj; SConnectRsp connectRsp = {0}; - tDeserializeSConnectRsp(pMsg->pData, pMsg->len, &connectRsp); + if (tDeserializeSConnectRsp(pMsg->pData, pMsg->len, &connectRsp) != 0) { + code = TSDB_CODE_TSC_INVALID_VERSION; + setErrno(pRequest, code); + tsem_post(&pRequest->body.rspSem); + goto End; + } int32_t now = taosGetTimestampSec(); int32_t delta = abs(now - connectRsp.svrTimestamp); if (delta > timestampDeltaLimit) { code = TSDB_CODE_TIME_UNSYNCED; tscError("time diff:%ds is too big", delta); - taosMemoryFree(pMsg->pEpSet); - taosMemoryFree(pMsg->pData); setErrno(pRequest, code); tsem_post(&pRequest->body.rspSem); - return code; + goto End; } /*assert(connectRsp.epSet.numOfEps > 0);*/ if (connectRsp.epSet.numOfEps == 0) { - taosMemoryFree(pMsg->pEpSet); - taosMemoryFree(pMsg->pData); setErrno(pRequest, TSDB_CODE_MND_APP_ERROR); tsem_post(&pRequest->body.rspSem); - return code; + goto End; } if (connectRsp.dnodeNum == 1) { @@ -113,14 +113,15 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { hbRegisterConn(pTscObj->pAppInfo->pAppHbMgr, pTscObj->id, connectRsp.clusterId, connectRsp.connType); - // pRequest->body.resInfo.pRspMsg = pMsg->pData; tscDebug("0x%" PRIx64 " clusterId:%" PRId64 ", totalConn:%" PRId64, pRequest->requestId, connectRsp.clusterId, pTscObj->pAppInfo->numOfConns); - taosMemoryFree(pMsg->pData); - taosMemoryFree(pMsg->pEpSet); tsem_post(&pRequest->body.rspSem); - return 0; +End: + + taosMemoryFree(pMsg->pEpSet); + taosMemoryFree(pMsg->pData); + return code; } SMsgSendInfo* buildMsgInfoImpl(SRequestObj* pRequest) { @@ -194,7 +195,7 @@ int32_t processUseDbRsp(void* param, SDataBuf* pMsg, int32_t code) { SUseDbRsp usedbRsp = {0}; tDeserializeSUseDbRsp(pMsg->pData, pMsg->len, &usedbRsp); - if(strlen(usedbRsp.db) == 0){ + if (strlen(usedbRsp.db) == 0) { return TSDB_CODE_MND_DB_NOT_EXIST; } @@ -292,7 +293,7 @@ int32_t processDropDbRsp(void* param, SDataBuf* pMsg, int32_t code) { tDeserializeSDropDbRsp(pMsg->pData, pMsg->len, &dropdbRsp); struct SCatalog* pCatalog = NULL; - int32_t code = catalogGetHandle(pRequest->pTscObj->pAppInfo->clusterId, &pCatalog); + int32_t code = catalogGetHandle(pRequest->pTscObj->pAppInfo->clusterId, &pCatalog); if (TSDB_CODE_SUCCESS == code) { catalogRemoveDB(pCatalog, dropdbRsp.db, dropdbRsp.uid); } diff --git a/source/client/src/clientTmqConnector.c b/source/client/src/clientTmqConnector.c index 6edf76124e..42988b16fe 100644 --- a/source/client/src/clientTmqConnector.c +++ b/source/client/src/clientTmqConnector.c @@ -73,8 +73,8 @@ JNIEXPORT void JNICALL Java_com_taosdata_jdbc_tmq_TMQConnector_tmqConfDestroyImp if (conf == NULL) { jniDebug("jobj:%p, tmq config is already destroyed", jobj); } else { - tmq_conf_destroy(conf); jniDebug("jobj:%p, config:%p, tmq successfully destroy config", jobj, conf); + tmq_conf_destroy(conf); } } @@ -206,6 +206,7 @@ JNIEXPORT void JNICALL Java_com_taosdata_jdbc_tmq_TMQConnector_tmqCommitAsync(JN tmq_t *tmq = (tmq_t *)jtmq; if (tmq == NULL) { jniError("jobj:%p, tmq is closed", jobj); + return; } TAOS_RES *res = (TAOS_RES *)jres; consumer = (*env)->NewGlobalRef(env, consumer); @@ -252,6 +253,7 @@ JNIEXPORT jstring JNICALL Java_com_taosdata_jdbc_tmq_TMQConnector_tmqGetTopicNam TAOS_RES *res = (TAOS_RES *)jres; if (res == NULL) { jniDebug("jobj:%p, invalid res handle", jobj); + return NULL; } return (*env)->NewStringUTF(env, tmq_get_topic_name(res)); } @@ -259,6 +261,7 @@ JNIEXPORT jstring JNICALL Java_com_taosdata_jdbc_tmq_TMQConnector_tmqGetDbName(J TAOS_RES *res = (TAOS_RES *)jres; if (res == NULL) { jniDebug("jobj:%p, invalid res handle", jobj); + return NULL; } return (*env)->NewStringUTF(env, tmq_get_db_name(res)); } @@ -266,6 +269,7 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_tmq_TMQConnector_tmqGetVgroupId(JN TAOS_RES *res = (TAOS_RES *)jres; if (res == NULL) { jniDebug("jobj:%p, invalid res handle", jobj); + return -1; } return tmq_get_vgroup_id(res); } @@ -275,6 +279,7 @@ JNIEXPORT jstring JNICALL Java_com_taosdata_jdbc_tmq_TMQConnector_tmqGetTableNam TAOS_RES *res = (TAOS_RES *)jres; if (res == NULL) { jniDebug("jobj:%p, invalid res handle", jobj); + return NULL; } return (*env)->NewStringUTF(env, tmq_get_table_name(res)); } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index fda1a05290..64f56212af 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2030,21 +2030,24 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataB tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var, true, offset, k); + } else if (colDataIsNull_s(pColInfoData, j)) { + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NULL, NULL, + false, offset, k); } else { tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var, true, offset, k); } break; - case TSDB_DATA_TYPE_NCHAR: { - void* data = colDataGetData(pColInfoData, j); - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_NCHAR, TD_VTYPE_NORM, data, true, - offset, k); - break; - } + case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_VARCHAR: { // TSDB_DATA_TYPE_BINARY - void* data = colDataGetData(pColInfoData, j); - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_VARCHAR, TD_VTYPE_NORM, data, true, - offset, k); + if (colDataIsNull_s(pColInfoData, j)) { + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NULL, NULL, + false, offset, k); + } else { + void* data = colDataGetData(pColInfoData, j); + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NORM, data, + true, offset, k); + } break; } case TSDB_DATA_TYPE_VARBINARY: @@ -2057,7 +2060,10 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataB break; default: if (pColInfoData->info.type < TSDB_DATA_TYPE_MAX && pColInfoData->info.type > TSDB_DATA_TYPE_NULL) { - if (pCol->type == pColInfoData->info.type) { + if (colDataIsNull_s(pColInfoData, j)) { + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NULL, NULL, false, offset, + k); + } else if (pCol->type == pColInfoData->info.type) { tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, var, true, offset, k); } else { diff --git a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h index 6d06535447..b47742b4ed 100644 --- a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h +++ b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h @@ -34,6 +34,7 @@ typedef struct SMnodeMgmt { SSingleWorker readWorker; SSingleWorker writeWorker; SSingleWorker syncWorker; + SSingleWorker syncCtrlWorker; bool stopped; int32_t refCount; TdThreadRwlock lock; @@ -53,6 +54,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt); void mmStopWorker(SMnodeMgmt *pMgmt); int32_t mmPutMsgToWriteQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); +int32_t mmPutMsgToSyncCtrlQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutMsgToQueryQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t mmPutMsgToFetchQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg); diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 4c1b307b90..89b68febd5 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -197,6 +197,9 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT, mmPutMsgToSyncCtrlQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT_REPLY, mmPutMsgToSyncCtrlQueue, 1) == NULL) goto _OVER; + code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index 16e6f67409..e0a39a6bf1 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -67,6 +67,24 @@ static void mmProcessRpcMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) { taosFreeQitem(pMsg); } +static void mmProcessSyncCtrlMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) { + SMnodeMgmt *pMgmt = pInfo->ahandle; + pMsg->info.node = pMgmt->pMnode; + + const STraceId *trace = &pMsg->info.traceId; + dGTrace("msg:%p, get from mnode-sync-ctrl queue", pMsg); + + SMsgHead *pHead = pMsg->pCont; + pHead->contLen = ntohl(pHead->contLen); + pHead->vgId = ntohl(pHead->vgId); + + int32_t code = mndProcessSyncCtrlMsg(pMsg); + + dGTrace("msg:%p, is freed, code:0x%x", pMsg, code); + rpcFreeCont(pMsg->pCont); + taosFreeQitem(pMsg); +} + static void mmProcessSyncMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) { SMnodeMgmt *pMgmt = pInfo->ahandle; pMsg->info.node = pMgmt->pMnode; @@ -108,6 +126,10 @@ int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { return mmPutMsgToWorker(pMgmt, &pMgmt->syncWorker, pMsg); } +int32_t mmPutMsgToSyncCtrlQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { + return mmPutMsgToWorker(pMgmt, &pMgmt->syncCtrlWorker, pMsg); +} + int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) { return mmPutMsgToWorker(pMgmt, &pMgmt->readWorker, pMsg); } @@ -144,6 +166,9 @@ int32_t mmPutMsgToQueue(SMnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { case SYNC_QUEUE: pWorker = &pMgmt->syncWorker; break; + case SYNC_CTRL_QUEUE: + pWorker = &pMgmt->syncCtrlWorker; + break; default: terrno = TSDB_CODE_INVALID_PARA; } @@ -223,6 +248,18 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { return -1; } + SSingleWorkerCfg scCfg = { + .min = 1, + .max = 1, + .name = "mnode-sync-ctrl", + .fp = (FItem)mmProcessSyncCtrlMsg, + .param = pMgmt, + }; + if (tSingleWorkerInit(&pMgmt->syncCtrlWorker, &scCfg) != 0) { + dError("failed to start mnode mnode-sync-ctrl worker since %s", terrstr()); + return -1; + } + dDebug("mnode workers are initialized"); return 0; } @@ -235,5 +272,6 @@ void mmStopWorker(SMnodeMgmt *pMgmt) { tSingleWorkerCleanup(&pMgmt->readWorker); tSingleWorkerCleanup(&pMgmt->writeWorker); tSingleWorkerCleanup(&pMgmt->syncWorker); + tSingleWorkerCleanup(&pMgmt->syncCtrlWorker); dDebug("mnode workers are closed"); } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index b81e2d886d..d682fa9cc5 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -66,7 +66,7 @@ void vmGetMonitorInfo(SVnodeMgmt *pMgmt, SMonVmInfo *pInfo) { pInfo->vstat.totalVnodes = totalVnodes; pInfo->vstat.masterNum = masterNum; - pInfo->vstat.numOfSelectReqs = numOfSelectReqs - pMgmt->state.numOfSelectReqs; + pInfo->vstat.numOfSelectReqs = numOfSelectReqs; pInfo->vstat.numOfInsertReqs = numOfInsertReqs; // delta pInfo->vstat.numOfInsertSuccessReqs = numOfInsertSuccessReqs; // delta pInfo->vstat.numOfBatchInsertReqs = numOfBatchInsertReqs; // delta @@ -188,7 +188,7 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix, req.hashSuffix, req.replica, req.selfIndex, req.strict); for (int32_t i = 0; i < req.replica; ++i) { - dInfo("vgId:%d, replica:%d fqdn:%s port:%u", req.vgId, req.replicas[i].id, req.replicas[i].fqdn, + dInfo("vgId:%d, replica:%d id:%d fqdn:%s port:%u", req.vgId, i, req.replicas[i].id, req.replicas[i].fqdn, req.replicas[i].port); } vmGenerateVnodeCfg(&req, &vnodeCfg); @@ -273,7 +273,17 @@ int32_t vmProcessAlterVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } int32_t vgId = alterReq.vgId; - dInfo("vgId:%d, start to alter vnode replica", alterReq.vgId); + dInfo("vgId:%d, start to alter vnode, replica:%d selfIndex:%d strict:%d", alterReq.vgId, alterReq.replica, + alterReq.selfIndex, alterReq.strict); + for (int32_t i = 0; i < alterReq.replica; ++i) { + dInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port); + } + + if (alterReq.replica <= 0 || alterReq.selfIndex < 0 || alterReq.selfIndex >= alterReq.replica) { + terrno = TSDB_CODE_INVALID_MSG; + dError("vgId:%d, failed to alter replica since invalid msg", alterReq.vgId); + return -1; + } SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); if (pVnode == NULL) { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index f825407b45..f36604eb27 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -241,6 +241,8 @@ static void *vmCloseVnodeInThread(void *param) { static void vmCloseVnodes(SVnodeMgmt *pMgmt) { dInfo("start to close all vnodes"); + tSingleWorkerCleanup(&pMgmt->mgmtWorker); + dInfo("vnodes mgmt worker is stopped"); int32_t numOfVnodes = 0; SVnodeObj **ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 8cf89b7f35..76d181761b 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -234,11 +234,9 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp return code; } -int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); } +int32_t vmPutMsgToSyncCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_CTRL_QUEUE); } -int32_t vmPutMsgToSyncCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { - return vmPutMsgToQueue(pMgmt, pMsg, SYNC_CTRL_QUEUE); -} +int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); } int32_t vmPutMsgToWriteQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, WRITE_QUEUE); } @@ -405,7 +403,6 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { } void vmStopWorker(SVnodeMgmt *pMgmt) { - tSingleWorkerCleanup(&pMgmt->mgmtWorker); tWWorkerCleanup(&pMgmt->writePool); tWWorkerCleanup(&pMgmt->applyPool); tWWorkerCleanup(&pMgmt->syncPool); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index ccfd7e4a2d..fbfa1b73be 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -250,6 +250,7 @@ static int32_t mndInitSdb(SMnode *pMnode) { opt.path = pMnode->path; opt.pMnode = pMnode; opt.pWal = pMnode->pWal; + opt.sync = pMnode->syncMgmt.sync; pMnode->pSdb = sdbInit(&opt); if (pMnode->pSdb == NULL) { @@ -381,6 +382,7 @@ SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) { mError("failed to open mnode since %s", terrstr()); return NULL; } + memset(pMnode, 0, sizeof(SMnode)); char timestr[24] = "1970-01-01 00:00:00.00"; (void)taosParseTime(timestr, &pMnode->checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0); @@ -474,6 +476,45 @@ void mndStop(SMnode *pMnode) { mndCleanupTimer(pMnode); } +int32_t mndProcessSyncCtrlMsg(SRpcMsg *pMsg) { + SMnode *pMnode = pMsg->info.node; + SSyncMgmt *pMgmt = &pMnode->syncMgmt; + int32_t code = 0; + + mInfo("vgId:%d, process sync ctrl msg", 1); + + if (!syncEnvIsStart()) { + mError("failed to process sync msg:%p type:%s since syncEnv stop", pMsg, TMSG_INFO(pMsg->msgType)); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; + } + + SSyncNode *pSyncNode = syncNodeAcquire(pMgmt->sync); + if (pSyncNode == NULL) { + mError("failed to process sync msg:%p type:%s since syncNode is null", pMsg, TMSG_INFO(pMsg->msgType)); + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; + } + + if (pMsg->msgType == TDMT_SYNC_HEARTBEAT) { + SyncHeartbeat *pSyncMsg = syncHeartbeatFromRpcMsg2(pMsg); + code = syncNodeOnHeartbeat(pSyncNode, pSyncMsg); + syncHeartbeatDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_HEARTBEAT_REPLY) { + SyncHeartbeatReply *pSyncMsg = syncHeartbeatReplyFromRpcMsg2(pMsg); + code = syncNodeOnHeartbeatReply(pSyncNode, pSyncMsg); + syncHeartbeatReplyDestroy(pSyncMsg); + } + + syncNodeRelease(pSyncNode); + + if (code != 0) { + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + } + return code; +} + int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; SSyncMgmt *pMgmt = &pMnode->syncMgmt; @@ -492,89 +533,64 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { return -1; } - // ToDo: ugly! use function pointer - if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_STANDARD_SNAPSHOT) { - if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); - code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); - code = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); - code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); - code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL); - syncClientRequestDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { - SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { - SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesSnapshotCb(pSyncNode, pSyncMsg); - syncAppendEntriesDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesReplySnapshotCb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { - SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg); - code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg); - syncSnapshotSendDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { - SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg); - code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg); - syncSnapshotRspDestroy(pSyncMsg); - } else { - mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); - code = -1; - } + if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); + code = syncNodeOnTimer(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); + code = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); + code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); + code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, NULL); + syncClientRequestDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); + code = syncNodeOnRequestVote(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteReply(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntries(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesReply(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg); + code = syncNodeOnSnapshot(pSyncNode, pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg); + code = syncNodeOnSnapshotReply(pSyncNode, pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SET_MNODE_STANDBY) { + code = syncSetStandby(pMgmt->sync); + SRpcMsg rsp = {.code = code, .info = pMsg->info}; + tmsgSendRsp(&rsp); + } else { - if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); - code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); - code = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); - code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); - code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL); - syncClientRequestDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { - SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { - SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); - syncAppendEntriesDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); - } else { - mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); - code = -1; - } + mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); + code = -1; } syncNodeRelease(pSyncNode); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index ac95dd2795..cd6fe380e1 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -71,8 +71,8 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM mInfo("trans:%d, is proposed and post sem", transId); } pMgmt->transId = 0; - taosWUnLockLatch(&pMgmt->lock); tsem_post(&pMgmt->syncSem); + taosWUnLockLatch(&pMgmt->lock); } else { taosWUnLockLatch(&pMgmt->lock); STrans *pTrans = mndAcquireTrans(pMnode, transId); @@ -113,27 +113,7 @@ void mndRestoreFinish(struct SSyncFSM *pFsm) { } } -void mndReConfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReConfigCbMeta *cbMeta) { - SMnode *pMnode = pFsm->data; - SSyncMgmt *pMgmt = &pMnode->syncMgmt; - - pMgmt->errCode = cbMeta->code; - mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64, pMgmt->transId, - cbMeta->code, cbMeta->index, cbMeta->term); - - taosWLockLatch(&pMgmt->lock); - if (pMgmt->transId == -1) { - if (pMgmt->errCode != 0) { - mError("trans:-1, failed to propose sync reconfig since %s, post sem", tstrerror(pMgmt->errCode)); - } else { - mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64 " post sem", - pMgmt->transId, cbMeta->code, cbMeta->index, cbMeta->term); - } - pMgmt->transId = 0; - tsem_post(&pMgmt->syncSem); - } - taosWUnLockLatch(&pMgmt->lock); -} +void mndReConfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReConfigCbMeta *cbMeta) {} int32_t mndSnapshotStartRead(struct SSyncFSM *pFsm, void *pParam, void **ppReader) { mInfo("start to read snapshot from sdb"); @@ -179,11 +159,14 @@ void mndLeaderTransfer(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cb static void mndBecomeFollower(struct SSyncFSM *pFsm) { SMnode *pMnode = pFsm->data; - mInfo("vgId:1, become follower and post sem"); + mInfo("vgId:1, become follower"); taosWLockLatch(&pMnode->syncMgmt.lock); if (pMnode->syncMgmt.transId != 0) { + mInfo("vgId:1, become follower and post sem, trans:%d, failed to propose since not leader", + pMnode->syncMgmt.transId); pMnode->syncMgmt.transId = 0; + pMnode->syncMgmt.errCode = TSDB_CODE_SYN_NOT_LEADER; tsem_post(&pMnode->syncMgmt.syncSem); } taosWUnLockLatch(&pMnode->syncMgmt.lock); @@ -292,6 +275,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { int32_t code = syncPropose(pMgmt->sync, &req, isWeak); if (code == 0) { + mInfo("trans:%d, is proposing and wait sem", pMgmt->transId); tsem_wait(&pMgmt->syncSem); } else if (code > 0) { mInfo("trans:%d, confirm at once since replica is 1, continue execute", transId); @@ -301,12 +285,16 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { sdbWriteWithoutFree(pMnode->pSdb, pRaw); sdbSetApplyInfo(pMnode->pSdb, req.info.conn.applyIndex, req.info.conn.applyTerm, SYNC_INDEX_INVALID); code = 0; - } else if (code == -1 && terrno == TSDB_CODE_SYN_NOT_LEADER) { - terrno = TSDB_CODE_APP_NOT_READY; - } else if (code == -1 && terrno == TSDB_CODE_SYN_INTERNAL_ERROR) { - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; } else { - terrno = TSDB_CODE_APP_ERROR; + taosWLockLatch(&pMgmt->lock); + mInfo("trans:%d, failed to proposed since %s", transId, terrstr()); + pMgmt->transId = 0; + taosWUnLockLatch(&pMgmt->lock); + if (terrno == TSDB_CODE_SYN_NOT_LEADER) { + terrno = TSDB_CODE_APP_NOT_READY; + } else { + terrno = TSDB_CODE_APP_ERROR; + } } rpcFreeCont(req.pCont); @@ -315,6 +303,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { return code; } + if (pMgmt->errCode != 0) terrno = pMgmt->errCode; return pMgmt->errCode; } @@ -328,6 +317,7 @@ void mndSyncStart(SMnode *pMnode) { void mndSyncStop(SMnode *pMnode) { taosWLockLatch(&pMnode->syncMgmt.lock); if (pMnode->syncMgmt.transId != 0) { + mInfo("vgId:1, is stopped and post sem, trans:%d", pMnode->syncMgmt.transId); pMnode->syncMgmt.transId = 0; tsem_post(&pMnode->syncMgmt.syncSem); } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 5bbd64e04a..6f430ad3e4 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -778,7 +778,7 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) { mInfo("trans:%d, sync to other mnodes, stage:%s", pTrans->id, mndTransStr(pTrans->stage)); int32_t code = mndSyncPropose(pMnode, pRaw, pTrans->id); if (code != 0) { - mError("trans:%d, failed to sync since %s", pTrans->id, terrstr()); + mError("trans:%d, failed to sync, errno:%s code:%s", pTrans->id, terrstr(), tstrerror(code)); sdbFreeRaw(pRaw); return -1; } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 11f5a9f12a..f46fc5d23e 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -341,6 +341,12 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p alterReq.selfIndex = v; } } + alterReq.replica = pVgroup->replica; + mInfo("vgId:%d, start to alter vnode, replica:%d selfIndex:%d strict:%d", alterReq.vgId, alterReq.replica, + alterReq.selfIndex, alterReq.strict); + for (int32_t i = 0; i < alterReq.replica; ++i) { + mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port); + } if (alterReq.selfIndex == -1) { terrno = TSDB_CODE_MND_APP_ERROR; diff --git a/source/dnode/mnode/impl/test/CMakeLists.txt b/source/dnode/mnode/impl/test/CMakeLists.txt index 7db8485561..a002b20bde 100644 --- a/source/dnode/mnode/impl/test/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/CMakeLists.txt @@ -1,7 +1,7 @@ enable_testing() add_subdirectory(acct) -add_subdirectory(db) +#add_subdirectory(db) #add_subdirectory(dnode) add_subdirectory(func) #add_subdirectory(mnode) @@ -14,4 +14,4 @@ add_subdirectory(snode) add_subdirectory(stb) add_subdirectory(topic) add_subdirectory(trans) -add_subdirectory(user) +#add_subdirectory(user) diff --git a/source/dnode/mnode/impl/test/user/CMakeLists.txt b/source/dnode/mnode/impl/test/user/CMakeLists.txt index 23c67bc6ca..6e2301fca0 100644 --- a/source/dnode/mnode/impl/test/user/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/user/CMakeLists.txt @@ -5,7 +5,8 @@ target_link_libraries( PUBLIC sut ) -# add_test( -# NAME userTest -# COMMAND userTest -# ) + +#add_test( +# NAME userTest +# COMMAND userTest +#) diff --git a/source/dnode/mnode/sdb/CMakeLists.txt b/source/dnode/mnode/sdb/CMakeLists.txt index 2001a70da2..186c85004a 100644 --- a/source/dnode/mnode/sdb/CMakeLists.txt +++ b/source/dnode/mnode/sdb/CMakeLists.txt @@ -5,5 +5,5 @@ target_include_directories( PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) target_link_libraries( - sdb os common util wal -) \ No newline at end of file + sdb os common util wal sync +) diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index d4db4709ca..a6d81ecc0d 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -169,6 +169,7 @@ typedef struct SSdbRow { typedef struct SSdb { SMnode *pMnode; SWal *pWal; + int64_t sync; char *currDir; char *tmpDir; int64_t commitIndex; @@ -212,6 +213,7 @@ typedef struct SSdbOpt { const char *path; SMnode *pMnode; SWal *pWal; + int64_t sync; } SSdbOpt; /** diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index e73fd28e71..648ccff432 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -53,6 +53,7 @@ SSdb *sdbInit(SSdbOpt *pOption) { } pSdb->pWal = pOption->pWal; + pSdb->sync = pOption->sync; pSdb->applyIndex = -1; pSdb->applyTerm = -1; pSdb->applyConfig = -1; diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 5eedcc545a..f2a18b7212 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "sdb.h" +#include "sync.h" #include "tchecksum.h" #include "wal.h" @@ -456,14 +457,25 @@ int32_t sdbWriteFile(SSdb *pSdb, int32_t delta) { taosThreadMutexLock(&pSdb->filelock); if (pSdb->pWal != NULL) { - code = walBeginSnapshot(pSdb->pWal, pSdb->applyIndex); + // code = walBeginSnapshot(pSdb->pWal, pSdb->applyIndex); + if (pSdb->sync == 0) { + code = 0; + } else { + code = syncBeginSnapshot(pSdb->sync, pSdb->applyIndex); + } } if (code == 0) { code = sdbWriteFileImp(pSdb); } if (code == 0) { if (pSdb->pWal != NULL) { - code = walEndSnapshot(pSdb->pWal); + // code = walEndSnapshot(pSdb->pWal); + + if (pSdb->sync == 0) { + code = 0; + } else { + code = syncEndSnapshot(pSdb->sync); + } } } if (code != 0) { diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index be0d6fdc4d..099a039b15 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -120,7 +120,11 @@ typedef struct SMetaFltParam { } SMetaFltParam; +// TODO, refactor later int32_t metaFilterTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *results); +int32_t metaFilterCreateTime(SMeta *pMeta, SMetaFltParam *parm, SArray *pUids); +int32_t metaFilterTableName(SMeta *pMeta, SMetaFltParam *param, SArray *pUids); +int32_t metaFilterTtl(SMeta *pMeta, SMetaFltParam *param, SArray *pUids); #if 1 // refact APIs below (TODO) typedef SVCreateTbReq STbCfg; diff --git a/source/dnode/vnode/src/inc/meta.h b/source/dnode/vnode/src/inc/meta.h index dbe2f80150..9e2fe4aaf0 100644 --- a/source/dnode/vnode/src/inc/meta.h +++ b/source/dnode/vnode/src/inc/meta.h @@ -86,8 +86,12 @@ struct SMeta { TTB* pSuidIdx; // ivt idx and idx void* pTagIvtIdx; - TTB* pTagIdx; - TTB* pTtlIdx; + + TTB* pTagIdx; + TTB* pTtlIdx; + + TTB* pCtimeIdx; // table created time idx + TTB* pNcolIdx; // ncol of table idx, normal table only TTB* pSmaIdx; @@ -142,6 +146,16 @@ typedef struct { int64_t smaUid; } SSmaIdxKey; +typedef struct { + int64_t ctime; + tb_uid_t uid; +} SCtimeIdxKey; + +typedef struct { + int64_t ncol; + tb_uid_t uid; +} SNcolIdxKey; + // metaTable ================== int metaCreateTagIdxKey(tb_uid_t suid, int32_t cid, const void* pTagData, int32_t nTagData, int8_t type, tb_uid_t uid, STagIdxKey** ppTagIdxKey, int32_t* nTagIdxKey); diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 2198033db9..1b5f742559 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -24,6 +24,9 @@ static int uidIdxKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kL static int smaIdxKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2); static int taskIdxKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2); +static int ctimeIdxCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2); +static int ncolIdxCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2); + static int32_t metaInitLock(SMeta *pMeta) { return taosThreadRwlockInit(&pMeta->lock, NULL); } static int32_t metaDestroyLock(SMeta *pMeta) { return taosThreadRwlockDestroy(&pMeta->lock); } @@ -139,6 +142,20 @@ int metaOpen(SVnode *pVnode, SMeta **ppMeta, int8_t rollback) { goto _err; } + // idx table create time + ret = tdbTbOpen("ctime.idx", sizeof(SCtimeIdxKey), 0, ctimeIdxCmpr, pMeta->pEnv, &pMeta->pCtimeIdx, 0); + if (ret < 0) { + metaError("vgId:%d, failed to open meta ctime index since %s", TD_VID(pVnode), tstrerror(terrno)); + goto _err; + } + + // idx num of col, normal table only + ret = tdbTbOpen("ncol.idx", sizeof(SNcolIdxKey), 0, ncolIdxCmpr, pMeta->pEnv, &pMeta->pNcolIdx, 0); + if (ret < 0) { + metaError("vgId:%d, failed to open meta ncol index since %s", TD_VID(pVnode), tstrerror(terrno)); + goto _err; + } + ret = tdbTbOpen("stream.task.db", sizeof(int64_t), -1, taskIdxKeyCmpr, pMeta->pEnv, &pMeta->pStreamDb, 0); if (ret < 0) { metaError("vgId:%d, failed to open meta stream task index since %s", TD_VID(pVnode), tstrerror(terrno)); @@ -166,6 +183,8 @@ int metaOpen(SVnode *pVnode, SMeta **ppMeta, int8_t rollback) { _err: if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); + if (pMeta->pNcolIdx) tdbTbClose(pMeta->pNcolIdx); + if (pMeta->pCtimeIdx) tdbTbClose(pMeta->pCtimeIdx); if (pMeta->pSmaIdx) tdbTbClose(pMeta->pSmaIdx); if (pMeta->pTtlIdx) tdbTbClose(pMeta->pTtlIdx); if (pMeta->pTagIvtIdx) indexClose(pMeta->pTagIvtIdx); @@ -187,6 +206,8 @@ int metaClose(SMeta *pMeta) { if (pMeta->pCache) metaCacheClose(pMeta); if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); + if (pMeta->pNcolIdx) tdbTbClose(pMeta->pNcolIdx); + if (pMeta->pCtimeIdx) tdbTbClose(pMeta->pCtimeIdx); if (pMeta->pSmaIdx) tdbTbClose(pMeta->pSmaIdx); if (pMeta->pTtlIdx) tdbTbClose(pMeta->pTtlIdx); if (pMeta->pTagIvtIdx) indexClose(pMeta->pTagIvtIdx); @@ -391,6 +412,43 @@ static int ttlIdxKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kL return 0; } +static int ctimeIdxCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2) { + SCtimeIdxKey *pCtimeIdxKey1 = (SCtimeIdxKey *)pKey1; + SCtimeIdxKey *pCtimeIdxKey2 = (SCtimeIdxKey *)pKey2; + if (pCtimeIdxKey1->ctime > pCtimeIdxKey2->ctime) { + return 1; + } else if (pCtimeIdxKey1->ctime < pCtimeIdxKey2->ctime) { + return -1; + } + + if (pCtimeIdxKey1->uid > pCtimeIdxKey2->uid) { + return 1; + } else if (pCtimeIdxKey1->uid < pCtimeIdxKey2->uid) { + return -1; + } + + return 0; +} + +static int ncolIdxCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2) { + SNcolIdxKey *pNcolIdxKey1 = (SNcolIdxKey *)pKey1; + SNcolIdxKey *pNcolIdxKey2 = (SNcolIdxKey *)pKey2; + + if (pNcolIdxKey1->ncol > pNcolIdxKey2->ncol) { + return 1; + } else if (pNcolIdxKey1->ncol < pNcolIdxKey2->ncol) { + return -1; + } + + if (pNcolIdxKey1->uid > pNcolIdxKey2->uid) { + return 1; + } else if (pNcolIdxKey1->uid < pNcolIdxKey2->uid) { + return -1; + } + + return 0; +} + static int smaIdxKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2) { SSmaIdxKey *pSmaIdxKey1 = (SSmaIdxKey *)pKey1; SSmaIdxKey *pSmaIdxKey2 = (SSmaIdxKey *)pKey2; diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 87c788cc37..0727d32674 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -1038,6 +1038,143 @@ typedef struct { int32_t vLen; } SIdxCursor; +int32_t metaFilterCreateTime(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { + int32_t ret = 0; + + SIdxCursor *pCursor = NULL; + pCursor = (SIdxCursor *)taosMemoryCalloc(1, sizeof(SIdxCursor)); + pCursor->pMeta = pMeta; + pCursor->suid = param->suid; + pCursor->cid = param->cid; + pCursor->type = param->type; + + metaRLock(pMeta); + ret = tdbTbcOpen(pMeta->pCtimeIdx, &pCursor->pCur, NULL); + if (ret != 0) { + goto END; + } + int64_t uidLimit = param->reverse ? INT64_MAX : 0; + + SCtimeIdxKey ctimeKey = {.ctime = *(int64_t *)(param->val), .uid = uidLimit}; + SCtimeIdxKey *pCtimeKey = &ctimeKey; + + int cmp = 0; + if (tdbTbcMoveTo(pCursor->pCur, &ctimeKey, sizeof(ctimeKey), &cmp) < 0) { + goto END; + } + bool first = true; + int32_t valid = 0; + while (1) { + void *entryKey = NULL; + int32_t nEntryKey = -1; + valid = tdbTbcGet(pCursor->pCur, (const void **)&entryKey, &nEntryKey, NULL, NULL); + if (valid < 0) break; + + SCtimeIdxKey *p = entryKey; + + int32_t cmp = (*param->filterFunc)((void *)&p->ctime, (void *)&pCtimeKey->ctime, param->type); + if (cmp == 0) taosArrayPush(pUids, &p->uid); + if (cmp == -1) break; + valid = param->reverse ? tdbTbcMoveToPrev(pCursor->pCur) : tdbTbcMoveToNext(pCursor->pCur); + if (valid < 0) break; + } + +END: + if (pCursor->pMeta) metaULock(pCursor->pMeta); + if (pCursor->pCur) tdbTbcClose(pCursor->pCur); + taosMemoryFree(pCursor); + return ret; +} + +int32_t metaFilterTableName(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { + int32_t ret = 0; + char *buf = NULL; + + STagIdxKey *pKey = NULL; + int32_t nKey = 0; + + SIdxCursor *pCursor = NULL; + pCursor = (SIdxCursor *)taosMemoryCalloc(1, sizeof(SIdxCursor)); + pCursor->pMeta = pMeta; + pCursor->suid = param->suid; + pCursor->cid = param->cid; + pCursor->type = param->type; + + char *pName = param->val; + + metaRLock(pMeta); + ret = tdbTbcOpen(pMeta->pNameIdx, &pCursor->pCur, NULL); + if (ret != 0) { + goto END; + } + + int cmp = 0; + if (tdbTbcMoveTo(pCursor->pCur, pName, strlen(pName) + 1, &cmp) < 0) { + goto END; + } + bool first = true; + int32_t valid = 0; + while (1) { + void *pEntryKey = NULL, *pEntryVal = NULL; + int32_t nEntryKey = -1, nEntryVal = 0; + valid = tdbTbcGet(pCursor->pCur, (const void **)pEntryKey, &nEntryKey, (const void **)&pEntryVal, &nEntryVal); + if (valid < 0) break; + + char *pTableKey = (char *)pEntryKey; + int32_t cmp = (*param->filterFunc)(pTableKey, pName, pCursor->type); + if (cmp == 0) { + tb_uid_t tuid = *(tb_uid_t *)pEntryVal; + taosArrayPush(pUids, &tuid); + } else if (cmp == 1) { + // next + } else { + break; + } + valid = param->reverse ? tdbTbcMoveToPrev(pCursor->pCur) : tdbTbcMoveToNext(pCursor->pCur); + if (valid < 0) { + break; + } + } + +END: + if (pCursor->pMeta) metaULock(pCursor->pMeta); + if (pCursor->pCur) tdbTbcClose(pCursor->pCur); + taosMemoryFree(buf); + taosMemoryFree(pKey); + + taosMemoryFree(pCursor); + + return ret; +} +int32_t metaFilterTtl(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { + int32_t ret = 0; + char *buf = NULL; + + STtlIdxKey *pKey = NULL; + int32_t nKey = 0; + + SIdxCursor *pCursor = NULL; + pCursor = (SIdxCursor *)taosMemoryCalloc(1, sizeof(SIdxCursor)); + pCursor->pMeta = pMeta; + pCursor->suid = param->suid; + pCursor->cid = param->cid; + pCursor->type = param->type; + + metaRLock(pMeta); + ret = tdbTbcOpen(pMeta->pTtlIdx, &pCursor->pCur, NULL); + +END: + if (pCursor->pMeta) metaULock(pCursor->pMeta); + if (pCursor->pCur) tdbTbcClose(pCursor->pCur); + taosMemoryFree(buf); + taosMemoryFree(pKey); + + taosMemoryFree(pCursor); + + return ret; + // impl later + return 0; +} int32_t metaFilterTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { int32_t ret = 0; char *buf = NULL; @@ -1053,7 +1190,7 @@ int32_t metaFilterTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { pCursor->type = param->type; metaRLock(pMeta); - ret = tdbTbcOpen(pMeta->pTagIdx, &pCursor->pCur, NULL); + ret = tdbTbcOpen(pMeta->pCtimeIdx, &pCursor->pCur, NULL); if (ret < 0) { goto END; } @@ -1064,6 +1201,7 @@ int32_t metaFilterTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { if (param->val == NULL) { metaError("vgId:%d, failed to filter NULL data", TD_VID(pMeta->pVnode)); + ret = -1; goto END; } else { if (IS_VAR_DATA_TYPE(param->type)) { @@ -1104,9 +1242,11 @@ int32_t metaFilterTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { valid = tdbTbcGet(pCursor->pCur, (const void **)&entryKey, &nEntryKey, (const void **)&entryVal, &nEntryVal); if (valid < 0) { + tdbFree(entryVal); break; } STagIdxKey *p = entryKey; + if (p == NULL) break; if (p->type != pCursor->type) { if (first) { valid = param->reverse ? tdbTbcMoveToPrev(pCursor->pCur) : tdbTbcMoveToNext(pCursor->pCur); @@ -1116,7 +1256,7 @@ int32_t metaFilterTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) { break; } } - if (p == NULL || p->suid != pKey->suid) { + if (p->suid != pKey->suid) { break; } first = false; diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index d16c5b19f3..b6c4b35d9b 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -27,6 +27,11 @@ static int metaUpdateSuidIdx(SMeta *pMeta, const SMetaEntry *pME); static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry); static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type); static void metaDestroyTagIdxKey(STagIdxKey *pTagIdxKey); +// opt ins_tables query +static int metaUpdateCtimeIdx(SMeta *pMeta, const SMetaEntry *pME); +static int metaDeleteCtimeIdx(SMeta *pMeta, const SMetaEntry *pME); +static int metaUpdateNcolIdx(SMeta *pMeta, const SMetaEntry *pME); +static int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME); static void metaGetEntryInfo(const SMetaEntry *pEntry, SMetaInfo *pInfo) { pInfo->uid = pEntry->uid; @@ -551,6 +556,26 @@ static void metaBuildTtlIdxKey(STtlIdxKey *ttlKey, const SMetaEntry *pME) { ttlKey->dtime = ctime / 1000 + ttlDays * tsTtlUnit; ttlKey->uid = pME->uid; } +static int metaBuildCtimeIdxKey(SCtimeIdxKey *ctimeKey, const SMetaEntry *pME) { + int64_t ctime; + if (pME->type == TSDB_CHILD_TABLE) { + ctime = pME->ctbEntry.ctime; + } else if (pME->type == TSDB_NORMAL_TABLE) { + ctime = pME->ntbEntry.ctime; + } else { + return -1; + } + + ctimeKey->ctime = ctime; + ctimeKey->uid = pME->uid; + return 0; +} + +static int metaBuildNColIdxKey(SNcolIdxKey *ncolKey, const SMetaEntry *pME) { + ncolKey->ncol = pME->ntbEntry.schemaRow.nCols; + ncolKey->uid = pME->uid; + return 0; +} static int metaDeleteTtlIdx(SMeta *pMeta, const SMetaEntry *pME) { STtlIdxKey ttlKey = {0}; @@ -632,6 +657,9 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { tdbTbDelete(pMeta->pNameIdx, e.name, strlen(e.name) + 1, &pMeta->txn); tdbTbDelete(pMeta->pUidIdx, &uid, sizeof(uid), &pMeta->txn); + if (e.type == TSDB_CHILD_TABLE || e.type == TSDB_NORMAL_TABLE) metaDeleteCtimeIdx(pMeta, &e); + if (e.type == TSDB_NORMAL_TABLE) metaDeleteNcolIdx(pMeta, &e); + if (e.type != TSDB_SUPER_TABLE) metaDeleteTtlIdx(pMeta, &e); if (e.type == TSDB_CHILD_TABLE) { @@ -658,6 +686,37 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) { return 0; } +// opt ins_tables +int metaUpdateCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) { + SCtimeIdxKey ctimeKey = {0}; + if (metaBuildCtimeIdxKey(&ctimeKey, pME) < 0) { + return 0; + } + return tdbTbInsert(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), NULL, 0, &pMeta->txn); +} + +int metaDeleteCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) { + SCtimeIdxKey ctimeKey = {0}; + if (metaBuildCtimeIdxKey(&ctimeKey, pME) < 0) { + return 0; + } + return tdbTbDelete(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), &pMeta->txn); +} +int metaUpdateNcolIdx(SMeta *pMeta, const SMetaEntry *pME) { + SNcolIdxKey ncolKey = {0}; + if (metaBuildNColIdxKey(&ncolKey, pME) < 0) { + return 0; + } + return tdbTbInsert(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), NULL, 0, &pMeta->txn); +} + +int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME) { + SNcolIdxKey ncolKey = {0}; + if (metaBuildNColIdxKey(&ncolKey, pME) < 0) { + return 0; + } + return tdbTbDelete(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), &pMeta->txn); +} static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTbReq, STableMetaRsp *pMetaRsp) { void *pVal = NULL; @@ -1278,12 +1337,14 @@ static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry) { ret = metaSaveJsonVarToIdx(pMeta, pCtbEntry, pTagColumn); goto end; } - if (metaCreateTagIdxKey(pCtbEntry->ctbEntry.suid, pTagColumn->colId, pTagData, nTagData, pTagColumn->type, - pCtbEntry->uid, &pTagIdxKey, &nTagIdxKey) < 0) { - ret = -1; - goto end; + if (pTagData != NULL) { + if (metaCreateTagIdxKey(pCtbEntry->ctbEntry.suid, pTagColumn->colId, pTagData, nTagData, pTagColumn->type, + pCtbEntry->uid, &pTagIdxKey, &nTagIdxKey) < 0) { + ret = -1; + goto end; + } + tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, &pMeta->txn); } - tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, &pMeta->txn); end: metaDestroyTagIdxKey(pTagIdxKey); tDecoderClear(&dc); @@ -1370,6 +1431,12 @@ int metaHandleEntry(SMeta *pMeta, const SMetaEntry *pME) { } } + if (metaUpdateCtimeIdx(pMeta, pME) < 0) goto _err; + + if (pME->type == TSDB_NORMAL_TABLE) { + if (metaUpdateNcolIdx(pMeta, pME) < 0) goto _err; + } + if (pME->type != TSDB_SUPER_TABLE) { if (metaUpdateTtlIdx(pMeta, pME) < 0) goto _err; } diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index bb394e8acc..101a6786de 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1492,4 +1492,11 @@ void tsdbCacheSetCapacity(SVnode *pVnode, size_t capacity) { size_t tsdbCacheGetCapacity(SVnode *pVnode) { return taosLRUCacheGetCapacity(pVnode->pTsdb->lruCache); } -size_t tsdbCacheGetUsage(SVnode *pVnode) { return taosLRUCacheGetUsage(pVnode->pTsdb->lruCache); } +size_t tsdbCacheGetUsage(SVnode *pVnode) { + size_t usage = 0; + if (pVnode->pTsdb != NULL) { + usage = taosLRUCacheGetUsage(pVnode->pTsdb->lruCache); + } + + return usage; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 88bbf67758..257a7d0baa 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -1891,6 +1891,10 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } else { init = true; STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return code; + } + code = tRowMergerInit(&merge, piRow, pSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2286,7 +2290,7 @@ _end: if (pResBlock->info.rows > 0) { tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, elapsed time:%.2f ms %s", - pReader, pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pReader, pResBlock->info.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows, el, pReader->idStr); } diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 7cbd27e762..63a576f1a2 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -15,7 +15,7 @@ #include "vnd.h" -#define VND_INFO_FNAME "vnode.json" +#define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData); @@ -228,7 +228,9 @@ int vnodeCommit(SVnode *pVnode) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } - walBeginSnapshot(pVnode->pWal, pVnode->state.applied); + + // walBeginSnapshot(pVnode->pWal, pVnode->state.applied); + syncBeginSnapshot(pVnode->sync, pVnode->state.applied); code = smaPreCommit(pVnode->pSma); TSDB_CHECK_CODE(code, lino, _exit); @@ -282,7 +284,8 @@ int vnodeCommit(SVnode *pVnode) { } // apply the commit (TODO) - walEndSnapshot(pVnode->pWal); + // walEndSnapshot(pVnode->pWal); + syncEndSnapshot(pVnode->sync); _exit: if (code) { diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index 9adef918ba..f1bbb2d4b3 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -15,10 +15,13 @@ #include "vnd.h" -#define VNODE_GET_LOAD_RESET_VALS(pVar, oVal, vType) \ - do { \ - int##vType##_t newVal = atomic_sub_fetch_##vType(&(pVar), (oVal)); \ - ASSERT(newVal >= 0); \ +#define VNODE_GET_LOAD_RESET_VALS(pVar, oVal, vType, tags) \ + do { \ + int##vType##_t newVal = atomic_sub_fetch_##vType(&(pVar), (oVal)); \ + ASSERT(newVal >= 0); \ + if (newVal < 0) { \ + vWarn("vgId:%d %s, abnormal val:%" PRIi64 ", old val:%" PRIi64, TD_VID(pVnode), tags, newVal, (oVal)); \ + } \ } while (0) int vnodeQueryOpen(SVnode *pVnode) { @@ -435,10 +438,10 @@ int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad) { * @param pLoad */ void vnodeResetLoad(SVnode *pVnode, SVnodeLoad *pLoad) { - VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nInsert, pLoad->numOfInsertReqs, 64); - VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nInsertSuccess, pLoad->numOfInsertSuccessReqs, 64); - VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nBatchInsert, pLoad->numOfBatchInsertReqs, 64); - VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nBatchInsertSuccess, pLoad->numOfBatchInsertSuccessReqs, 64); + VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nInsert, pLoad->numOfInsertReqs, 64, "nInsert"); + VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nInsertSuccess, pLoad->numOfInsertSuccessReqs, 64, "nInsertSuccess"); + VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nBatchInsert, pLoad->numOfBatchInsertReqs, 64, "nBatchInsert"); + VNODE_GET_LOAD_RESET_VALS(pVnode->statis.nBatchInsertSuccess, pLoad->numOfBatchInsertSuccessReqs, 64, "nBatchInsertSuccess"); } void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId) { diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index f59e28daaf..1863203f4a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -292,116 +292,67 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { vGTrace("vgId:%d, sync msg:%p will be processed, type:%s", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType)); - if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_NO_SNAPSHOT) { - if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL); - syncClientRequestDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST_BATCH) { - SyncClientRequestBatch *pSyncMsg = syncClientRequestBatchFromRpcMsg(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg); - syncClientRequestBatchDestroyDeep(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { - SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { - SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); - syncAppendEntriesDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); - } else { - vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType); - code = -1; - } + if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnTimer(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); - } else if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_WAL_FIRST) { - // use wal first strategy - if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL); - syncClientRequestDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST_BATCH) { - SyncClientRequestBatch *pSyncMsg = syncClientRequestBatchFromRpcMsg(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg); - syncClientRequestBatchDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { - SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_BATCH) { - SyncAppendEntriesBatch *pSyncMsg = syncAppendEntriesBatchFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnAppendEntriesSnapshot2Cb(pSyncNode, pSyncMsg); - syncAppendEntriesBatchDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); - ASSERT(pSyncMsg != NULL); - code = syncNodeOnAppendEntriesReplySnapshot2Cb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { - SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg); - code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg); - syncSnapshotSendDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { - SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg); - code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg); - syncSnapshotRspDestroy(pSyncMsg); - } else { - vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType); - code = -1; - } + } else if (pMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, NULL); + syncClientRequestDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnRequestVote(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnRequestVoteReply(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnAppendEntries(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); + ASSERT(pSyncMsg != NULL); + code = syncNodeOnAppendEntriesReply(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg); + code = syncNodeOnSnapshot(pSyncNode, pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg); + code = syncNodeOnSnapshotReply(pSyncNode, pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + + } else { + vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType); + code = -1; } vTrace("vgId:%d, sync msg:%p is processed, type:%s code:0x%x", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType), @@ -413,6 +364,19 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { return code; } +static int32_t vnodeSyncEqCtrlMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { + if (msgcb == NULL) { + return -1; + } + + int32_t code = tmsgPutToQueue(msgcb, SYNC_CTRL_QUEUE, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + } + return code; +} + static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { if (msgcb == NULL) { return -1; @@ -683,6 +647,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) { .msgcb = NULL, .FpSendMsg = vnodeSyncSendMsg, .FpEqMsg = vnodeSyncEqMsg, + .FpEqCtrlMsg = vnodeSyncEqCtrlMsg, }; snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", path, TD_DIRSEP); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 20ca8b7112..d94eb80994 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -541,6 +541,12 @@ typedef struct { SSnapContext* sContext; } SStreamRawScanInfo; +typedef struct SSysTableIndex { + int8_t init; + SArray *uids; + int32_t lastIdx; +} SSysTableIndex; + typedef struct SSysTableScanInfo { SRetrieveMetaTableRsp* pRsp; SRetrieveTableReq req; @@ -553,6 +559,7 @@ typedef struct SSysTableScanInfo { bool showRewrite; SNode* pCondition; // db_name filter condition, to discard data that are not in current database SMTbCursor* pCur; // cursor for iterate the local table meta store. + SSysTableIndex* pIdx; // idx for local table meta SArray* scanCols; // SArray scan column id list SName name; SSDataBlock* pRes; @@ -966,9 +973,7 @@ SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** dowStreams, size_ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pTableScanNode, SReadHandle* readHandle, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, - SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, - STimeWindowAggSupp* pTwAggSupp, SIntervalPhysiNode* pPhyNode, +SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, bool isStream); SOperatorInfo* createMergeIntervalOperatorInfo(SOperatorInfo* downstream, SMergeIntervalPhysiNode* pIntervalPhyNode, SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index bf24fe3a3e..afec5896cd 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -422,16 +422,6 @@ static SColumnInfoData* getColInfoResult(void* metaHandle, int64_t suid, SArray* goto end; } } - /*else { - code = metaGetTableTagsByUids(metaHandle, suid, uidList, tags); - if (code != 0) { - terrno = code; - qError("failed to get table from meta idx, reason: %s, suid:%" PRId64, tstrerror(code), suid); - goto end; - } else { - qInfo("succ to get table from meta idx, suid:%" PRId64, suid); - } - }*/ int32_t rows = taosArrayGetSize(uidList); if (rows == 0) { @@ -1212,11 +1202,10 @@ void createExprFromOneNode(SExprInfo* pExp, SNode* pNode, int16_t slotId) { #if 1 // todo refactor: add the parameter for tbname function const char* name = "tbname"; - int32_t len = strlen(name); + int32_t len = strlen(name); if (!pFuncNode->pParameterList && (memcmp(pExprNode->_function.functionName, name, len) == 0) && pExprNode->_function.functionName[len] == 0) { - pFuncNode->pParameterList = nodesMakeList(); ASSERT(LIST_LENGTH(pFuncNode->pParameterList) == 0); SValueNode* res = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE); @@ -1261,13 +1250,13 @@ void createExprFromOneNode(SExprInfo* pExp, SNode* pNode, int16_t slotId) { } else if (type == QUERY_NODE_CASE_WHEN) { pExp->pExpr->nodeType = QUERY_NODE_OPERATOR; SCaseWhenNode* pCaseNode = (SCaseWhenNode*)pNode; - + pExp->base.pParam = taosMemoryCalloc(1, sizeof(SFunctParam)); pExp->base.numOfParams = 1; - + SDataType* pType = &pCaseNode->node.resType; - pExp->base.resSchema = createResSchema(pType->type, pType->bytes, slotId, pType->scale, - pType->precision, pCaseNode->node.aliasName); + pExp->base.resSchema = + createResSchema(pType->type, pType->bytes, slotId, pType->scale, pType->precision, pCaseNode->node.aliasName); pExp->pExpr->_optrRoot.pRootNode = pNode; } else { ASSERT(0); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index c9711f13dd..e55404935c 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -3049,12 +3049,6 @@ SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiN SSDataBlock* pResBlock = createResDataBlock(pAggNode->node.pOutputDataBlockDesc); initBasicInfo(&pInfo->binfo, pResBlock); - int32_t numOfScalarExpr = 0; - SExprInfo* pScalarExprInfo = NULL; - if (pAggNode->pExprs != NULL) { - pScalarExprInfo = createExprInfo(pAggNode->pExprs, NULL, &numOfScalarExpr); - } - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; initResultSizeInfo(&pOperator->resultInfo, 4096); @@ -3065,6 +3059,12 @@ SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiN goto _error; } + int32_t numOfScalarExpr = 0; + SExprInfo* pScalarExprInfo = NULL; + if (pAggNode->pExprs != NULL) { + pScalarExprInfo = createExprInfo(pAggNode->pExprs, NULL, &numOfScalarExpr); + } + code = initExprSupp(&pInfo->scalarExprSup, pScalarExprInfo, numOfScalarExpr); if (code != TSDB_CODE_SUCCESS) { goto _error; @@ -3720,28 +3720,8 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } else if (QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL == type) { SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; - SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &num); - SSDataBlock* pResBlock = createResDataBlock(pPhyNode->pOutputDataBlockDesc); - - SInterval interval = {.interval = pIntervalPhyNode->interval, - .sliding = pIntervalPhyNode->sliding, - .intervalUnit = pIntervalPhyNode->intervalUnit, - .slidingUnit = pIntervalPhyNode->slidingUnit, - .offset = pIntervalPhyNode->offset, - .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision}; - - STimeWindowAggSupp as = { - .waterMark = pIntervalPhyNode->window.watermark, - .calTrigger = pIntervalPhyNode->window.triggerType, - .maxTs = INT64_MIN, - }; - ASSERT(as.calTrigger != STREAM_TRIGGER_MAX_DELAY); - - int32_t tsSlotId = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; - bool isStream = (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type); - pOptr = createIntervalOperatorInfo(ops[0], pExprInfo, num, pResBlock, &interval, tsSlotId, &as, pIntervalPhyNode, - pTaskInfo, isStream); - + bool isStream = (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type); + pOptr = createIntervalOperatorInfo(ops[0], pIntervalPhyNode, pTaskInfo, isStream); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL == type) { pOptr = createStreamIntervalOperatorInfo(ops[0], pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL == type) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 4e1b07e662..115a6ab559 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -39,6 +39,64 @@ static int32_t buildSysDbTableInfo(const SSysTableScanInfo* pInfo, int32_t capac static int32_t buildDbTableInfoBlock(bool sysInfo, const SSDataBlock* p, const SSysTableMeta* pSysDbTableMeta, size_t size, const char* dbName); +static char* SYSTABLE_IDX_COLUMN[] = {"table_name", "db_name", "create_time", "columns", + "ttl", "stable_name", "vgroup_id', 'uid", "type"}; + +static char* SYSTABLE_IDX_EXCEPT[] = {"db_name", "vgroup_id"}; + +typedef int32_t (*__sys_filte)(void* pMeta, SNode* cond, SArray* result); +typedef int32_t (*__sys_check)(SNode* cond); + +typedef struct { + const char* name; + __sys_check chkFunc; + __sys_filte fltFunc; +} SSTabFltFuncDef; + +typedef struct { + void* pMeta; + void* pVnode; +} SSTabFltArg; + +static int32_t sysChkFilter__Comm(SNode* pNode); +static int32_t sysChkFilter__DBName(SNode* pNode); +static int32_t sysChkFilter__VgroupId(SNode* pNode); +static int32_t sysChkFilter__TableName(SNode* pNode); +static int32_t sysChkFilter__CreateTime(SNode* pNode); +static int32_t sysChkFilter__Ncolumn(SNode* pNode); +static int32_t sysChkFilter__Ttl(SNode* pNode); +static int32_t sysChkFilter__STableName(SNode* pNode); +static int32_t sysChkFilter__Uid(SNode* pNode); +static int32_t sysChkFilter__Type(SNode* pNode); + +static int32_t sysFilte__DbName(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__VgroupId(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__TableName(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__CreateTime(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__Ncolumn(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__Ttl(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__STableName(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__Uid(void* pMeta, SNode* pNode, SArray* result); +static int32_t sysFilte__Type(void* pMeta, SNode* pNode, SArray* result); + +const SSTabFltFuncDef filterDict[] = { + {.name = "table_name", .chkFunc = sysChkFilter__TableName, .fltFunc = sysFilte__TableName}, + {.name = "db_name", .chkFunc = sysChkFilter__DBName, .fltFunc = sysFilte__DbName}, + {.name = "create_time", .chkFunc = sysChkFilter__CreateTime, .fltFunc = sysFilte__CreateTime}, + {.name = "columns", .chkFunc = sysChkFilter__Ncolumn, .fltFunc = sysFilte__Ncolumn}, + {.name = "ttl", .chkFunc = sysChkFilter__Ttl, .fltFunc = sysFilte__Ttl}, + {.name = "stable_name", .chkFunc = sysChkFilter__STableName, .fltFunc = sysFilte__STableName}, + {.name = "vgroup_id", .chkFunc = sysChkFilter__VgroupId, .fltFunc = sysFilte__VgroupId}, + {.name = "uid", .chkFunc = sysChkFilter__Uid, .fltFunc = sysFilte__Uid}, + {.name = "type", .chkFunc = sysChkFilter__Type, .fltFunc = sysFilte__Type}}; + +#define SYSTAB_FILTER_DICT_SIZE (sizeof(filterDict) / sizeof(filterDict[0])) + +static int32_t optSysTabFilte(void* arg, SNode* cond, SArray* result); +static int32_t optSysTabFilteImpl(void* arg, SNode* cond, SArray* result); +static int32_t optSysCheckOper(SNode* pOpear); +static int32_t optSysMergeRslt(SArray* multiRslt, SArray* reslt); + static bool processBlockWithProbability(const SSampleExecInfo* pInfo); static int32_t sysTableUserTagsFillOneTableTags(const SSysTableScanInfo* pInfo, SMetaReader* smrSuperTable, @@ -2323,6 +2381,11 @@ static void destroySysScanOperator(void* param) { metaCloseTbCursor(pInfo->pCur); pInfo->pCur = NULL; } + if (pInfo->pIdx) { + taosArrayDestroy(pInfo->pIdx->uids); + taosMemoryFree(pInfo->pIdx); + pInfo->pIdx = NULL; + } taosArrayDestroy(pInfo->scanCols); taosMemoryFreeClear(pInfo->pUser); @@ -2790,9 +2853,798 @@ static int32_t sysTableUserTagsFillOneTableTags(const SSysTableScanInfo* pInfo, return TSDB_CODE_SUCCESS; } +typedef int (*__optSysFilter)(void* a, void* b, int16_t dtype); + +int optSysDoCompare(__compar_fn_t func, int8_t comparType, void* a, void* b) { + int32_t cmp = func(a, b); + switch (comparType) { + case OP_TYPE_LOWER_THAN: + if (cmp < 0) return 0; + break; + case OP_TYPE_LOWER_EQUAL: { + if (cmp <= 0) return 0; + break; + } + case OP_TYPE_GREATER_THAN: { + if (cmp > 0) return 0; + break; + } + case OP_TYPE_GREATER_EQUAL: { + if (cmp >= 0) return 0; + break; + } + case OP_TYPE_EQUAL: { + if (cmp == 0) return 0; + break; + } + default: + return -1; + } + return 1; +} + +static int optSysFilterFuncImpl__LowerThan(void* a, void* b, int16_t dtype) { + __compar_fn_t func = getComparFunc(dtype, 0); + return optSysDoCompare(func, OP_TYPE_LOWER_THAN, a, b); +} +static int optSysFilterFuncImpl__LowerEqual(void* a, void* b, int16_t dtype) { + __compar_fn_t func = getComparFunc(dtype, 0); + return optSysDoCompare(func, OP_TYPE_LOWER_EQUAL, a, b); +} +static int optSysFilterFuncImpl__GreaterThan(void* a, void* b, int16_t dtype) { + __compar_fn_t func = getComparFunc(dtype, 0); + return optSysDoCompare(func, OP_TYPE_GREATER_THAN, a, b); +} +static int optSysFilterFuncImpl__GreaterEqual(void* a, void* b, int16_t dtype) { + __compar_fn_t func = getComparFunc(dtype, 0); + return optSysDoCompare(func, OP_TYPE_GREATER_EQUAL, a, b); +} +static int optSysFilterFuncImpl__Equal(void* a, void* b, int16_t dtype) { + __compar_fn_t func = getComparFunc(dtype, 0); + return optSysDoCompare(func, OP_TYPE_EQUAL, a, b); +} + +static int optSysFilterFuncImpl__NoEqual(void* a, void* b, int16_t dtype) { + __compar_fn_t func = getComparFunc(dtype, 0); + return optSysDoCompare(func, OP_TYPE_NOT_EQUAL, a, b); +} +static __optSysFilter optSysGetFilterFunc(int32_t ctype, bool* reverse) { + if (ctype == OP_TYPE_LOWER_EQUAL || ctype == OP_TYPE_LOWER_THAN) { + *reverse = true; + } + if (ctype == OP_TYPE_LOWER_THAN) + return optSysFilterFuncImpl__LowerThan; + else if (ctype == OP_TYPE_LOWER_EQUAL) + return optSysFilterFuncImpl__LowerEqual; + else if (ctype == OP_TYPE_GREATER_THAN) + return optSysFilterFuncImpl__GreaterThan; + else if (ctype == OP_TYPE_GREATER_EQUAL) + return optSysFilterFuncImpl__GreaterEqual; + else if (ctype == OP_TYPE_EQUAL) + return optSysFilterFuncImpl__Equal; + else if (ctype == OP_TYPE_NOT_EQUAL) + return optSysFilterFuncImpl__NoEqual; + return NULL; +} +static int32_t sysFilte__DbName(void* arg, SNode* pNode, SArray* result) { + void* pVnode = ((SSTabFltArg*)arg)->pVnode; + + const char* db = NULL; + vnodeGetInfo(pVnode, &db, NULL); + + SName sn = {0}; + char dbname[TSDB_DB_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; + tNameFromString(&sn, db, T_NAME_ACCT | T_NAME_DB); + + tNameGetDbName(&sn, varDataVal(dbname)); + varDataSetLen(dbname, strlen(varDataVal(dbname))); + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + + bool reverse = false; + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + + int ret = func(dbname, pVal->datum.p, TSDB_DATA_TYPE_VARCHAR); + if (ret == 0) return 0; + + return -2; +} +static int32_t sysFilte__VgroupId(void* arg, SNode* pNode, SArray* result) { + void* pVnode = ((SSTabFltArg*)arg)->pVnode; + + int64_t vgId = 0; + vnodeGetInfo(pVnode, NULL, (int32_t*)&vgId); + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + + int ret = func(&vgId, &pVal->datum.i, TSDB_DATA_TYPE_BIGINT); + if (ret == 0) return 0; + + return -1; +} +static int32_t sysFilte__TableName(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + + SMetaFltParam param = {.suid = 0, + .cid = 0, + .type = TSDB_DATA_TYPE_VARCHAR, + .val = pVal->datum.p, + .reverse = reverse, + .filterFunc = func}; + + int32_t ret = metaFilterCreateTime(pMeta, ¶m, result); + if (ret == 0) return 0; + + return -1; +} + +static int32_t sysFilte__CreateTime(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + SMetaFltParam param = {.suid = 0, + .cid = 0, + .type = TSDB_DATA_TYPE_BIGINT, + .val = &pVal->datum.i, + .reverse = reverse, + .filterFunc = func}; + int32_t ret = metaFilterCreateTime(pMeta, ¶m, result); + if (func == NULL) return -1; + return 0; +} +static int32_t sysFilte__Ncolumn(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + return -1; +} + +static int32_t sysFilte__Ttl(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + return -1; +} +static int32_t sysFilte__STableName(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + return -1; +} +static int32_t sysFilte__Uid(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + return -1; +} +static int32_t sysFilte__Type(void* arg, SNode* pNode, SArray* result) { + void* pMeta = ((SSTabFltArg*)arg)->pMeta; + + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + bool reverse = false; + + __optSysFilter func = optSysGetFilterFunc(pOper->opType, &reverse); + if (func == NULL) return -1; + return -1; +} +static int32_t sysChkFilter__Comm(SNode* pNode) { + // impl + SOperatorNode* pOper = (SOperatorNode*)pNode; + EOperatorType opType = pOper->opType; + if (opType != OP_TYPE_EQUAL && opType != OP_TYPE_LOWER_EQUAL && opType != OP_TYPE_LOWER_THAN && + OP_TYPE_GREATER_EQUAL && opType != OP_TYPE_GREATER_THAN) { + return -1; + } + return 0; +} + +static int32_t sysChkFilter__DBName(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + + if (pOper->opType != OP_TYPE_EQUAL && pOper->opType != OP_TYPE_NOT_EQUAL) { + return -1; + } + + SValueNode* pVal = (SValueNode*)pOper->pRight; + if (!IS_STR_DATA_TYPE(pVal->node.resType.type)) { + return -1; + } + + return 0; +} +static int32_t sysChkFilter__VgroupId(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + if (!IS_INTEGER_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t sysChkFilter__TableName(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + if (!IS_STR_DATA_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t sysChkFilter__CreateTime(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + + if (!IS_TIMESTAMP_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} + +static int32_t sysChkFilter__Ncolumn(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + + if (!IS_INTEGER_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t sysChkFilter__Ttl(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + + if (!IS_INTEGER_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t sysChkFilter__STableName(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + if (!IS_STR_DATA_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t sysChkFilter__Uid(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + if (!IS_INTEGER_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t sysChkFilter__Type(SNode* pNode) { + SOperatorNode* pOper = (SOperatorNode*)pNode; + SValueNode* pVal = (SValueNode*)pOper->pRight; + if (!IS_INTEGER_TYPE(pVal->node.resType.type)) { + return -1; + } + return sysChkFilter__Comm(pNode); +} +static int32_t optSysTabFilteImpl(void* arg, SNode* cond, SArray* result) { + if (optSysCheckOper(cond) != 0) return -1; + + SOperatorNode* pNode = (SOperatorNode*)cond; + + int8_t i = 0; + for (; i < SYSTAB_FILTER_DICT_SIZE; i++) { + if (strcmp(filterDict[i].name, ((SColumnNode*)(pNode->pLeft))->colName) == 0) { + break; + } + } + if (i >= SYSTAB_FILTER_DICT_SIZE) return -1; + + if (filterDict[i].chkFunc(cond) != 0) return -1; + + return filterDict[i].fltFunc(arg, cond, result); +} + +static int32_t optSysCheckOper(SNode* pOpear) { + if (nodeType(pOpear) != QUERY_NODE_OPERATOR) return -1; + + SOperatorNode* pOper = (SOperatorNode*)pOpear; + if (pOper->opType < OP_TYPE_GREATER_THAN || pOper->opType > OP_TYPE_NOT_EQUAL) { + return -1; + } + + if (nodeType(pOper->pLeft) != QUERY_NODE_COLUMN || nodeType(pOper->pRight) != QUERY_NODE_VALUE) { + return -1; + } + return 0; +} +static int32_t optSysMergeRslt(SArray* mRslt, SArray* rslt) { + // TODO, find comm mem from mRslt + for (int i = 0; i < taosArrayGetSize(mRslt); i++) { + SArray* aRslt = taosArrayGetP(mRslt, i); + taosArrayAddAll(rslt, aRslt); + } + return 0; +} + +static int32_t optSysTabFilte(void* arg, SNode* cond, SArray* result) { + int ret = -1; + if (nodeType(cond) == QUERY_NODE_OPERATOR) { + ret = optSysTabFilteImpl(arg, cond, result); + if (ret == 0) { + SOperatorNode* pOper = (SOperatorNode*)cond; + SColumnNode* pCol = (SColumnNode*)pOper->pLeft; + if (0 == strcmp(pCol->colName, "create_time")) { + return 0; + } + return -1; + } + return ret; + } + + if (nodeType(cond) != QUERY_NODE_LOGIC_CONDITION || ((SLogicConditionNode*)cond)->condType != LOGIC_COND_TYPE_AND) { + return ret; + } + + SLogicConditionNode* pNode = (SLogicConditionNode*)cond; + SNodeList* pList = (SNodeList*)pNode->pParameterList; + + int32_t len = LIST_LENGTH(pList); + if (len <= 0) return ret; + + bool hasIdx = false; + bool hasRslt = true; + SArray* mRslt = taosArrayInit(len, POINTER_BYTES); + + SListCell* cell = pList->pHead; + for (int i = 0; i < len; i++) { + if (cell == NULL) break; + + SArray* aRslt = taosArrayInit(16, sizeof(int64_t)); + + ret = optSysTabFilteImpl(arg, cell->pNode, aRslt); + if (ret == 0) { + // has index + hasIdx = true; + taosArrayPush(mRslt, &aRslt); + } else if (ret == -2) { + // current vg + hasIdx = true; + hasRslt = false; + taosArrayDestroy(aRslt); + break; + } else { + taosArrayDestroy(aRslt); + } + cell = cell->pNext; + } + if (hasRslt && hasIdx) { + optSysMergeRslt(mRslt, result); + } + + for (int i = 0; i < taosArrayGetSize(mRslt); i++) { + SArray* aRslt = taosArrayGetP(mRslt, i); + taosArrayDestroy(aRslt); + } + taosArrayDestroy(mRslt); + if (hasRslt == false) { + return -2; + } + if (hasRslt && hasIdx) { + cell = pList->pHead; + for (int i = 0; i < len; i++) { + if (cell == NULL) break; + SOperatorNode* pOper = (SOperatorNode*)cell->pNode; + SColumnNode* pCol = (SColumnNode*)pOper->pLeft; + if (0 == strcmp(pCol->colName, "create_time")) { + return 0; + } + cell = cell->pNext; + } + return -1; + } + return -1; +} + +static SSDataBlock* sysTableBuildUserTablesByUids(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SSysTableScanInfo* pInfo = pOperator->info; + + SSysTableIndex* pIdx = pInfo->pIdx; + blockDataCleanup(pInfo->pRes); + int32_t numOfRows = 0; + + int ret = 0; + + const char* db = NULL; + int32_t vgId = 0; + vnodeGetInfo(pInfo->readHandle.vnode, &db, &vgId); + + SName sn = {0}; + char dbname[TSDB_DB_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; + tNameFromString(&sn, db, T_NAME_ACCT | T_NAME_DB); + + tNameGetDbName(&sn, varDataVal(dbname)); + varDataSetLen(dbname, strlen(varDataVal(dbname))); + + SSDataBlock* p = buildInfoSchemaTableMetaBlock(TSDB_INS_TABLE_TABLES); + blockDataEnsureCapacity(p, pOperator->resultInfo.capacity); + + char n[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + int32_t i = pIdx->lastIdx; + for (; i < taosArrayGetSize(pIdx->uids); i++) { + tb_uid_t* uid = taosArrayGet(pIdx->uids, i); + + SMetaReader mr = {0}; + metaReaderInit(&mr, pInfo->readHandle.meta, 0); + int32_t ret = metaGetTableEntryByUid(&mr, *uid); + if (ret < 0) { + metaReaderClear(&mr); + continue; + } + STR_TO_VARSTR(n, mr.me.name); + + // table name + SColumnInfoData* pColInfoData = taosArrayGet(p->pDataBlock, 0); + colDataAppend(pColInfoData, numOfRows, n, false); + + // database name + pColInfoData = taosArrayGet(p->pDataBlock, 1); + colDataAppend(pColInfoData, numOfRows, dbname, false); + + // vgId + pColInfoData = taosArrayGet(p->pDataBlock, 6); + colDataAppend(pColInfoData, numOfRows, (char*)&vgId, false); + + int32_t tableType = mr.me.type; + if (tableType == TSDB_CHILD_TABLE) { + // create time + int64_t ts = mr.me.ctbEntry.ctime; + pColInfoData = taosArrayGet(p->pDataBlock, 2); + colDataAppend(pColInfoData, numOfRows, (char*)&ts, false); + + SMetaReader mr1 = {0}; + metaReaderInit(&mr1, pInfo->readHandle.meta, META_READER_NOLOCK); + + int64_t suid = mr.me.ctbEntry.suid; + int32_t code = metaGetTableEntryByUid(&mr1, suid); + if (code != TSDB_CODE_SUCCESS) { + qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", pInfo->pCur->mr.me.name, + suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); + metaReaderClear(&mr1); + metaReaderClear(&mr); + T_LONG_JMP(pTaskInfo->env, terrno); + } + pColInfoData = taosArrayGet(p->pDataBlock, 3); + colDataAppend(pColInfoData, numOfRows, (char*)&mr1.me.stbEntry.schemaRow.nCols, false); + + // super table name + STR_TO_VARSTR(n, mr1.me.name); + pColInfoData = taosArrayGet(p->pDataBlock, 4); + colDataAppend(pColInfoData, numOfRows, n, false); + metaReaderClear(&mr1); + + // table comment + pColInfoData = taosArrayGet(p->pDataBlock, 8); + if (mr.me.ctbEntry.commentLen > 0) { + char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, mr.me.ctbEntry.comment); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else if (mr.me.ctbEntry.commentLen == 0) { + char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, ""); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else { + colDataAppendNULL(pColInfoData, numOfRows); + } + + // uid + pColInfoData = taosArrayGet(p->pDataBlock, 5); + colDataAppend(pColInfoData, numOfRows, (char*)&mr.me.uid, false); + + // ttl + pColInfoData = taosArrayGet(p->pDataBlock, 7); + colDataAppend(pColInfoData, numOfRows, (char*)&mr.me.ctbEntry.ttlDays, false); + + STR_TO_VARSTR(n, "CHILD_TABLE"); + + } else if (tableType == TSDB_NORMAL_TABLE) { + // create time + pColInfoData = taosArrayGet(p->pDataBlock, 2); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.ctime, false); + + // number of columns + pColInfoData = taosArrayGet(p->pDataBlock, 3); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.schemaRow.nCols, false); + + // super table name + pColInfoData = taosArrayGet(p->pDataBlock, 4); + colDataAppendNULL(pColInfoData, numOfRows); + + // table comment + pColInfoData = taosArrayGet(p->pDataBlock, 8); + if (mr.me.ntbEntry.commentLen > 0) { + char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, mr.me.ntbEntry.comment); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else if (mr.me.ntbEntry.commentLen == 0) { + char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, ""); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else { + colDataAppendNULL(pColInfoData, numOfRows); + } + + // uid + pColInfoData = taosArrayGet(p->pDataBlock, 5); + colDataAppend(pColInfoData, numOfRows, (char*)&mr.me.uid, false); + + // ttl + pColInfoData = taosArrayGet(p->pDataBlock, 7); + colDataAppend(pColInfoData, numOfRows, (char*)&mr.me.ntbEntry.ttlDays, false); + + STR_TO_VARSTR(n, "NORMAL_TABLE"); + // impl later + } + + metaReaderClear(&mr); + + pColInfoData = taosArrayGet(p->pDataBlock, 9); + colDataAppend(pColInfoData, numOfRows, n, false); + + if (++numOfRows >= pOperator->resultInfo.capacity) { + p->info.rows = numOfRows; + pInfo->pRes->info.rows = numOfRows; + + relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); + doFilterResult(pInfo); + + blockDataCleanup(p); + numOfRows = 0; + + if (pInfo->pRes->info.rows > 0) { + break; + } + } + } + + if (numOfRows > 0) { + p->info.rows = numOfRows; + pInfo->pRes->info.rows = numOfRows; + + relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); + doFilterResult(pInfo); + + blockDataCleanup(p); + numOfRows = 0; + } + + if (i >= taosArrayGetSize(pIdx->uids)) { + doSetOperatorCompleted(pOperator); + } else { + pIdx->lastIdx = i; + } + + blockDataDestroy(p); + + pInfo->loadInfo.totalRows += pInfo->pRes->info.rows; + return (pInfo->pRes->info.rows == 0) ? NULL : pInfo->pRes; +} +static SSDataBlock* sysTableBuildUserTables(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SSysTableScanInfo* pInfo = pOperator->info; + if (pInfo->pCur == NULL) { + pInfo->pCur = metaOpenTbCursor(pInfo->readHandle.meta); + } + + blockDataCleanup(pInfo->pRes); + int32_t numOfRows = 0; + + const char* db = NULL; + int32_t vgId = 0; + vnodeGetInfo(pInfo->readHandle.vnode, &db, &vgId); + + SName sn = {0}; + char dbname[TSDB_DB_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; + tNameFromString(&sn, db, T_NAME_ACCT | T_NAME_DB); + + tNameGetDbName(&sn, varDataVal(dbname)); + varDataSetLen(dbname, strlen(varDataVal(dbname))); + + SSDataBlock* p = buildInfoSchemaTableMetaBlock(TSDB_INS_TABLE_TABLES); + blockDataEnsureCapacity(p, pOperator->resultInfo.capacity); + + char n[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + + int32_t ret = 0; + while ((ret = metaTbCursorNext(pInfo->pCur)) == 0) { + STR_TO_VARSTR(n, pInfo->pCur->mr.me.name); + + // table name + SColumnInfoData* pColInfoData = taosArrayGet(p->pDataBlock, 0); + colDataAppend(pColInfoData, numOfRows, n, false); + + // database name + pColInfoData = taosArrayGet(p->pDataBlock, 1); + colDataAppend(pColInfoData, numOfRows, dbname, false); + + // vgId + pColInfoData = taosArrayGet(p->pDataBlock, 6); + colDataAppend(pColInfoData, numOfRows, (char*)&vgId, false); + + int32_t tableType = pInfo->pCur->mr.me.type; + if (tableType == TSDB_CHILD_TABLE) { + // create time + int64_t ts = pInfo->pCur->mr.me.ctbEntry.ctime; + pColInfoData = taosArrayGet(p->pDataBlock, 2); + colDataAppend(pColInfoData, numOfRows, (char*)&ts, false); + + SMetaReader mr = {0}; + metaReaderInit(&mr, pInfo->readHandle.meta, META_READER_NOLOCK); + + uint64_t suid = pInfo->pCur->mr.me.ctbEntry.suid; + int32_t code = metaGetTableEntryByUid(&mr, suid); + if (code != TSDB_CODE_SUCCESS) { + qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", pInfo->pCur->mr.me.name, + suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); + metaReaderClear(&mr); + metaCloseTbCursor(pInfo->pCur); + pInfo->pCur = NULL; + T_LONG_JMP(pTaskInfo->env, terrno); + } + + // number of columns + pColInfoData = taosArrayGet(p->pDataBlock, 3); + colDataAppend(pColInfoData, numOfRows, (char*)&mr.me.stbEntry.schemaRow.nCols, false); + + // super table name + STR_TO_VARSTR(n, mr.me.name); + pColInfoData = taosArrayGet(p->pDataBlock, 4); + colDataAppend(pColInfoData, numOfRows, n, false); + metaReaderClear(&mr); + + // table comment + pColInfoData = taosArrayGet(p->pDataBlock, 8); + if (pInfo->pCur->mr.me.ctbEntry.commentLen > 0) { + char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, pInfo->pCur->mr.me.ctbEntry.comment); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else if (pInfo->pCur->mr.me.ctbEntry.commentLen == 0) { + char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, ""); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else { + colDataAppendNULL(pColInfoData, numOfRows); + } + + // uid + pColInfoData = taosArrayGet(p->pDataBlock, 5); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.uid, false); + + // ttl + pColInfoData = taosArrayGet(p->pDataBlock, 7); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ctbEntry.ttlDays, false); + + STR_TO_VARSTR(n, "CHILD_TABLE"); + } else if (tableType == TSDB_NORMAL_TABLE) { + // create time + pColInfoData = taosArrayGet(p->pDataBlock, 2); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.ctime, false); + + // number of columns + pColInfoData = taosArrayGet(p->pDataBlock, 3); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.schemaRow.nCols, false); + + // super table name + pColInfoData = taosArrayGet(p->pDataBlock, 4); + colDataAppendNULL(pColInfoData, numOfRows); + + // table comment + pColInfoData = taosArrayGet(p->pDataBlock, 8); + if (pInfo->pCur->mr.me.ntbEntry.commentLen > 0) { + char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, pInfo->pCur->mr.me.ntbEntry.comment); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else if (pInfo->pCur->mr.me.ntbEntry.commentLen == 0) { + char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; + STR_TO_VARSTR(comment, ""); + colDataAppend(pColInfoData, numOfRows, comment, false); + } else { + colDataAppendNULL(pColInfoData, numOfRows); + } + + // uid + pColInfoData = taosArrayGet(p->pDataBlock, 5); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.uid, false); + + // ttl + pColInfoData = taosArrayGet(p->pDataBlock, 7); + colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.ttlDays, false); + + STR_TO_VARSTR(n, "NORMAL_TABLE"); + } + + pColInfoData = taosArrayGet(p->pDataBlock, 9); + colDataAppend(pColInfoData, numOfRows, n, false); + + if (++numOfRows >= pOperator->resultInfo.capacity) { + p->info.rows = numOfRows; + pInfo->pRes->info.rows = numOfRows; + + relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); + doFilterResult(pInfo); + + blockDataCleanup(p); + numOfRows = 0; + + if (pInfo->pRes->info.rows > 0) { + break; + } + } + } + + if (numOfRows > 0) { + p->info.rows = numOfRows; + pInfo->pRes->info.rows = numOfRows; + + relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); + doFilterResult(pInfo); + + blockDataCleanup(p); + numOfRows = 0; + } + + blockDataDestroy(p); + + // todo temporarily free the cursor here, the true reason why the free is not valid needs to be found + if (ret != 0) { + metaCloseTbCursor(pInfo->pCur); + pInfo->pCur = NULL; + doSetOperatorCompleted(pOperator); + } + + pInfo->loadInfo.totalRows += pInfo->pRes->info.rows; + return (pInfo->pRes->info.rows == 0) ? NULL : pInfo->pRes; +} + static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SSysTableScanInfo* pInfo = pOperator->info; + + SNode* pCondition = pInfo->pCondition; if (pOperator->status == OP_EXEC_DONE) { return NULL; } @@ -2807,179 +3659,38 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { doSetOperatorCompleted(pOperator); return (pInfo->pRes->info.rows == 0) ? NULL : pInfo->pRes; } else { - if (pInfo->pCur == NULL) { - pInfo->pCur = metaOpenTbCursor(pInfo->readHandle.meta); - } + if (pInfo->showRewrite == false) { + if (pCondition != NULL && pInfo->pIdx == NULL) { + SSTabFltArg arg = {.pMeta = pInfo->readHandle.meta, .pVnode = pInfo->readHandle.vnode}; - blockDataCleanup(pInfo->pRes); - int32_t numOfRows = 0; + SSysTableIndex* idx = taosMemoryMalloc(sizeof(SSysTableIndex)); + idx->init = 0; + idx->uids = taosArrayInit(128, sizeof(int64_t)); + idx->lastIdx = 0; - const char* db = NULL; - int32_t vgId = 0; - vnodeGetInfo(pInfo->readHandle.vnode, &db, &vgId); + pInfo->pIdx = idx; // set idx arg - SName sn = {0}; - char dbname[TSDB_DB_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; - tNameFromString(&sn, db, T_NAME_ACCT | T_NAME_DB); - - tNameGetDbName(&sn, varDataVal(dbname)); - varDataSetLen(dbname, strlen(varDataVal(dbname))); - - SSDataBlock* p = buildInfoSchemaTableMetaBlock(TSDB_INS_TABLE_TABLES); - blockDataEnsureCapacity(p, pOperator->resultInfo.capacity); - - char n[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - - int32_t ret = 0; - while ((ret = metaTbCursorNext(pInfo->pCur)) == 0) { - STR_TO_VARSTR(n, pInfo->pCur->mr.me.name); - - // table name - SColumnInfoData* pColInfoData = taosArrayGet(p->pDataBlock, 0); - colDataAppend(pColInfoData, numOfRows, n, false); - - // database name - pColInfoData = taosArrayGet(p->pDataBlock, 1); - colDataAppend(pColInfoData, numOfRows, dbname, false); - - // vgId - pColInfoData = taosArrayGet(p->pDataBlock, 6); - colDataAppend(pColInfoData, numOfRows, (char*)&vgId, false); - - int32_t tableType = pInfo->pCur->mr.me.type; - if (tableType == TSDB_CHILD_TABLE) { - // create time - int64_t ts = pInfo->pCur->mr.me.ctbEntry.ctime; - pColInfoData = taosArrayGet(p->pDataBlock, 2); - colDataAppend(pColInfoData, numOfRows, (char*)&ts, false); - - SMetaReader mr = {0}; - metaReaderInit(&mr, pInfo->readHandle.meta, META_READER_NOLOCK); - - uint64_t suid = pInfo->pCur->mr.me.ctbEntry.suid; - int32_t code = metaGetTableEntryByUid(&mr, suid); - if (code != TSDB_CODE_SUCCESS) { - qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", pInfo->pCur->mr.me.name, - suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); - metaReaderClear(&mr); - metaCloseTbCursor(pInfo->pCur); - pInfo->pCur = NULL; - T_LONG_JMP(pTaskInfo->env, terrno); - } - - // number of columns - pColInfoData = taosArrayGet(p->pDataBlock, 3); - colDataAppend(pColInfoData, numOfRows, (char*)&mr.me.stbEntry.schemaRow.nCols, false); - - // super table name - STR_TO_VARSTR(n, mr.me.name); - pColInfoData = taosArrayGet(p->pDataBlock, 4); - colDataAppend(pColInfoData, numOfRows, n, false); - metaReaderClear(&mr); - - // table comment - pColInfoData = taosArrayGet(p->pDataBlock, 8); - if (pInfo->pCur->mr.me.ctbEntry.commentLen > 0) { - char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_TO_VARSTR(comment, pInfo->pCur->mr.me.ctbEntry.comment); - colDataAppend(pColInfoData, numOfRows, comment, false); - } else if (pInfo->pCur->mr.me.ctbEntry.commentLen == 0) { - char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; - STR_TO_VARSTR(comment, ""); - colDataAppend(pColInfoData, numOfRows, comment, false); - } else { - colDataAppendNULL(pColInfoData, numOfRows); - } - - // uid - pColInfoData = taosArrayGet(p->pDataBlock, 5); - colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.uid, false); - - // ttl - pColInfoData = taosArrayGet(p->pDataBlock, 7); - colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ctbEntry.ttlDays, false); - - STR_TO_VARSTR(n, "CHILD_TABLE"); - } else if (tableType == TSDB_NORMAL_TABLE) { - // create time - pColInfoData = taosArrayGet(p->pDataBlock, 2); - colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.ctime, false); - - // number of columns - pColInfoData = taosArrayGet(p->pDataBlock, 3); - colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.schemaRow.nCols, false); - - // super table name - pColInfoData = taosArrayGet(p->pDataBlock, 4); - colDataAppendNULL(pColInfoData, numOfRows); - - // table comment - pColInfoData = taosArrayGet(p->pDataBlock, 8); - if (pInfo->pCur->mr.me.ntbEntry.commentLen > 0) { - char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_TO_VARSTR(comment, pInfo->pCur->mr.me.ntbEntry.comment); - colDataAppend(pColInfoData, numOfRows, comment, false); - } else if (pInfo->pCur->mr.me.ntbEntry.commentLen == 0) { - char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; - STR_TO_VARSTR(comment, ""); - colDataAppend(pColInfoData, numOfRows, comment, false); - } else { - colDataAppendNULL(pColInfoData, numOfRows); - } - - // uid - pColInfoData = taosArrayGet(p->pDataBlock, 5); - colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.uid, false); - - // ttl - pColInfoData = taosArrayGet(p->pDataBlock, 7); - colDataAppend(pColInfoData, numOfRows, (char*)&pInfo->pCur->mr.me.ntbEntry.ttlDays, false); - - STR_TO_VARSTR(n, "NORMAL_TABLE"); - } - - pColInfoData = taosArrayGet(p->pDataBlock, 9); - colDataAppend(pColInfoData, numOfRows, n, false); - - if (++numOfRows >= pOperator->resultInfo.capacity) { - p->info.rows = numOfRows; - pInfo->pRes->info.rows = numOfRows; - - relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); - doFilterResult(pInfo); - - blockDataCleanup(p); - numOfRows = 0; - - if (pInfo->pRes->info.rows > 0) { - break; + int flt = optSysTabFilte(&arg, pCondition, idx->uids); + if (flt == 0) { + pInfo->pIdx->init = 1; + SSDataBlock* blk = sysTableBuildUserTablesByUids(pOperator); + return blk; + } else if (flt == -2) { + qDebug("%s failed to get sys table info by idx, empty result", GET_TASKID(pTaskInfo)); + return NULL; + } else if (flt == -1) { + // not idx + qDebug("%s failed to get sys table info by idx, scan sys table one by one", GET_TASKID(pTaskInfo)); } + } else if (pCondition != NULL && (pInfo->pIdx != NULL && pInfo->pIdx->init == 1)) { + SSDataBlock* blk = sysTableBuildUserTablesByUids(pOperator); + return blk; } } - if (numOfRows > 0) { - p->info.rows = numOfRows; - pInfo->pRes->info.rows = numOfRows; - - relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); - doFilterResult(pInfo); - - blockDataCleanup(p); - numOfRows = 0; - } - - blockDataDestroy(p); - - // todo temporarily free the cursor here, the true reason why the free is not valid needs to be found - if (ret != 0) { - metaCloseTbCursor(pInfo->pCur); - pInfo->pCur = NULL; - doSetOperatorCompleted(pOperator); - } - - pInfo->loadInfo.totalRows += pInfo->pRes->info.rows; - return (pInfo->pRes->info.rows == 0) ? NULL : pInfo->pRes; + return sysTableBuildUserTables(pOperator); } + return NULL; } static SSDataBlock* sysTableScanUserSTables(SOperatorInfo* pOperator) { diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index e89cd5e537..6dda6488a0 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1751,9 +1751,7 @@ void initStreamFunciton(SqlFunctionCtx* pCtx, int32_t numOfExpr) { } } -SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, - SSDataBlock* pResBlock, SInterval* pInterval, int32_t primaryTsSlotId, - STimeWindowAggSupp* pTwAggSupp, SIntervalPhysiNode* pPhyNode, +SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SIntervalPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, bool isStream) { SIntervalAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SIntervalAggOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); @@ -1761,46 +1759,63 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* goto _error; } + SSDataBlock* pResBlock = createResDataBlock(pPhyNode->window.node.pOutputDataBlockDesc); + initBasicInfo(&pInfo->binfo, pResBlock); + + SExprSupp* pSup = &pOperator->exprSupp; + pInfo->primaryTsIndex = ((SColumnNode*)pPhyNode->window.pTspk)->slotId; + + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; + initResultSizeInfo(&pOperator->resultInfo, 4096); + + int32_t num = 0; + SExprInfo* pExprInfo = createExprInfo(pPhyNode->window.pFuncs, NULL, &num); + int32_t code = initAggInfo(pSup, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + SInterval interval = {.interval = pPhyNode->interval, + .sliding = pPhyNode->sliding, + .intervalUnit = pPhyNode->intervalUnit, + .slidingUnit = pPhyNode->slidingUnit, + .offset = pPhyNode->offset, + .precision = ((SColumnNode*)pPhyNode->window.pTspk)->node.resType.precision}; + + STimeWindowAggSupp as = { + .waterMark = pPhyNode->window.watermark, + .calTrigger = pPhyNode->window.triggerType, + .maxTs = INT64_MIN, + }; + + ASSERT(as.calTrigger != STREAM_TRIGGER_MAX_DELAY); + pOperator->pTaskInfo = pTaskInfo; pInfo->win = pTaskInfo->window; pInfo->inputOrder = (pPhyNode->window.inputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; pInfo->resultTsOrder = (pPhyNode->window.outputTsOrder == ORDER_ASC) ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; - pInfo->interval = *pInterval; + pInfo->interval = interval; pInfo->execModel = pTaskInfo->execModel; - pInfo->twAggSup = *pTwAggSupp; + pInfo->twAggSup = as; pInfo->pCondition = pPhyNode->window.node.pConditions; pInfo->binfo.mergeResultBlock = pPhyNode->window.mergeDataBlock; if (pPhyNode->window.pExprs != NULL) { int32_t numOfScalar = 0; SExprInfo* pScalarExprInfo = createExprInfo(pPhyNode->window.pExprs, NULL, &numOfScalar); - int32_t code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar); + code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar); if (code != TSDB_CODE_SUCCESS) { goto _error; } } - pInfo->primaryTsIndex = primaryTsSlotId; - SExprSupp* pSup = &pOperator->exprSupp; - - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; - initResultSizeInfo(&pOperator->resultInfo, 4096); - - int32_t code = initAggInfo(pSup, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - initBasicInfo(&pInfo->binfo, pResBlock); - if (isStream) { - ASSERT(numOfCols > 0); + ASSERT(num > 0); initStreamFunciton(pSup->pCtx, pSup->numOfExprs); } initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pInfo->win); - - pInfo->timeWindowInterpo = timeWindowinterpNeeded(pSup->pCtx, numOfCols, pInfo); + pInfo->timeWindowInterpo = timeWindowinterpNeeded(pSup->pCtx, num, pInfo); if (pInfo->timeWindowInterpo) { pInfo->binfo.resultRowInfo.openWindow = tdListNew(sizeof(SOpenWindowInfo)); if (pInfo->binfo.resultRowInfo.openWindow == NULL) { @@ -1827,7 +1842,9 @@ SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* return pOperator; _error: - destroyIntervalOperatorInfo(pInfo); + if (pInfo != NULL) { + destroyIntervalOperatorInfo(pInfo); + } taosMemoryFreeClear(pOperator); pTaskInfo->code = code; return NULL; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 7ad5c1365c..71fad2e27c 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -524,8 +524,6 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { while (1) { SSDataBlock* pDataBlock = getSortedBlockDataInner(pHandle, &pHandle->cmpParam, numOfRows); if (pDataBlock == NULL) { - taosArrayDestroy(pResList); - taosArrayDestroy(pPageIdList); break; } diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index d5d59beb47..dd5e4178bd 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -2988,8 +2988,6 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { static void firstLastTransferInfo(SqlFunctionCtx* pCtx, SFirstLastRes* pInput, SFirstLastRes* pOutput, bool isFirst, int32_t rowIndex) { - SInputColumnInfoData* pColInfo = &pCtx->input; - if (pOutput->hasResult) { if (isFirst) { if (pInput->ts > pOutput->ts) { diff --git a/source/libs/index/inc/indexFst.h b/source/libs/index/inc/indexFst.h index 4c5bca864a..ddeaee06ea 100644 --- a/source/libs/index/inc/indexFst.h +++ b/source/libs/index/inc/indexFst.h @@ -290,21 +290,21 @@ bool fstVerify(Fst* fst); // refactor this function bool fstBuilderNodeCompileTo(FstBuilderNode* b, IdxFstFile* wrt, CompiledAddr lastAddr, CompiledAddr startAddr); -typedef struct StreamState { +typedef struct FstStreamState { FstNode* node; uint64_t trans; FstOutput out; void* autState; -} StreamState; +} FstStreamState; -void streamStateDestroy(void* s); +void fstStreamStateDestroy(void* s); typedef struct FStmSt { Fst* fst; FAutoCtx* aut; SArray* inp; FstOutput emptyOutput; - SArray* stack; // + SArray* stack; // FstBoundWithData* endAt; } FStmSt; @@ -317,14 +317,14 @@ typedef struct FStmStRslt { FStmStRslt* swsResultCreate(FstSlice* data, FstOutput fOut, void* state); void swsResultDestroy(FStmStRslt* result); -typedef void* (*StreamCallback)(void*); +typedef void* (*streamCallback__fn)(void*); FStmSt* stmStCreate(Fst* fst, FAutoCtx* automation, FstBoundWithData* min, FstBoundWithData* max); void stmStDestroy(FStmSt* sws); bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min); -FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback); +FStmStRslt* stmStNextWith(FStmSt* sws, streamCallback__fn callback); FStmBuilder* stmBuilderCreate(Fst* fst, FAutoCtx* aut); diff --git a/source/libs/index/src/indexFst.c b/source/libs/index/src/indexFst.c index aed2ec3ee3..bbffcfa6c1 100644 --- a/source/libs/index/src/indexFst.c +++ b/source/libs/index/src/indexFst.c @@ -1165,7 +1165,7 @@ FStmSt* stmStCreate(Fst* fst, FAutoCtx* automation, FstBoundWithData* min, FstBo sws->emptyOutput.null = true; sws->emptyOutput.out = 0; - sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState)); + sws->stack = (SArray*)taosArrayInit(256, sizeof(FstStreamState)); sws->endAt = max; stmStSeekMin(sws, min); @@ -1177,7 +1177,7 @@ void stmStDestroy(FStmSt* sws) { } taosArrayDestroy(sws->inp); - taosArrayDestroyEx(sws->stack, streamStateDestroy); + taosArrayDestroyEx(sws->stack, fstStreamStateDestroy); taosMemoryFree(sws); } @@ -1188,10 +1188,10 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { if (fstBoundWithDataIsIncluded(min)) { sws->emptyOutput.out = fstEmptyFinalOutput(sws->fst, &(sws->emptyOutput.null)); } - StreamState s = {.node = fstGetRoot(sws->fst), - .trans = 0, - .out = {.null = false, .out = 0}, - .autState = automFuncs[aut->type].start(aut)}; // auto.start callback + FstStreamState s = {.node = fstGetRoot(sws->fst), + .trans = 0, + .out = {.null = false, .out = 0}, + .autState = automFuncs[aut->type].start(aut)}; // auto.start callback taosArrayPush(sws->stack, &s); return true; } @@ -1223,7 +1223,7 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { autState = automFuncs[aut->type].accept(aut, preState, b); taosArrayPush(sws->inp, &b); - StreamState s = {.node = node, .trans = res + 1, .out = {.null = false, .out = out}, .autState = preState}; + FstStreamState s = {.node = node, .trans = res + 1, .out = {.null = false, .out = out}, .autState = preState}; node = NULL; taosArrayPush(sws->stack, &s); @@ -1244,7 +1244,7 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { } } - StreamState s = {.node = node, .trans = i, .out = {.null = false, .out = out}, .autState = autState}; + FstStreamState s = {.node = node, .trans = i, .out = {.null = false, .out = out}, .autState = autState}; taosArrayPush(sws->stack, &s); taosMemoryFree(trans); return true; @@ -1255,7 +1255,7 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { uint32_t sz = taosArrayGetSize(sws->stack); if (sz != 0) { - StreamState* s = taosArrayGet(sws->stack, sz - 1); + FstStreamState* s = taosArrayGet(sws->stack, sz - 1); if (inclusize) { s->trans -= 1; taosArrayPop(sws->inp); @@ -1264,7 +1264,7 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { uint64_t trans = s->trans; FstTransition trn; fstNodeGetTransitionAt(n, trans - 1, &trn); - StreamState s = { + FstStreamState s = { .node = fstGetNode(sws->fst, trn.addr), .trans = 0, .out = {.null = false, .out = out}, .autState = autState}; taosArrayPush(sws->stack, &s); return true; @@ -1274,14 +1274,14 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) { return false; } -FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) { +FStmStRslt* stmStNextWith(FStmSt* sws, streamCallback__fn callback) { FAutoCtx* aut = sws->aut; FstOutput output = sws->emptyOutput; if (output.null == false) { FstSlice emptySlice = fstSliceCreate(NULL, 0); if (fstBoundWithDataExceededBy(sws->endAt, &emptySlice)) { - taosArrayDestroyEx(sws->stack, streamStateDestroy); - sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState)); + taosArrayDestroyEx(sws->stack, fstStreamStateDestroy); + sws->stack = (SArray*)taosArrayInit(256, sizeof(FstStreamState)); return NULL; } void* start = automFuncs[aut->type].start(aut); @@ -1292,12 +1292,12 @@ FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) { } SArray* nodes = taosArrayInit(8, sizeof(FstNode*)); while (taosArrayGetSize(sws->stack) > 0) { - StreamState* p = (StreamState*)taosArrayPop(sws->stack); + FstStreamState* p = (FstStreamState*)taosArrayPop(sws->stack); if (p->trans >= FST_NODE_LEN(p->node) || !automFuncs[aut->type].canMatch(aut, p->autState)) { if (FST_NODE_ADDR(p->node) != fstGetRootAddr(sws->fst)) { taosArrayPop(sws->inp); } - streamStateDestroy(p); + fstStreamStateDestroy(p); continue; } FstTransition trn; @@ -1318,10 +1318,10 @@ FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) { isMatch = automFuncs[aut->type].isMatch(aut, eofState); } } - StreamState s1 = {.node = p->node, .trans = p->trans + 1, .out = p->out, .autState = p->autState}; + FstStreamState s1 = {.node = p->node, .trans = p->trans + 1, .out = p->out, .autState = p->autState}; taosArrayPush(sws->stack, &s1); - StreamState s2 = {.node = nextNode, .trans = 0, .out = {.null = false, .out = out}, .autState = nextState}; + FstStreamState s2 = {.node = nextNode, .trans = 0, .out = {.null = false, .out = out}, .autState = nextState}; taosArrayPush(sws->stack, &s2); int32_t isz = taosArrayGetSize(sws->inp); @@ -1331,8 +1331,8 @@ FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) { } FstSlice slice = fstSliceCreate(buf, isz); if (fstBoundWithDataExceededBy(sws->endAt, &slice)) { - taosArrayDestroyEx(sws->stack, streamStateDestroy); - sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState)); + taosArrayDestroyEx(sws->stack, fstStreamStateDestroy); + sws->stack = (SArray*)taosArrayInit(256, sizeof(FstStreamState)); taosMemoryFreeClear(buf); fstSliceDestroy(&slice); taosArrayDestroy(nodes); @@ -1375,11 +1375,11 @@ void swsResultDestroy(FStmStRslt* result) { taosMemoryFree(result); } -void streamStateDestroy(void* s) { +void fstStreamStateDestroy(void* s) { if (NULL == s) { return; } - StreamState* ss = (StreamState*)s; + FstStreamState* ss = (FstStreamState*)s; fstNodeDestroy(ss->node); } diff --git a/source/libs/index/src/indexJson.c b/source/libs/index/src/indexJson.c index 8ce625dfb9..32b794cb71 100644 --- a/source/libs/index/src/indexJson.c +++ b/source/libs/index/src/indexJson.c @@ -30,7 +30,7 @@ int indexJsonPut(SIndexJson *index, SIndexJsonMultiTerm *terms, uint64_t uid) { } else { p->colType = TSDB_DATA_TYPE_DOUBLE; } - IDX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); + IDX_TYPE_ADD_EXTERN_TYPE((p->colType), TSDB_DATA_TYPE_JSON); } // handle put return indexPut(index, terms, uid); diff --git a/source/libs/sync/inc/syncAppendEntries.h b/source/libs/sync/inc/syncAppendEntries.h index e15c85d73b..dc40e2fc72 100644 --- a/source/libs/sync/inc/syncAppendEntries.h +++ b/source/libs/sync/inc/syncAppendEntries.h @@ -92,9 +92,8 @@ extern "C" { // /\ UNCHANGED <> // /\ UNCHANGED <> // -int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg); -int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg); -int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg); + +int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncAppendEntriesReply.h b/source/libs/sync/inc/syncAppendEntriesReply.h index 03148252fb..0227d832fc 100644 --- a/source/libs/sync/inc/syncAppendEntriesReply.h +++ b/source/libs/sync/inc/syncAppendEntriesReply.h @@ -40,9 +40,7 @@ extern "C" { // /\ Discard(m) // /\ UNCHANGED <> // -int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); -int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); -int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncElection.h b/source/libs/sync/inc/syncElection.h index 128dbf4050..9ccd9dd28f 100644 --- a/source/libs/sync/inc/syncElection.h +++ b/source/libs/sync/inc/syncElection.h @@ -37,12 +37,10 @@ extern "C" { // msource |-> i, // mdest |-> j]) // /\ UNCHANGED <> -// -int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode); -int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode); int32_t syncNodeElect(SSyncNode* pSyncNode); -int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg); +int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode); +int32_t syncNodeSendRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncEnv.h b/source/libs/sync/inc/syncEnv.h index e6d2bd4920..2a37e000e2 100644 --- a/source/libs/sync/inc/syncEnv.h +++ b/source/libs/sync/inc/syncEnv.h @@ -28,13 +28,13 @@ extern "C" { #include "trpc.h" #include "ttimer.h" -#define TIMER_MAX_MS 0x7FFFFFFF -#define ENV_TICK_TIMER_MS 1000 -#define PING_TIMER_MS 5000 -#define ELECT_TIMER_MS_MIN 5000 -#define ELECT_TIMER_MS_MAX (ELECT_TIMER_MS_MIN * 2) +#define TIMER_MAX_MS 0x7FFFFFFF +#define ENV_TICK_TIMER_MS 1000 +#define PING_TIMER_MS 5000 +#define ELECT_TIMER_MS_MIN 5000 +#define ELECT_TIMER_MS_MAX (ELECT_TIMER_MS_MIN * 2) #define ELECT_TIMER_MS_RANGE (ELECT_TIMER_MS_MAX - ELECT_TIMER_MS_MIN) -#define HEARTBEAT_TIMER_MS 900 +#define HEARTBEAT_TIMER_MS 900 #define EMPTY_RAFT_ID ((SRaftId){.addr = 0, .vgId = 0}) diff --git a/source/libs/sync/inc/syncIO.h b/source/libs/sync/inc/syncIO.h index 68d2334101..cfc4dd2472 100644 --- a/source/libs/sync/inc/syncIO.h +++ b/source/libs/sync/inc/syncIO.h @@ -56,9 +56,8 @@ typedef struct SSyncIO { int32_t (*FpOnSyncAppendEntries)(SSyncNode *pSyncNode, SyncAppendEntries *pMsg); int32_t (*FpOnSyncAppendEntriesReply)(SSyncNode *pSyncNode, SyncAppendEntriesReply *pMsg); int32_t (*FpOnSyncTimeout)(SSyncNode *pSyncNode, SyncTimeout *pMsg); - - int32_t (*FpOnSyncSnapshotSend)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg); - int32_t (*FpOnSyncSnapshotRsp)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg); + int32_t (*FpOnSyncSnapshot)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg); + int32_t (*FpOnSyncSnapshotReply)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg); int8_t isStart; diff --git a/source/libs/sync/inc/syncIndexMgr.h b/source/libs/sync/inc/syncIndexMgr.h index fb85b89419..e8f17537b4 100644 --- a/source/libs/sync/inc/syncIndexMgr.h +++ b/source/libs/sync/inc/syncIndexMgr.h @@ -45,8 +45,8 @@ void syncIndexMgrDestroy(SSyncIndexMgr *pSyncIndexMgr); void syncIndexMgrClear(SSyncIndexMgr *pSyncIndexMgr); void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncIndex index); SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId); -cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr); -char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr); +cJSON * syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr); +char * syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr); void syncIndexMgrSetStartTime(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, int64_t startTime); int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId); diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 4e2a8647b2..a158430a0f 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -57,9 +57,37 @@ typedef struct SRaftCfg SRaftCfg; typedef struct SSyncRespMgr SSyncRespMgr; typedef struct SSyncSnapshotSender SSyncSnapshotSender; typedef struct SSyncSnapshotReceiver SSyncSnapshotReceiver; +typedef struct SSyncTimer SSyncTimer; +typedef struct SSyncHbTimerData SSyncHbTimerData; extern bool gRaftDetailLog; +typedef struct SSyncHbTimerData { + SSyncNode* pSyncNode; + SSyncTimer* pTimer; + SRaftId destId; + uint64_t logicClock; +} SSyncHbTimerData; + +typedef struct SSyncTimer { + void* pTimer; + TAOS_TMR_CALLBACK timerCb; + uint64_t logicClock; + uint64_t counter; + int32_t timerMS; + SRaftId destId; + void* pData; +} SSyncTimer; + +int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId); +int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer); +int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer); + +typedef struct SPeerState { + SyncIndex lastSendIndex; + int64_t lastSendTime; +} SPeerState; + typedef struct SSyncNode { // init by SSyncInfo SyncGroupId vgId; @@ -73,6 +101,7 @@ typedef struct SSyncNode { const SMsgCb* msgcb; int32_t (*FpSendMsg)(const SEpSet* pEpSet, SRpcMsg* pMsg); int32_t (*FpEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg); + int32_t (*FpEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg); // init internal SNodeInfo myNodeInfo; @@ -138,6 +167,9 @@ typedef struct SSyncNode { TAOS_TMR_CALLBACK FpHeartbeatTimerCB; // Timer Fp uint64_t heartbeatTimerCounter; + // peer heartbeat timer + SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA]; + // callback FpOnPingCb FpOnPing; FpOnPingReplyCb FpOnPingReply; @@ -147,8 +179,8 @@ typedef struct SSyncNode { FpOnRequestVoteReplyCb FpOnRequestVoteReply; FpOnAppendEntriesCb FpOnAppendEntries; FpOnAppendEntriesReplyCb FpOnAppendEntriesReply; - FpOnSnapshotSendCb FpOnSnapshotSend; - FpOnSnapshotRspCb FpOnSnapshotRsp; + FpOnSnapshotCb FpOnSnapshot; + FpOnSnapshotReplyCb FpOnSnapshotReply; // tools SSyncRespMgr* pSyncRespMgr; @@ -159,9 +191,15 @@ typedef struct SSyncNode { SSyncSnapshotSender* senders[TSDB_MAX_REPLICA]; SSyncSnapshotReceiver* pNewNodeReceiver; + SPeerState peerStates[TSDB_MAX_REPLICA]; + // is config changing bool changing; + int64_t snapshottingIndex; + int64_t snapshottingTime; + int64_t minMatchIndex; + int64_t startTime; int64_t leaderTime; int64_t lastReplicateTime; @@ -174,7 +212,6 @@ void syncNodeStart(SSyncNode* pSyncNode); void syncNodeStartStandBy(SSyncNode* pSyncNode); void syncNodeClose(SSyncNode* pSyncNode); int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak); -int32_t syncNodeProposeBatch(SSyncNode* pSyncNode, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize); // option bool syncNodeSnapshotEnable(SSyncNode* pSyncNode); @@ -197,23 +234,21 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms); int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode); int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode); -int32_t syncNodeStartHeartbeatTimerNow(SSyncNode* pSyncNode); -int32_t syncNodeStartHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms); int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode); int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode); -int32_t syncNodeRestartHeartbeatTimerNow(SSyncNode* pSyncNode); -int32_t syncNodeRestartNowHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms); // utils -------------- -int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg); -int32_t syncNodeSendMsgByInfo(const SNodeInfo* nodeInfo, SSyncNode* pSyncNode, SRpcMsg* pMsg); -cJSON* syncNode2Json(const SSyncNode* pSyncNode); -char* syncNode2Str(const SSyncNode* pSyncNode); -void syncNodeEventLog(const SSyncNode* pSyncNode, char* str); -void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str); -char* syncNode2SimpleStr(const SSyncNode* pSyncNode); -bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config); -void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex); +int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg); +int32_t syncNodeSendMsgByInfo(const SNodeInfo* nodeInfo, SSyncNode* pSyncNode, SRpcMsg* pMsg); +cJSON* syncNode2Json(const SSyncNode* pSyncNode); +char* syncNode2Str(const SSyncNode* pSyncNode); +void syncNodeEventLog(const SSyncNode* pSyncNode, char* str); +void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str); +char* syncNode2SimpleStr(const SSyncNode* pSyncNode); +bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config); +void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex); +SyncIndex syncMinMatchIndex(SSyncNode* pSyncNode); +char* syncNodePeerState2Str(const SSyncNode* pSyncNode); SSyncNode* syncNodeAcquire(int64_t rid); void syncNodeRelease(SSyncNode* pNode); @@ -221,6 +256,7 @@ void syncNodeRelease(SSyncNode* pNode); // raft state change -------------- void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term); void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term); +void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm); void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr); void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr); @@ -240,21 +276,23 @@ void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode); SyncIndex syncNodeGetLastIndex(const SSyncNode* pSyncNode); SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode); int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm); - SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode); - SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index); SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index); int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm); bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg); -int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag); +int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag); +int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex); int32_t syncNodePreCommit(SSyncNode* ths, SSyncRaftEntry* pEntry, int32_t code); int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg); bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId); SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId); +SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId); +SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId); +bool syncNodeNeedSendAppendEntries(SSyncNode* ths, const SRaftId* pDestId, const SyncAppendEntries* pMsg); int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta); @@ -271,7 +309,12 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode); +bool syncNodeIsMnode(SSyncNode* pSyncNode); +int32_t syncNodePeerStateInit(SSyncNode* pSyncNode); + // trace log +void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* s); + void syncLogSendRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s); void syncLogRecvRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s); diff --git a/source/libs/sync/inc/syncRaftCfg.h b/source/libs/sync/inc/syncRaftCfg.h index ba0f973815..e193e16c02 100644 --- a/source/libs/sync/inc/syncRaftCfg.h +++ b/source/libs/sync/inc/syncRaftCfg.h @@ -45,8 +45,8 @@ int32_t raftCfgIndexClose(SRaftCfgIndex *pRaftCfgIndex); int32_t raftCfgIndexPersist(SRaftCfgIndex *pRaftCfgIndex); int32_t raftCfgIndexAddConfigIndex(SRaftCfgIndex *pRaftCfgIndex, SyncIndex configIndex); -cJSON *raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex); -char *raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex); +cJSON * raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex); +char * raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex); int32_t raftCfgIndexFromJson(const cJSON *pRoot, SRaftCfgIndex *pRaftCfgIndex); int32_t raftCfgIndexFromStr(const char *s, SRaftCfgIndex *pRaftCfgIndex); @@ -73,14 +73,14 @@ int32_t raftCfgClose(SRaftCfg *pRaftCfg); int32_t raftCfgPersist(SRaftCfg *pRaftCfg); int32_t raftCfgAddConfigIndex(SRaftCfg *pRaftCfg, SyncIndex configIndex); -cJSON *syncCfg2Json(SSyncCfg *pSyncCfg); -char *syncCfg2Str(SSyncCfg *pSyncCfg); -char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg); +cJSON * syncCfg2Json(SSyncCfg *pSyncCfg); +char * syncCfg2Str(SSyncCfg *pSyncCfg); +char * syncCfg2SimpleStr(SSyncCfg *pSyncCfg); int32_t syncCfgFromJson(const cJSON *pRoot, SSyncCfg *pSyncCfg); int32_t syncCfgFromStr(const char *s, SSyncCfg *pSyncCfg); -cJSON *raftCfg2Json(SRaftCfg *pRaftCfg); -char *raftCfg2Str(SRaftCfg *pRaftCfg); +cJSON * raftCfg2Json(SRaftCfg *pRaftCfg); +char * raftCfg2Str(SRaftCfg *pRaftCfg); int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg); int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg); diff --git a/source/libs/sync/inc/syncReplication.h b/source/libs/sync/inc/syncReplication.h index edce124ee5..4f15a45cec 100644 --- a/source/libs/sync/inc/syncReplication.h +++ b/source/libs/sync/inc/syncReplication.h @@ -50,16 +50,15 @@ extern "C" { // msource |-> i, // mdest |-> j]) // /\ UNCHANGED <> -// -int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode); -int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode); -int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode); -int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, SyncIndex nextIndex); +int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode); +int32_t syncNodeSendHeartbeat(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncHeartbeat* pMsg); -int32_t syncNodeReplicate(SSyncNode* pSyncNode, bool isTimer); -int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg); -int32_t syncNodeAppendEntriesBatch(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntriesBatch* pMsg); +int32_t syncNodeReplicate(SSyncNode* pSyncNode); +int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId); + +int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncAppendEntries* pMsg); +int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncAppendEntries* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncRequestVote.h b/source/libs/sync/inc/syncRequestVote.h index 3fe8dc0237..73b4a0efae 100644 --- a/source/libs/sync/inc/syncRequestVote.h +++ b/source/libs/sync/inc/syncRequestVote.h @@ -49,8 +49,7 @@ extern "C" { // m) // /\ UNCHANGED <> // -int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg); -int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg); +int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncRequestVoteReply.h b/source/libs/sync/inc/syncRequestVoteReply.h index ac47a8d026..6bef18405c 100644 --- a/source/libs/sync/inc/syncRequestVoteReply.h +++ b/source/libs/sync/inc/syncRequestVoteReply.h @@ -44,8 +44,7 @@ extern "C" { // /\ Discard(m) // /\ UNCHANGED <> // -int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); -int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); +int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncRespMgr.h b/source/libs/sync/inc/syncRespMgr.h index 28978af77e..22e1005e5c 100644 --- a/source/libs/sync/inc/syncRespMgr.h +++ b/source/libs/sync/inc/syncRespMgr.h @@ -32,9 +32,9 @@ typedef struct SRespStub { } SRespStub; typedef struct SSyncRespMgr { - SHashObj *pRespHash; + SHashObj * pRespHash; int64_t ttl; - void *data; + void * data; TdThreadMutex mutex; uint64_t seqNum; } SSyncRespMgr; diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 6fb558e45c..b8b7af2dda 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -28,10 +28,10 @@ extern "C" { #include "syncMessage.h" #include "taosdef.h" -#define SYNC_SNAPSHOT_SEQ_INVALID -1 +#define SYNC_SNAPSHOT_SEQ_INVALID -1 #define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -2 -#define SYNC_SNAPSHOT_SEQ_BEGIN 0 -#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF +#define SYNC_SNAPSHOT_SEQ_BEGIN 0 +#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF #define SYNC_SNAPSHOT_RETRY_MS 5000 @@ -51,6 +51,7 @@ typedef struct SSyncSnapshotSender { int32_t replicaIndex; SyncTerm term; SyncTerm privateTerm; + int64_t startTime; bool finish; } SSyncSnapshotSender; @@ -67,6 +68,8 @@ cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender); char *snapshotSender2Str(SSyncSnapshotSender *pSender); char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event); +int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId); + //--------------------------------------------------- typedef struct SSyncSnapshotReceiver { bool start; @@ -94,8 +97,8 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event) //--------------------------------------------------- // on message -int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg); -int32_t syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg); +int32_t syncNodeOnSnapshot(SSyncNode *ths, SyncSnapshotSend *pMsg); +int32_t syncNodeOnSnapshotReply(SSyncNode *ths, SyncSnapshotRsp *pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncTimeout.h b/source/libs/sync/inc/syncTimeout.h index 25c26c909d..112a3d8610 100644 --- a/source/libs/sync/inc/syncTimeout.h +++ b/source/libs/sync/inc/syncTimeout.h @@ -39,7 +39,7 @@ extern "C" { // /\ voterLog' = [voterLog EXCEPT ![i] = [j \in {} |-> <<>>]] // /\ UNCHANGED <> // -int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg); +int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 939dcac3d6..170a57a7a9 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -89,240 +89,6 @@ // /\ UNCHANGED <> // -int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvAppendEntries(ths, pMsg, "maybe replica already dropped"); - return -1; - } - - // maybe update term - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncNodeUpdateTerm(ths, pMsg->term); - } - ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); - - // reset elect timer - if (pMsg->term == ths->pRaftStore->currentTerm) { - ths->leaderCache = pMsg->srcId; - syncNodeResetElectTimer(ths); - } - ASSERT(pMsg->dataLen >= 0); - - // return to follower state - if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) { - syncLogRecvAppendEntries(ths, pMsg, "candidate to follower"); - syncNodeBecomeFollower(ths, "from candidate by append entries"); - return -1; // ret or reply? - } - - SyncTerm localPreLogTerm = 0; - if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { - SSyncRaftEntry* pEntry = ths->pLogStore->getEntry(ths->pLogStore, pMsg->prevLogIndex); - if (pEntry == NULL) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "getEntry error, index:%" PRId64 ", since %s", pMsg->prevLogIndex, terrstr()); - syncNodeErrorLog(ths, logBuf); - return -1; - } - - localPreLogTerm = pEntry->term; - syncEntryDestory(pEntry); - } - - bool logOK = - (pMsg->prevLogIndex == SYNC_INDEX_INVALID) || - ((pMsg->prevLogIndex >= SYNC_INDEX_BEGIN) && - (pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) && (pMsg->prevLogTerm == localPreLogTerm)); - - // reject request - if ((pMsg->term < ths->pRaftStore->currentTerm) || - ((pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK)) { - syncLogRecvAppendEntries(ths, pMsg, "reject"); - - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->success = false; - pReply->matchIndex = SYNC_INDEX_INVALID; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - return ret; - } - - // accept request - if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) { - // preIndex = -1, or has preIndex entry in local log - ASSERT(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)); - - // has extra entries (> preIndex) in local log - bool hasExtraEntries = pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore); - - // has entries in SyncAppendEntries msg - bool hasAppendEntries = pMsg->dataLen > 0; - - syncLogRecvAppendEntries(ths, pMsg, "accept"); - - if (hasExtraEntries && hasAppendEntries) { - // not conflict by default - bool conflict = false; - - SyncIndex extraIndex = pMsg->prevLogIndex + 1; - SSyncRaftEntry* pExtraEntry = ths->pLogStore->getEntry(ths->pLogStore, extraIndex); - if (pExtraEntry == NULL) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "getEntry error2, index:%" PRId64 ", since %s", extraIndex, terrstr()); - syncNodeErrorLog(ths, logBuf); - return -1; - } - - SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); - if (pAppendEntry == NULL) { - syncNodeErrorLog(ths, "syncEntryDeserialize pAppendEntry error"); - return -1; - } - - // log not match, conflict - ASSERT(extraIndex == pAppendEntry->index); - if (pExtraEntry->term != pAppendEntry->term) { - conflict = true; - } - - if (conflict) { - // roll back - SyncIndex delBegin = ths->pLogStore->getLastIndex(ths->pLogStore); - SyncIndex delEnd = extraIndex; - - sTrace("syncNodeOnAppendEntriesCb --> conflict:%d, delBegin:%" PRId64 ", delEnd:%" PRId64, conflict, delBegin, - delEnd); - - // notice! reverse roll back! - for (SyncIndex index = delEnd; index >= delBegin; --index) { - if (ths->pFsm->FpRollBackCb != NULL) { - SSyncRaftEntry* pRollBackEntry = ths->pLogStore->getEntry(ths->pLogStore, index); - if (pRollBackEntry == NULL) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "getEntry error3, index:%" PRId64 ", since %s", index, terrstr()); - syncNodeErrorLog(ths, logBuf); - return -1; - } - - // if (pRollBackEntry->msgType != TDMT_SYNC_NOOP) { - if (syncUtilUserRollback(pRollBackEntry->msgType)) { - SRpcMsg rpcMsg; - syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg); - - SFsmCbMeta cbMeta = {0}; - cbMeta.index = pRollBackEntry->index; - cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, cbMeta.index); - cbMeta.isWeak = pRollBackEntry->isWeak; - cbMeta.code = 0; - cbMeta.state = ths->state; - cbMeta.seqNum = pRollBackEntry->seqNum; - ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, cbMeta); - rpcFreeCont(rpcMsg.pCont); - } - - syncEntryDestory(pRollBackEntry); - } - } - - // delete confict entries - ths->pLogStore->truncate(ths->pLogStore, extraIndex); - - // append new entries - ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry); - - // pre commit - syncNodePreCommit(ths, pAppendEntry, 0); - } - - // free memory - syncEntryDestory(pExtraEntry); - syncEntryDestory(pAppendEntry); - - } else if (hasExtraEntries && !hasAppendEntries) { - // do nothing - - } else if (!hasExtraEntries && hasAppendEntries) { - SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); - if (pAppendEntry == NULL) { - syncNodeErrorLog(ths, "syncEntryDeserialize pAppendEntry2 error"); - return -1; - } - - // append new entries - ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry); - - // pre commit - syncNodePreCommit(ths, pAppendEntry, 0); - - // free memory - syncEntryDestory(pAppendEntry); - - } else if (!hasExtraEntries && !hasAppendEntries) { - // do nothing - - } else { - syncNodeLog3("", ths); - ASSERT(0); - } - - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->success = true; - - if (hasAppendEntries) { - pReply->matchIndex = pMsg->prevLogIndex + 1; - } else { - pReply->matchIndex = pMsg->prevLogIndex; - } - - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - // maybe update commit index from leader - if (pMsg->commitIndex > ths->commitIndex) { - // has commit entry in local - if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { - SyncIndex beginIndex = ths->commitIndex + 1; - SyncIndex endIndex = pMsg->commitIndex; - - // update commit index - ths->commitIndex = pMsg->commitIndex; - - // call back Wal - ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); - - int32_t code = syncNodeCommit(ths, beginIndex, endIndex, ths->state); - ASSERT(code == 0); - } - } - } - - return ret; -} - static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { int32_t code; @@ -505,544 +271,222 @@ static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries return false; } -int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg) { - int32_t ret = 0; - int32_t code = 0; +int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { + // maybe update commit index, leader notice me + if (newCommitIndex > ths->commitIndex) { + // has commit entry in local + if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { + // advance commit index to sanpshot first + SSnapshot snapshot; + ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); + if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) { + SyncIndex commitBegin = ths->commitIndex; + SyncIndex commitEnd = snapshot.lastApplyIndex; + ths->commitIndex = snapshot.lastApplyIndex; - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvAppendEntriesBatch(ths, pMsg, "maybe replica already dropped"); - return -1; + char eventLog[128]; + snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, + commitEnd); + syncNodeEventLog(ths, eventLog); + } + + SyncIndex beginIndex = ths->commitIndex + 1; + SyncIndex endIndex = newCommitIndex; + + // update commit index + ths->commitIndex = newCommitIndex; + + // call back Wal + int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex); + ASSERT(code == 0); + + code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state); + ASSERT(code == 0); + } } - // maybe update term - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncNodeUpdateTerm(ths, pMsg->term); - } - ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); - - // reset elect timer - if (pMsg->term == ths->pRaftStore->currentTerm) { - ths->leaderCache = pMsg->srcId; - syncNodeResetElectTimer(ths); - } - ASSERT(pMsg->dataLen >= 0); - - // candidate to follower - // - // operation: - // to follower - do { - bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE; - if (condition) { - syncLogRecvAppendEntriesBatch(ths, pMsg, "candidate to follower"); - syncNodeBecomeFollower(ths, "from candidate by append entries"); - return 0; // do not reply? - } - } while (0); - - // fake match - // - // condition1: - // preIndex <= my commit index - // - // operation: - // if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry - // match my-commit-index or my-commit-index + batchSize - do { - bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && - (pMsg->prevLogIndex <= ths->commitIndex); - if (condition) { - syncLogRecvAppendEntriesBatch(ths, pMsg, "fake match"); - - SyncIndex matchIndex = ths->commitIndex; - bool hasAppendEntries = pMsg->dataLen > 0; - SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg); - - if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) { - int32_t pass = 0; - SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex; - - // make log same - if (hasExtraEntries) { - // make log same, rollback deleted entries - pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1); - ASSERT(pass >= 0); - } - - // append entry batch - if (pass == 0) { - // assert! no batch - ASSERT(pMsg->dataCount <= 1); - - for (int32_t i = 0; i < pMsg->dataCount; ++i) { - SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset); - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); - return -1; - } - - code = syncNodePreCommit(ths, pAppendEntry, 0); - ASSERT(code == 0); - - // syncEntryDestory(pAppendEntry); - } - } - - // fsync once - SSyncLogStoreData* pData = ths->pLogStore->data; - SWal* pWal = pData->pWal; - walFsync(pWal, false); - - // update match index - matchIndex = pMsg->prevLogIndex + pMsg->dataCount; - } - - // prepare response msg - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; - pReply->success = true; - pReply->matchIndex = matchIndex; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - return 0; - } - } while (0); - - // calculate logOK here, before will coredump, due to fake match - bool logOK = syncNodeOnAppendEntriesBatchLogOK(ths, pMsg); - - // not match - // - // condition1: - // term < myTerm - // - // condition2: - // !logOK - // - // operation: - // not match - // no operation on log - do { - bool condition1 = pMsg->term < ths->pRaftStore->currentTerm; - bool condition2 = - (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK; - bool condition = condition1 || condition2; - - if (condition) { - syncLogRecvAppendEntriesBatch(ths, pMsg, "not match"); - - // maybe update commit index by snapshot - syncNodeMaybeUpdateCommitBySnapshot(ths); - - // prepare response msg - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; - pReply->success = false; - pReply->matchIndex = ths->commitIndex; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - return 0; - } - } while (0); - - // really match - // - // condition: - // logOK - // - // operation: - // match - // make log same - do { - bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK; - if (condition) { - // has extra entries (> preIndex) in local log - SyncIndex myLastIndex = syncNodeGetLastIndex(ths); - bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex; - - // has entries in SyncAppendEntries msg - bool hasAppendEntries = pMsg->dataLen > 0; - SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg); - - syncLogRecvAppendEntriesBatch(ths, pMsg, "really match"); - - int32_t pass = 0; - - if (hasExtraEntries) { - // make log same, rollback deleted entries - pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1); - ASSERT(pass >= 0); - } - - if (hasAppendEntries) { - // append entry batch - if (pass == 0) { - // assert! no batch - ASSERT(pMsg->dataCount <= 1); - - // append entry batch - for (int32_t i = 0; i < pMsg->dataCount; ++i) { - SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset); - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); - return -1; - } - - code = syncNodePreCommit(ths, pAppendEntry, 0); - ASSERT(code == 0); - - // syncEntryDestory(pAppendEntry); - } - } - - // fsync once - SSyncLogStoreData* pData = ths->pLogStore->data; - SWal* pWal = pData->pWal; - walFsync(pWal, false); - } - - // prepare response msg - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; - pReply->success = true; - pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + pMsg->dataCount : pMsg->prevLogIndex; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - // maybe update commit index, leader notice me - if (pMsg->commitIndex > ths->commitIndex) { - SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - - SyncIndex beginIndex = 0; - SyncIndex endIndex = -1; - - // has commit entry in local - if (pMsg->commitIndex <= lastIndex) { - beginIndex = ths->commitIndex + 1; - endIndex = pMsg->commitIndex; - - // update commit index - ths->commitIndex = pMsg->commitIndex; - - // call back Wal - code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); - ASSERT(code == 0); - - } else if (pMsg->commitIndex > lastIndex && ths->commitIndex < lastIndex) { - beginIndex = ths->commitIndex + 1; - endIndex = lastIndex; - - // update commit index, speed up - ths->commitIndex = lastIndex; - - // call back Wal - code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); - ASSERT(code == 0); - } - - code = syncNodeCommit(ths, beginIndex, endIndex, ths->state); - ASSERT(code == 0); - } - - return 0; - } - } while (0); - return 0; } -int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg) { - int32_t ret = 0; - int32_t code = 0; - +int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg) { // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvAppendEntries(ths, pMsg, "maybe replica already dropped"); - return -1; + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { + syncLogRecvAppendEntries(ths, pMsg, "not in my config"); + goto _IGNORE; + } + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->success = false; + // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); + pReply->matchIndex = SYNC_INDEX_INVALID; + pReply->lastSendIndex = pMsg->prevLogIndex + 1; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->startTime = ths->startTime; + + if (pMsg->term < ths->pRaftStore->currentTerm) { + syncLogRecvAppendEntries(ths, pMsg, "reject, small term"); + goto _SEND_RESPONSE; } - // maybe update term if (pMsg->term > ths->pRaftStore->currentTerm) { - syncNodeUpdateTerm(ths, pMsg->term); + pReply->term = pMsg->term; } - ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); - // reset elect timer - if (pMsg->term == ths->pRaftStore->currentTerm) { - ths->leaderCache = pMsg->srcId; - syncNodeResetElectTimer(ths); + syncNodeStepDown(ths, pMsg->term); + syncNodeResetElectTimer(ths); + + SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore); + SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); + + if (pMsg->prevLogIndex > lastIndex) { + syncLogRecvAppendEntries(ths, pMsg, "reject, index not match"); + goto _SEND_RESPONSE; } - ASSERT(pMsg->dataLen >= 0); - // candidate to follower - // - // operation: - // to follower - do { - bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE; - if (condition) { - syncLogRecvAppendEntries(ths, pMsg, "candidate to follower"); - syncNodeBecomeFollower(ths, "from candidate by append entries"); - return 0; // do not reply? + if (pMsg->prevLogIndex >= startIndex) { + SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1); + ASSERT(myPreLogTerm != SYNC_TERM_INVALID); + + if (myPreLogTerm != pMsg->prevLogTerm) { + syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match"); + goto _SEND_RESPONSE; } - } while (0); + } - // fake match - // - // condition1: - // preIndex <= my commit index - // - // operation: - // if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry - // match my-commit-index or my-commit-index + 1 - // no operation on log - do { - bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && - (pMsg->prevLogIndex <= ths->commitIndex); - if (condition) { - syncLogRecvAppendEntries(ths, pMsg, "fake match"); + // accept + pReply->success = true; + bool hasAppendEntries = pMsg->dataLen > 0; + if (hasAppendEntries) { + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + ASSERT(pAppendEntry != NULL); - SyncIndex matchIndex = ths->commitIndex; - bool hasAppendEntries = pMsg->dataLen > 0; - if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) { - // append entry - SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); - ASSERT(pAppendEntry != NULL); + SyncIndex appendIndex = pMsg->prevLogIndex + 1; + SSyncRaftEntry* pLocalEntry = NULL; + int32_t code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry); + if (code == 0) { + if (pLocalEntry->term == pAppendEntry->term) { + // do nothing - { - // has extra entries (> preIndex) in local log - SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex; + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "log match, do nothing, index:%" PRId64, appendIndex); + syncNodeEventLog(ths, logBuf); - if (hasExtraEntries) { - // make log same, rollback deleted entries - code = syncNodeMakeLogSame(ths, pMsg); - ASSERT(code == 0); - } + } else { + // truncate + code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); + if (code != 0) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex); + syncLogRecvAppendEntries(ths, pMsg, logBuf); + + goto _IGNORE; } + // append code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); if (code != 0) { - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); - return -1; + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex); + syncLogRecvAppendEntries(ths, pMsg, logBuf); + + goto _IGNORE; + } + } + + } else { + if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { + // log not exist + + // truncate + code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); + if (code != 0) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex); + syncLogRecvAppendEntries(ths, pMsg, logBuf); + + goto _IGNORE; } - // pre commit - code = syncNodePreCommit(ths, pAppendEntry, 0); - ASSERT(code == 0); - - // update match index - matchIndex = pMsg->prevLogIndex + 1; - - syncEntryDestory(pAppendEntry); - } - - // prepare response msg - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; - pReply->success = true; - pReply->matchIndex = matchIndex; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - return ret; - } - } while (0); - - // calculate logOK here, before will coredump, due to fake match - bool logOK = syncNodeOnAppendEntriesLogOK(ths, pMsg); - - // not match - // - // condition1: - // term < myTerm - // - // condition2: - // !logOK - // - // operation: - // not match - // no operation on log - do { - bool condition1 = pMsg->term < ths->pRaftStore->currentTerm; - bool condition2 = - (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK; - bool condition = condition1 || condition2; - - if (condition) { - syncLogRecvAppendEntries(ths, pMsg, "not match"); - - // prepare response msg - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; - pReply->success = false; - pReply->matchIndex = SYNC_INDEX_INVALID; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - return ret; - } - } while (0); - - // really match - // - // condition: - // logOK - // - // operation: - // match - // make log same - do { - bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK; - if (condition) { - // has extra entries (> preIndex) in local log - SyncIndex myLastIndex = syncNodeGetLastIndex(ths); - bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex; - - // has entries in SyncAppendEntries msg - bool hasAppendEntries = pMsg->dataLen > 0; - - syncLogRecvAppendEntries(ths, pMsg, "really match"); - - if (hasExtraEntries) { - // make log same, rollback deleted entries - code = syncNodeMakeLogSame(ths, pMsg); - ASSERT(code == 0); - } - - if (hasAppendEntries) { - // append entry - SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); - ASSERT(pAppendEntry != NULL); - + // append code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); if (code != 0) { - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); - return -1; + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex); + syncLogRecvAppendEntries(ths, pMsg, logBuf); + + goto _IGNORE; } - // pre commit - code = syncNodePreCommit(ths, pAppendEntry, 0); + } else { + // error + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64, appendIndex); + syncLogRecvAppendEntries(ths, pMsg, logBuf); + + goto _IGNORE; + } + } + +#if 0 + if (code != 0 && terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { + code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); + ASSERT(code == 0); + + code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); + ASSERT(code == 0); + + } else { + ASSERT(code == 0); + + if (pLocalEntry->term == pAppendEntry->term) { + // do nothing + } else { + code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); ASSERT(code == 0); - syncEntryDestory(pAppendEntry); + code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); + ASSERT(code == 0); } - - // prepare response msg - SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; - pReply->success = true; - pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + 1 : pMsg->prevLogIndex; - pReply->startTime = ths->startTime; - - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - SRpcMsg rpcMsg; - syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncAppendEntriesReplyDestroy(pReply); - - // maybe update commit index, leader notice me - if (pMsg->commitIndex > ths->commitIndex) { - // has commit entry in local - if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - // advance commit index to sanpshot first - SSnapshot snapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) { - SyncIndex commitBegin = ths->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - ths->commitIndex = snapshot.lastApplyIndex; - - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%" PRId64 " to index:%" PRId64, - commitBegin, commitEnd); - syncNodeEventLog(ths, eventLog); - } - - SyncIndex beginIndex = ths->commitIndex + 1; - SyncIndex endIndex = pMsg->commitIndex; - - // update commit index - ths->commitIndex = pMsg->commitIndex; - - // call back Wal - code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); - ASSERT(code == 0); - - code = syncNodeCommit(ths, beginIndex, endIndex, ths->state); - ASSERT(code == 0); - } - } - return ret; } - } while (0); +#endif - return ret; -} + // update match index + pReply->matchIndex = pAppendEntry->index; + + syncEntryDestory(pLocalEntry); + syncEntryDestory(pAppendEntry); + + } else { + // no append entries, do nothing + // maybe has extra entries, no harm + + // update match index + pReply->matchIndex = pMsg->prevLogIndex; + } + + // maybe update commit index, leader notice me + syncNodeFollowerCommit(ths, pMsg->commitIndex); + + syncLogRecvAppendEntries(ths, pMsg, "accept"); + goto _SEND_RESPONSE; + +_IGNORE: + syncAppendEntriesReplyDestroy(pReply); + return 0; + +_SEND_RESPONSE: + // msg event log + syncLogSendAppendEntriesReply(ths, pReply, ""); + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + return 0; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 9253ed0129..5e6c9f1534 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -20,6 +20,7 @@ #include "syncRaftCfg.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncReplication.h" #include "syncSnapshot.h" #include "syncUtil.h" #include "syncVoteMgr.h" @@ -37,74 +38,6 @@ // /\ Discard(m) // /\ UNCHANGED <> // -int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped"); - return -1; - } - - // drop stale response - if (pMsg->term < ths->pRaftStore->currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); - return 0; - } - - // no need this code, because if I receive reply.term, then I must have sent for that term. - // if (pMsg->term > ths->pRaftStore->currentTerm) { - // syncNodeUpdateTerm(ths, pMsg->term); - // } - - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); - return -1; - } - - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); - - // update time - syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime); - syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs()); - - SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - - if (pMsg->success) { - // nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1] - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); - - // matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex] - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); - - // maybe commit - syncMaybeAdvanceCommitIndex(ths); - - } else { - SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - - // notice! int64, uint64 - if (nextIndex > SYNC_INDEX_BEGIN) { - --nextIndex; - } else { - nextIndex = SYNC_INDEX_BEGIN; - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - } - - SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - do { - char logBuf[256]; - snprintf(logBuf, sizeof(logBuf), - "before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex, - beforeMatchIndex, afterNextIndex, afterMatchIndex); - syncLogRecvAppendEntriesReply(ths, pMsg, logBuf); - } while (0); - - return 0; -} // only start once static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, SyncTerm lastApplyTerm, @@ -151,13 +84,13 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync } } -int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { +int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { int32_t ret = 0; // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped"); - return -1; + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { + syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config"); + return 0; } // drop stale response @@ -166,251 +99,40 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie return 0; } - // error term - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); - return -1; - } - - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); - - // update time - syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime); - syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs()); - - SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - - if (pMsg->success) { - SyncIndex newNextIndex = pMsg->matchIndex + 1; - SyncIndex newMatchIndex = pMsg->matchIndex; - - bool needStartSnapshot = false; - if (newMatchIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, newMatchIndex)) { - needStartSnapshot = true; + if (ths->state == TAOS_SYNC_STATE_LEADER) { + if (pMsg->term > ths->pRaftStore->currentTerm) { + syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); + syncNodeStepDown(ths, pMsg->term); + return -1; } - if (!needStartSnapshot) { - // update next-index, match-index - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), newNextIndex); - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex); + ASSERT(pMsg->term == ths->pRaftStore->currentTerm); - // maybe commit - if (ths->state == TAOS_SYNC_STATE_LEADER) { + if (pMsg->success) { + SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); + if (pMsg->matchIndex > oldMatchIndex) { + syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); syncMaybeAdvanceCommitIndex(ths); } + syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); } else { - // start snapshot - SSnapshot oldSnapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot); - if (oldSnapshot.lastApplyIndex > newMatchIndex) { - syncNodeStartSnapshotOnce(ths, newMatchIndex + 1, oldSnapshot.lastApplyIndex, oldSnapshot.lastApplyTerm, - pMsg); // term maybe not ok? + SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); + if (nextIndex > SYNC_INDEX_BEGIN) { + --nextIndex; } - - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), oldSnapshot.lastApplyIndex + 1); - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex); + syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); } - // event log, update next-index - do { - char host[64]; - int16_t port; - syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + // send next append entries + SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId)); + ASSERT(pState != NULL); - char logBuf[256]; - snprintf(logBuf, sizeof(logBuf), "reset next-index:%" PRId64 ", match-index:%" PRId64 " for %s:%d", newNextIndex, - newMatchIndex, host, port); - syncNodeEventLog(ths, logBuf); - - } while (0); - - } else { - SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - - if (nextIndex > SYNC_INDEX_BEGIN) { - --nextIndex; - - // speed up - if (nextIndex > pMsg->matchIndex + 1) { - nextIndex = pMsg->matchIndex + 1; - } - - bool needStartSnapshot = false; - if (nextIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex)) { - needStartSnapshot = true; - } - if (nextIndex - 1 >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex - 1)) { - needStartSnapshot = true; - } - - if (!needStartSnapshot) { - // do nothing - - } else { - SSnapshot oldSnapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot); - SyncTerm newSnapshotTerm = oldSnapshot.lastApplyTerm; - - SyncIndex endIndex; - if (ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex + 1)) { - endIndex = nextIndex; - } else { - endIndex = oldSnapshot.lastApplyIndex; - } - syncNodeStartSnapshotOnce(ths, pMsg->matchIndex + 1, endIndex, newSnapshotTerm, pMsg); - - // get sender - SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId)); - ASSERT(pSender != NULL); - SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1; - - // update nextIndex to sentryIndex - if (nextIndex <= sentryIndex) { - nextIndex = sentryIndex; - } - } - - } else { - nextIndex = SYNC_INDEX_BEGIN; + if (pMsg->lastSendIndex == pState->lastSendIndex) { + syncNodeReplicateOne(ths, &(pMsg->srcId)); } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - - SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - if (pMsg->matchIndex > oldMatchIndex) { - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); - } - - // event log, update next-index - do { - char host[64]; - int16_t port; - syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); - - SyncIndex newNextIndex = nextIndex; - SyncIndex newMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - char logBuf[256]; - snprintf(logBuf, sizeof(logBuf), "reset2 next-index:%" PRId64 ", match-index:%" PRId64 " for %s:%d", newNextIndex, - newMatchIndex, host, port); - syncNodeEventLog(ths, logBuf); - - } while (0); } - SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - do { - char logBuf[256]; - snprintf(logBuf, sizeof(logBuf), - "before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex, - beforeMatchIndex, afterNextIndex, afterMatchIndex); - syncLogRecvAppendEntriesReply(ths, pMsg, logBuf); - } while (0); - - return 0; -} - -int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped"); - return -1; - } - - // drop stale response - if (pMsg->term < ths->pRaftStore->currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); - return 0; - } - - // no need this code, because if I receive reply.term, then I must have sent for that term. - // if (pMsg->term > ths->pRaftStore->currentTerm) { - // syncNodeUpdateTerm(ths, pMsg->term); - // } - - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); - return -1; - } - - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); - - // update time - syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime); - syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs()); - - SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - - if (pMsg->success) { - // nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1] - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); - - // matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex] - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); - - // maybe commit - if (ths->state == TAOS_SYNC_STATE_LEADER) { - syncMaybeAdvanceCommitIndex(ths); - } - - } else { - SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - - // notice! int64, uint64 - if (nextIndex > SYNC_INDEX_BEGIN) { - --nextIndex; - - // get sender - SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId)); - ASSERT(pSender != NULL); - - SSnapshot snapshot = {.data = NULL, - .lastApplyIndex = SYNC_INDEX_INVALID, - .lastApplyTerm = 0, - .lastConfigIndex = SYNC_INDEX_INVALID}; - void* pReader = NULL; - ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot, NULL, &pReader); - if (snapshot.lastApplyIndex >= SYNC_INDEX_BEGIN && nextIndex <= snapshot.lastApplyIndex + 1 && - !snapshotSenderIsStart(pSender) && pMsg->privateTerm < pSender->privateTerm) { - // has snapshot - ASSERT(pReader != NULL); - SSnapshotParam readerParam = {.start = 0, .end = snapshot.lastApplyIndex}; - snapshotSenderStart(pSender, readerParam, snapshot, pReader); - - } else { - // no snapshot - if (pReader != NULL) { - ths->pFsm->FpSnapshotStopRead(ths->pFsm, pReader); - } - } - - SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1; - - // update nextIndex to sentryIndex - if (nextIndex <= sentryIndex) { - nextIndex = sentryIndex; - } - - } else { - nextIndex = SYNC_INDEX_BEGIN; - } - - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - } - - SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - do { - char logBuf[256]; - snprintf(logBuf, sizeof(logBuf), - "before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex, - beforeMatchIndex, afterNextIndex, afterMatchIndex); - syncLogRecvAppendEntriesReply(ths, pMsg, logBuf); - } while (0); - + syncLogRecvAppendEntriesReply(ths, pMsg, "process"); return 0; } \ No newline at end of file diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 6239726823..811a7b8e99 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -45,8 +45,10 @@ // /\ UNCHANGED <> // void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { - syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pNextIndex", pSyncNode->pNextIndex); - syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pMatchIndex", pSyncNode->pMatchIndex); + if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { + syncNodeErrorLog(pSyncNode, "not leader, can not advance commit index"); + return; + } // advance commit index to sanpshot first SSnapshot snapshot; @@ -75,9 +77,11 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { if (h) { pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); } else { - pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index); - if (pEntry == NULL) { - sError("failed to get entry since %s. index:%" PRId64, tstrerror(terrno), index); + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); + if (code != 0) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "advance commit index error, read wal index:%" PRId64, index); + syncNodeErrorLog(pSyncNode, logBuf); return; } } @@ -125,13 +129,17 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { pSyncNode->commitIndex = newCommitIndex; // call back Wal - pSyncNode->pLogStore->updateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); + pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); // execute fsm if (pSyncNode->pFsm != NULL) { - int32_t code = syncNodeCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); + int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); if (code != 0) { - wError("failed to commit sync node since %s", tstrerror(terrno)); + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, + beginIndex, endIndex); + syncNodeErrorLog(pSyncNode, logBuf); + return; } } } @@ -162,6 +170,8 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) { } int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { + return pSyncNode->quorum; + int32_t quorum = 1; // self int64_t timeNow = taosGetTimestampMs(); @@ -220,6 +230,7 @@ int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return quorum; } +/* bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { int agreeCount = 0; for (int i = 0; i < pSyncNode->replicaNum; ++i) { @@ -232,8 +243,8 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { } return false; } +*/ -/* bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { int agreeCount = 0; for (int i = 0; i < pSyncNode->replicaNum; ++i) { @@ -246,4 +257,3 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { } return false; } -*/ diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index 3f13249ce6..b428f4d2f2 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -30,45 +30,6 @@ // msource |-> i, // mdest |-> j]) // /\ UNCHANGED <> -// -int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId); - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = pSyncNode->peersId[i]; - pMsg->term = pSyncNode->pRaftStore->currentTerm; - pMsg->lastLogIndex = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore); - pMsg->lastLogTerm = pSyncNode->pLogStore->getLastTerm(pSyncNode->pLogStore); - - ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg); - ASSERT(ret == 0); - syncRequestVoteDestroy(pMsg); - } - return ret; -} - -int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId); - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = pSyncNode->peersId[i]; - pMsg->term = pSyncNode->pRaftStore->currentTerm; - - ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm)); - ASSERT(ret == 0); - - ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg); - ASSERT(ret == 0); - syncRequestVoteDestroy(pMsg); - } - return ret; -} int32_t syncNodeElect(SSyncNode* pSyncNode) { syncNodeEventLog(pSyncNode, "begin election"); @@ -111,27 +72,38 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) { } - switch (pSyncNode->pRaftCfg->snapshotStrategy) { - case SYNC_STRATEGY_NO_SNAPSHOT: - ret = syncNodeRequestVotePeers(pSyncNode); - break; - - case SYNC_STRATEGY_STANDARD_SNAPSHOT: - case SYNC_STRATEGY_WAL_FIRST: - ret = syncNodeRequestVotePeersSnapshot(pSyncNode); - break; - - default: - ret = syncNodeRequestVotePeers(pSyncNode); - break; - } + ret = syncNodeRequestVotePeers(pSyncNode); ASSERT(ret == 0); + syncNodeResetElectTimer(pSyncNode); return ret; } -int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg) { +int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) { + if (pSyncNode->state != TAOS_SYNC_STATE_CANDIDATE) { + syncNodeEventLog(pSyncNode, "not candidate, stop elect"); + return 0; + } + + int32_t ret = 0; + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId); + pMsg->srcId = pSyncNode->myRaftId; + pMsg->destId = pSyncNode->peersId[i]; + pMsg->term = pSyncNode->pRaftStore->currentTerm; + + ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm)); + ASSERT(ret == 0); + + ret = syncNodeSendRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg); + ASSERT(ret == 0); + syncRequestVoteDestroy(pMsg); + } + return ret; +} + +int32_t syncNodeSendRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg) { int32_t ret = 0; syncLogSendRequestVote(pSyncNode, pMsg, ""); diff --git a/source/libs/sync/src/syncIO.c b/source/libs/sync/src/syncIO.c index e9899a3e33..afa2d43e13 100644 --- a/source/libs/sync/src/syncIO.c +++ b/source/libs/sync/src/syncIO.c @@ -326,18 +326,18 @@ static void *syncIOConsumerFunc(void *param) { } } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { - if (io->FpOnSyncSnapshotSend != NULL) { + if (io->FpOnSyncSnapshot != NULL) { SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pRpcMsg); ASSERT(pSyncMsg != NULL); - io->FpOnSyncSnapshotSend(io->pSyncNode, pSyncMsg); + io->FpOnSyncSnapshot(io->pSyncNode, pSyncMsg); syncSnapshotSendDestroy(pSyncMsg); } } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { - if (io->FpOnSyncSnapshotRsp != NULL) { + if (io->FpOnSyncSnapshotReply != NULL) { SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pRpcMsg); ASSERT(pSyncMsg != NULL); - io->FpOnSyncSnapshotRsp(io->pSyncNode, pSyncMsg); + io->FpOnSyncSnapshotReply(io->pSyncNode, pSyncMsg); syncSnapshotRspDestroy(pSyncMsg); } diff --git a/source/libs/sync/src/syncIndexMgr.c b/source/libs/sync/src/syncIndexMgr.c index fcbf4a9032..8e78aeedc3 100644 --- a/source/libs/sync/src/syncIndexMgr.c +++ b/source/libs/sync/src/syncIndexMgr.c @@ -83,6 +83,10 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, } SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId) { + if (pSyncIndexMgr == NULL) { + return SYNC_INDEX_INVALID; + } + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { if (syncUtilSameId(&((*(pSyncIndexMgr->replicas))[i]), pRaftId)) { SyncIndex idx = (pSyncIndexMgr->index)[i]; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 2a0c5d9d7c..00c4ea76aa 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -46,6 +46,7 @@ static void syncNodeEqElectTimer(void* param, void* tmrId); static void syncNodeEqHeartbeatTimer(void* param, void* tmrId); static int32_t syncNodeEqNoop(SSyncNode* ths); static int32_t syncNodeAppendNoop(SSyncNode* ths); +static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId); // process message ---- int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg); @@ -67,7 +68,7 @@ int32_t syncInit() { syncCleanUp(); ret = -1; } else { - sDebug("sync rsetId:%" PRId32 " is open", tsNodeRefId); + sDebug("sync rsetId:%d is open", tsNodeRefId); ret = syncEnvStart(); } } @@ -80,7 +81,7 @@ void syncCleanUp() { ASSERT(ret == 0); if (tsNodeRefId != -1) { - sDebug("sync rsetId:%" PRId32 " is closed", tsNodeRefId); + sDebug("sync rsetId:%d is closed", tsNodeRefId); taosCloseRef(tsNodeRefId); tsNodeRefId = -1; } @@ -89,7 +90,7 @@ void syncCleanUp() { int64_t syncOpen(SSyncInfo* pSyncInfo) { SSyncNode* pSyncNode = syncNodeOpen(pSyncInfo); if (pSyncNode == NULL) { - sError("vgId:%d, failed to open sync node since %s", pSyncInfo->vgId, terrstr()); + sError("failed to open sync node. vgId:%d", pSyncInfo->vgId); return -1; } @@ -100,7 +101,7 @@ int64_t syncOpen(SSyncInfo* pSyncInfo) { return -1; } - sDebug("vgId:%d, sync rid:%" PRId64 " is added to rsetId:%" PRId32, pSyncInfo->vgId, pSyncNode->rid, tsNodeRefId); + sDebug("vgId:%d, sync rid:%" PRId64 " is added to rsetId:%d", pSyncInfo->vgId, pSyncNode->rid, tsNodeRefId); return pSyncNode->rid; } @@ -146,7 +147,7 @@ void syncStop(int64_t rid) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); taosRemoveRef(tsNodeRefId, rid); - sDebug("vgId:%d, sync rid:%" PRId64 " is removed from rsetId:%" PRId64, vgId, rid, (int64_t)tsNodeRefId); + sDebug("vgId:%d, sync rid:%" PRId64 " is removed from rsetId:%d", vgId, rid, tsNodeRefId); } int32_t syncSetStandby(int64_t rid) { @@ -238,7 +239,6 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) { } ASSERT(rid == pSyncNode->rid); -#if 0 if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR; @@ -246,6 +246,7 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) { return -1; } +#if 0 char* newconfig = syncCfg2Str((SSyncCfg*)pNewCfg); int32_t ret = 0; @@ -263,6 +264,17 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) { #else syncNodeUpdateNewConfigIndex(pSyncNode, pNewCfg); syncNodeDoConfigChange(pSyncNode, pNewCfg, SYNC_INDEX_INVALID); + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + syncNodeStopHeartbeatTimer(pSyncNode); + + for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { + syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]); + } + + syncNodeStartHeartbeatTimer(pSyncNode); + + syncNodeReplicate(pSyncNode); + } taosReleaseRef(tsNodeRefId, pSyncNode->rid); return 0; #endif @@ -294,6 +306,225 @@ int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader) { return ret; } +SyncIndex syncMinMatchIndex(SSyncNode* pSyncNode) { + SyncIndex minMatchIndex = SYNC_INDEX_INVALID; + + if (pSyncNode->peersNum > 0) { + minMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[0])); + } + + for (int32_t i = 1; i < pSyncNode->peersNum; ++i) { + SyncIndex matchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[i])); + if (matchIndex < minMatchIndex) { + minMatchIndex = matchIndex; + } + } + return minMatchIndex; +} + +char* syncNodePeerState2Str(const SSyncNode* pSyncNode) { + int32_t len = 128; + int32_t useLen = 0; + int32_t leftLen = len - useLen; + char* pStr = taosMemoryMalloc(len); + memset(pStr, 0, len); + + char* p = pStr; + int32_t use = snprintf(p, leftLen, "{"); + useLen += use; + leftLen -= use; + + for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { + SPeerState* pState = syncNodeGetPeerState((SSyncNode*)pSyncNode, &(pSyncNode->replicasId[i])); + ASSERT(pState != NULL); + + p = pStr + useLen; + use = snprintf(p, leftLen, "%d:%" PRId64 " ,%" PRId64, i, pState->lastSendIndex, pState->lastSendTime); + useLen += use; + leftLen -= use; + } + + p = pStr + useLen; + use = snprintf(p, leftLen, "}"); + useLen += use; + leftLen -= use; + + // sTrace("vgId:%d, ------------------ syncNodePeerState2Str:%s", pSyncNode->vgId, pStr); + + return pStr; +} + +int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; + } + ASSERT(rid == pSyncNode->rid); + int32_t code = 0; + + if (syncNodeIsMnode(pSyncNode)) { + // mnode + int64_t logRetention = SYNC_MNODE_LOG_RETENTION; + + SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); + SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); + int64_t logNum = endIndex - beginIndex; + bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore); + + if (isEmpty || (!isEmpty && logNum < logRetention)) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "new-snapshot-index:%" PRId64 ", log-num:%" PRId64 ", empty:%d, do not delete wal", lastApplyIndex, + logNum, isEmpty); + syncNodeEventLog(pSyncNode, logBuf); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; + } + + goto _DEL_WAL; + + } else { + // vnode + if (pSyncNode->replicaNum > 1) { + // multi replicas + + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode); + + for (int32_t i = 0; i < pSyncNode->peersNum; ++i) { + int64_t matchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[i])); + if (lastApplyIndex > matchIndex) { + do { + char host[64]; + uint16_t port; + syncUtilU642Addr(pSyncNode->peersId[i].addr, host, sizeof(host), &port); + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "new-snapshot-index:%" PRId64 " is greater than match-index:%" PRId64 + " of %s:%d, do not delete wal", + lastApplyIndex, matchIndex, host, port); + syncNodeEventLog(pSyncNode, logBuf); + } while (0); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; + } + } + + } else if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { + if (lastApplyIndex > pSyncNode->minMatchIndex) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "new-snapshot-index:%" PRId64 " is greater than min-match-index:%" PRId64 ", do not delete wal", + lastApplyIndex, pSyncNode->minMatchIndex); + syncNodeEventLog(pSyncNode, logBuf); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; + } + + } else if (pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "new-snapshot-index:%" PRId64 " candidate, do not delete wal", lastApplyIndex); + syncNodeEventLog(pSyncNode, logBuf); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; + + } else { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "new-snapshot-index:%" PRId64 " unknown state, do not delete wal", + lastApplyIndex); + syncNodeEventLog(pSyncNode, logBuf); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; + } + + goto _DEL_WAL; + + } else { + // one replica + + goto _DEL_WAL; + } + } + +_DEL_WAL: + + do { + SyncIndex snapshottingIndex = atomic_load_64(&pSyncNode->snapshottingIndex); + + if (snapshottingIndex == SYNC_INDEX_INVALID) { + atomic_store_64(&pSyncNode->snapshottingIndex, lastApplyIndex); + pSyncNode->snapshottingTime = taosGetTimestampMs(); + + SSyncLogStoreData* pData = pSyncNode->pLogStore->data; + code = walBeginSnapshot(pData->pWal, lastApplyIndex); + if (code == 0) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "wal snapshot begin, index:%" PRId64 ", last apply index:%" PRId64, + pSyncNode->snapshottingIndex, lastApplyIndex); + syncNodeEventLog(pSyncNode, logBuf); + + } else { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "wal snapshot begin error since:%s, index:%" PRId64 ", last apply index:%" PRId64, terrstr(terrno), + pSyncNode->snapshottingIndex, lastApplyIndex); + syncNodeErrorLog(pSyncNode, logBuf); + + atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID); + } + + } else { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "snapshotting for %" PRId64 ", do not delete wal for new-snapshot-index:%" PRId64, snapshottingIndex, + lastApplyIndex); + syncNodeEventLog(pSyncNode, logBuf); + } + } while (0); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return code; +} + +int32_t syncEndSnapshot(int64_t rid) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + return -1; + } + ASSERT(rid == pSyncNode->rid); + + int32_t code = 0; + if (atomic_load_64(&pSyncNode->snapshottingIndex) != SYNC_INDEX_INVALID) { + SSyncLogStoreData* pData = pSyncNode->pLogStore->data; + code = walEndSnapshot(pData->pWal); + if (code != 0) { + sError("vgId:%d, wal snapshot end error since:%s", pSyncNode->vgId, terrstr(terrno)); + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return -1; + } else { + do { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "wal snapshot end, index:%" PRId64, + atomic_load_64(&pSyncNode->snapshottingIndex)); + syncNodeEventLog(pSyncNode, logBuf); + } while (0); + + atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID); + } + } + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return code; +} + int32_t syncNodeLeaderTransfer(SSyncNode* pSyncNode) { if (pSyncNode->peersNum == 0) { sDebug("only one replica, cannot leader transfer"); @@ -316,7 +547,7 @@ int32_t syncNodeLeaderTransferTo(SSyncNode* pSyncNode, SNodeInfo newLeader) { } do { - char logBuf[256]; + char logBuf[128]; snprintf(logBuf, sizeof(logBuf), "begin leader transfer to %s:%u", newLeader.nodeFqdn, newLeader.nodePort); syncNodeEventLog(pSyncNode, logBuf); } while (0); @@ -716,24 +947,6 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) { return ret; } -int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize) { - if (arrSize < 0) { - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - - SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid); - if (pSyncNode == NULL) { - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - ASSERT(rid == pSyncNode->rid); - - int32_t ret = syncNodeProposeBatch(pSyncNode, pMsgPArr, pIsWeakArr, arrSize); - taosReleaseRef(tsNodeRefId, pSyncNode->rid); - return ret; -} - static bool syncNodeBatchOK(SRpcMsg** pMsgPArr, int32_t arrSize) { for (int32_t i = 0; i < arrSize; ++i) { if (pMsgPArr[i]->msgType == TDMT_SYNC_CONFIG_CHANGE) { @@ -748,91 +961,6 @@ static bool syncNodeBatchOK(SRpcMsg** pMsgPArr, int32_t arrSize) { return true; } -int32_t syncNodeProposeBatch(SSyncNode* pSyncNode, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize) { - if (!syncNodeBatchOK(pMsgPArr, arrSize)) { - syncNodeErrorLog(pSyncNode, "sync propose batch error"); - terrno = TSDB_CODE_SYN_BATCH_ERROR; - return -1; - } - - if (arrSize > SYNC_MAX_BATCH_SIZE) { - syncNodeErrorLog(pSyncNode, "sync propose batch error"); - terrno = TSDB_CODE_SYN_BATCH_ERROR; - return -1; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - syncNodeErrorLog(pSyncNode, "sync propose not leader"); - terrno = TSDB_CODE_SYN_NOT_LEADER; - return -1; - } - - if (pSyncNode->changing) { - syncNodeErrorLog(pSyncNode, "sync propose not ready"); - terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY; - return -1; - } - - SRaftMeta raftArr[SYNC_MAX_BATCH_SIZE]; - for (int i = 0; i < arrSize; ++i) { - do { - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), "propose message, type:%s batch:%d", TMSG_INFO(pMsgPArr[i]->msgType), - arrSize); - syncNodeEventLog(pSyncNode, eventLog); - } while (0); - - SRespStub stub; - stub.createTime = taosGetTimestampMs(); - stub.rpcMsg = *(pMsgPArr[i]); - uint64_t seqNum = syncRespMgrAdd(pSyncNode->pSyncRespMgr, &stub); - - raftArr[i].isWeak = pIsWeakArr[i]; - raftArr[i].seqNum = seqNum; - } - - SyncClientRequestBatch* pSyncMsg = syncClientRequestBatchBuild(pMsgPArr, raftArr, arrSize, pSyncNode->vgId); - ASSERT(pSyncMsg != NULL); - - SRpcMsg rpcMsg; - syncClientRequestBatch2RpcMsg(pSyncMsg, &rpcMsg); - taosMemoryFree(pSyncMsg); // only free msg body, do not free rpc msg content - - if (pSyncNode->replicaNum == 1 && pSyncNode->vgId != 1) { - int32_t code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg); - if (code == 0) { - // update rpc msg applyIndex - SRpcMsg* msgArr = syncClientRequestBatchRpcMsgArr(pSyncMsg); - ASSERT(arrSize == pSyncMsg->dataCount); - for (int i = 0; i < arrSize; ++i) { - pMsgPArr[i]->info.conn.applyIndex = msgArr[i].info.conn.applyIndex; - syncRespMgrDel(pSyncNode->pSyncRespMgr, raftArr[i].seqNum); - } - - rpcFreeCont(rpcMsg.pCont); - terrno = 0; - return 1; - - } else { - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - - } else { - if (pSyncNode->FpEqMsg != NULL && (*pSyncNode->FpEqMsg)(pSyncNode->msgcb, &rpcMsg) == 0) { - // enqueue msg ok - return 0; - - } else { - sError("vgId:%d, enqueue msg error, FpEqMsg is NULL", pSyncNode->vgId); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - } - - return 0; -} - int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) { int32_t ret = 0; @@ -867,7 +995,7 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) { if (!pSyncNode->restoreFinish && pSyncNode->vgId != 1) { ret = -1; terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY; - sError("vgId:%d, failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64 "", + sError("vgId:%d, failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64, pSyncNode->vgId, TMSG_INFO(pMsg->msgType), syncNodeGetLastIndex(pSyncNode), pSyncNode->commitIndex); goto _END; } @@ -884,7 +1012,7 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak) { // optimized one replica if (syncNodeIsOptimizedOneReplica(pSyncNode, pMsg)) { SyncIndex retIndex; - int32_t code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, &retIndex); + int32_t code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, &retIndex); if (code == 0) { pMsg->info.conn.applyIndex = retIndex; pMsg->info.conn.applyTerm = pSyncNode->pRaftStore->currentTerm; @@ -925,8 +1053,46 @@ _END: return ret; } +int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId) { + pSyncTimer->pTimer = NULL; + pSyncTimer->counter = 0; + pSyncTimer->timerMS = pSyncNode->hbBaseLine; + pSyncTimer->timerCb = syncNodeEqPeerHeartbeatTimer; + pSyncTimer->destId = destId; + atomic_store_64(&pSyncTimer->logicClock, 0); + return 0; +} + +int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) { + int32_t ret = 0; + if (syncEnvIsStart()) { + SSyncHbTimerData* pData = taosMemoryMalloc(sizeof(SSyncHbTimerData)); + pData->pSyncNode = pSyncNode; + pData->pTimer = pSyncTimer; + pData->destId = pSyncTimer->destId; + pData->logicClock = pSyncTimer->logicClock; + + pSyncTimer->pData = pData; + taosTmrReset(pSyncTimer->timerCb, pSyncTimer->timerMS, pData, gSyncEnv->pTimerManager, &pSyncTimer->pTimer); + } else { + sError("vgId:%d, start ctrl hb timer error, sync env is stop", pSyncNode->vgId); + } + return ret; +} + +int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) { + int32_t ret = 0; + atomic_add_fetch_64(&pSyncTimer->logicClock, 1); + taosTmrStop(pSyncTimer->pTimer); + pSyncTimer->pTimer = NULL; + // taosMemoryFree(pSyncTimer->pData); + return ret; +} + // open/close -------------- -SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { +SSyncNode* syncNodeOpen(SSyncInfo* pOldSyncInfo) { + SSyncInfo* pSyncInfo = (SSyncInfo*)pOldSyncInfo; + SSyncNode* pSyncNode = (SSyncNode*)taosMemoryCalloc(1, sizeof(SSyncNode)); if (pSyncNode == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -955,6 +1121,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { sError("failed to create raft cfg file. configPath: %s", pSyncNode->configPath); goto _error; } + if (pSyncInfo->syncCfg.replicaNum == 0) { + pSyncInfo->syncCfg = pSyncNode->pRaftCfg->cfg; + } } else { // update syncCfg by raft_config.json pSyncNode->pRaftCfg = raftCfgOpen(pSyncNode->configPath); @@ -962,9 +1131,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { sError("failed to open raft cfg file. path:%s", pSyncNode->configPath); goto _error; } - if (pSyncInfo->syncCfg.replicaNum == 0) { - pSyncInfo->syncCfg = pSyncNode->pRaftCfg->cfg; - } + pSyncInfo->syncCfg = pSyncNode->pRaftCfg->cfg; raftCfgClose(pSyncNode->pRaftCfg); pSyncNode->pRaftCfg = NULL; @@ -981,6 +1148,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->msgcb = pSyncInfo->msgcb; pSyncNode->FpSendMsg = pSyncInfo->FpSendMsg; pSyncNode->FpEqMsg = pSyncInfo->FpEqMsg; + pSyncNode->FpEqCtrlMsg = pSyncInfo->FpEqCtrlMsg; // init raft config pSyncNode->pRaftCfg = raftCfgOpen(pSyncNode->configPath); @@ -1136,29 +1304,22 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->FpHeartbeatTimerCB = syncNodeEqHeartbeatTimer; pSyncNode->heartbeatTimerCounter = 0; + // init peer heartbeat timer + for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) { + syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]); + } + // init callback pSyncNode->FpOnPing = syncNodeOnPingCb; pSyncNode->FpOnPingReply = syncNodeOnPingReplyCb; - pSyncNode->FpOnClientRequest = syncNodeOnClientRequestCb; - pSyncNode->FpOnTimeout = syncNodeOnTimeoutCb; - - pSyncNode->FpOnSnapshotSend = syncNodeOnSnapshotSendCb; - pSyncNode->FpOnSnapshotRsp = syncNodeOnSnapshotRspCb; - - if (pSyncNode->pRaftCfg->snapshotStrategy) { - sInfo("vgId:%d, sync node use snapshot", pSyncNode->vgId); - pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteSnapshotCb; - pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplySnapshotCb; - pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesSnapshotCb; - pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplySnapshotCb; - - } else { - sInfo("vgId:%d, sync node do not use snapshot", pSyncNode->vgId); - pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteCb; - pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplyCb; - pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesCb; - pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplyCb; - } + pSyncNode->FpOnClientRequest = syncNodeOnClientRequest; + pSyncNode->FpOnTimeout = syncNodeOnTimer; + pSyncNode->FpOnSnapshot = syncNodeOnSnapshot; + pSyncNode->FpOnSnapshotReply = syncNodeOnSnapshotReply; + pSyncNode->FpOnRequestVote = syncNodeOnRequestVote; + pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReply; + pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntries; + pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReply; // tools pSyncNode->pSyncRespMgr = syncRespMgrCreate(pSyncNode, SYNC_RESP_TTL_MS); @@ -1183,6 +1344,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { // is config changing pSyncNode->changing = false; + // peer state + syncNodePeerStateInit(pSyncNode); + + // min match index + pSyncNode->minMatchIndex = SYNC_INDEX_INVALID; + // start in syncNodeStart // start raft // syncNodeBecomeFollower(pSyncNode); @@ -1192,6 +1359,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->leaderTime = timeNow; pSyncNode->lastReplicateTime = timeNow; + // snapshotting + atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID); + syncNodeEventLog(pSyncNode, "sync open"); return pSyncNode; @@ -1226,15 +1396,14 @@ void syncNodeStart(SSyncNode* pSyncNode) { // Raft 3.6.2 Committing entries from previous terms syncNodeAppendNoop(pSyncNode); syncMaybeAdvanceCommitIndex(pSyncNode); + } else { syncNodeBecomeFollower(pSyncNode, "first start"); } - if (pSyncNode->vgId == 1) { - int32_t ret = 0; - ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); - } + int32_t ret = 0; + ret = syncNodeStartPingTimer(pSyncNode); + ASSERT(ret == 0); } void syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ -1247,11 +1416,9 @@ void syncNodeStartStandBy(SSyncNode* pSyncNode) { int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS); ASSERT(ret == 0); - if (pSyncNode->vgId == 1) { - int32_t ret = 0; - ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); - } + ret = 0; + ret = syncNodeStartPingTimer(pSyncNode); + ASSERT(ret == 0); } void syncNodeClose(SSyncNode* pSyncNode) { @@ -1383,11 +1550,13 @@ int32_t syncNodeStartElectTimer(SSyncNode* pSyncNode, int32_t ms) { &pSyncNode->pElectTimer); atomic_store_64(&pSyncNode->electTimerLogicClock, pSyncNode->electTimerLogicClockUser); - do { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "elect timer reset, ms:%d", ms); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); + /* + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "elect timer reset, ms:%d", ms); + syncNodeEventLog(pSyncNode, logBuf); + } while (0); + */ } else { sError("vgId:%d, start elect timer error, sync env is stop", pSyncNode->vgId); @@ -1401,7 +1570,7 @@ int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode) { taosTmrStop(pSyncNode->pElectTimer); pSyncNode->pElectTimer = NULL; - sTrace("vgId:%d, sync %s stop elect timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state)); + // sTrace("vgId:%d, sync %s stop elect timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state)); return ret; } @@ -1453,30 +1622,34 @@ static int32_t syncNodeDoStartHeartbeatTimer(SSyncNode* pSyncNode) { } int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode) { + int32_t ret = 0; + +#if 0 pSyncNode->heartbeatTimerMS = pSyncNode->hbBaseLine; - int32_t ret = syncNodeDoStartHeartbeatTimer(pSyncNode); - return ret; -} + ret = syncNodeDoStartHeartbeatTimer(pSyncNode); +#endif -int32_t syncNodeStartHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms) { - pSyncNode->heartbeatTimerMS = ms; - int32_t ret = syncNodeDoStartHeartbeatTimer(pSyncNode); - return ret; -} + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SSyncTimer* pSyncTimer = syncNodeGetHbTimer(pSyncNode, &(pSyncNode->peersId[i])); + syncHbTimerStart(pSyncNode, pSyncTimer); + } -int32_t syncNodeStartHeartbeatTimerNow(SSyncNode* pSyncNode) { - pSyncNode->heartbeatTimerMS = 1; - int32_t ret = syncNodeDoStartHeartbeatTimer(pSyncNode); return ret; } int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode) { int32_t ret = 0; + +#if 0 atomic_add_fetch_64(&pSyncNode->heartbeatTimerLogicClockUser, 1); taosTmrStop(pSyncNode->pHeartbeatTimer); pSyncNode->pHeartbeatTimer = NULL; +#endif - sTrace("vgId:%d, sync %s stop heartbeat timer", pSyncNode->vgId, syncUtilState2String(pSyncNode->state)); + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SSyncTimer* pSyncTimer = syncNodeGetHbTimer(pSyncNode, &(pSyncNode->peersId[i])); + syncHbTimerStop(pSyncNode, pSyncTimer); + } return ret; } @@ -1487,18 +1660,6 @@ int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) { return 0; } -int32_t syncNodeRestartHeartbeatTimerNow(SSyncNode* pSyncNode) { - syncNodeStopHeartbeatTimer(pSyncNode); - syncNodeStartHeartbeatTimerNow(pSyncNode); - return 0; -} - -int32_t syncNodeRestartNowHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms) { - syncNodeStopHeartbeatTimer(pSyncNode); - syncNodeStartHeartbeatTimerMS(pSyncNode, ms); - return 0; -} - // utils -------------- int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg) { SEpSet epSet; @@ -1702,8 +1863,6 @@ char* syncNode2Str(const SSyncNode* pSyncNode) { } inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { - int32_t userStrLen = strlen(str); - SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpGetSnapshotInfo != NULL) { pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); @@ -1722,22 +1881,25 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { printStr = pCfgStr; } + char* peerStateStr = syncNodePeerState2Str(pSyncNode); + int32_t userStrLen = strlen(str) + strlen(peerStateStr); + if (userStrLen < 256) { char logBuf[256 + 256]; if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) { snprintf(logBuf, sizeof(logBuf), - "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64 - ", snap-tm:%" PRIu64 + "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 + ", snap:%" PRId64 ", snap-tm:%" PRIu64 ", sby:%d, " "stgy:%d, bch:%d, " "r-num:%d, " - "lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s", + "lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s", pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, - pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize, - pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing, - pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), pSyncNode->electTimerLogicClockUser, - pSyncNode->heartbeatTimerLogicClockUser, printStr); + pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex, + snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, + pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, + pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), + pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, peerStateStr, printStr); } else { snprintf(logBuf, sizeof(logBuf), "%s", str); } @@ -1750,18 +1912,18 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { char* s = (char*)taosMemoryMalloc(len); if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) { snprintf(s, len, - "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64 - ", snap-tm:%" PRIu64 + "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 + ", snap:%" PRId64 ", snap-tm:%" PRIu64 ", sby:%d, " "stgy:%d, bch:%d, " "r-num:%d, " - "lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s", + "lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s", pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, - pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize, - pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing, - pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), pSyncNode->electTimerLogicClockUser, - pSyncNode->heartbeatTimerLogicClockUser, printStr); + pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex, + snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, + pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, + pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), + pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, peerStateStr, printStr); } else { snprintf(s, len, "%s", str); } @@ -1771,6 +1933,7 @@ inline void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { taosMemoryFree(s); } + taosMemoryFree(peerStateStr); taosMemoryFree(pCfgStr); } @@ -1799,17 +1962,18 @@ inline void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str) { char logBuf[256 + 256]; if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) { snprintf(logBuf, sizeof(logBuf), - "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64 - ", snap-tm:%" PRIu64 + "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 + ", snap:%" PRId64 ", snap-tm:%" PRIu64 ", sby:%d, " "stgy:%d, bch:%d, " "r-num:%d, " - "lcfg:%" PRId64 ", chging:%d, rsto:%d, %s", + "lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s", pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, - pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize, - pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing, - pSyncNode->restoreFinish, printStr); + pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex, + snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, + pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, + pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), + pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, printStr); } else { snprintf(logBuf, sizeof(logBuf), "%s", str); } @@ -1820,17 +1984,18 @@ inline void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str) { char* s = (char*)taosMemoryMalloc(len); if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) { snprintf(s, len, - "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", snap:%" PRId64 - ", snap-tm:%" PRIu64 + "vgId:%d, sync %s %s, tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 + ", snap:%" PRId64 ", snap-tm:%" PRIu64 ", sby:%d, " "stgy:%d, bch:%d, " "r-num:%d, " - "lcfg:%" PRId64 ", chging:%d, rsto:%d, %s", + "lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s", pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, - pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize, - pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing, - pSyncNode->restoreFinish, printStr); + pSyncNode->commitIndex, logBeginIndex, logLastIndex, pSyncNode->minMatchIndex, snapshot.lastApplyIndex, + snapshot.lastApplyTerm, pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, + pSyncNode->pRaftCfg->batchSize, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, + pSyncNode->changing, pSyncNode->restoreFinish, syncNodeDynamicQuorum(pSyncNode), + pSyncNode->electTimerLogicClockUser, pSyncNode->heartbeatTimerLogicClockUser, printStr); } else { snprintf(s, len, "%s", str); } @@ -1906,11 +2071,12 @@ static bool syncIsConfigChanged(const SSyncCfg* pOldCfg, const SSyncCfg* pNewCfg void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex) { SSyncCfg oldConfig = pSyncNode->pRaftCfg->cfg; +#if 1 if (!syncIsConfigChanged(&oldConfig, pNewConfig)) { sInfo("vgId:1, sync not reconfig since not changed"); return; } - +#endif pSyncNode->pRaftCfg->cfg = *pNewConfig; pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex; @@ -1993,13 +2159,14 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde syncUtilnodeInfo2raftId(&pSyncNode->pRaftCfg->cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i]); } + // update quorum first + pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); + syncIndexMgrUpdate(pSyncNode->pNextIndex, pSyncNode); syncIndexMgrUpdate(pSyncNode->pMatchIndex, pSyncNode); voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode); votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode); - pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); - // reset snapshot senders // clear new @@ -2148,6 +2315,30 @@ void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term) { } } +void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm) { + ASSERT(pSyncNode->pRaftStore->currentTerm <= newTerm); + + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "step down, new-term:%" PRIu64 ", current-term:%" PRIu64, newTerm, + pSyncNode->pRaftStore->currentTerm); + syncNodeEventLog(pSyncNode, logBuf); + } while (0); + + if (pSyncNode->pRaftStore->currentTerm < newTerm) { + raftStoreSetTerm(pSyncNode->pRaftStore, newTerm); + char tmpBuf[64]; + snprintf(tmpBuf, sizeof(tmpBuf), "step down, update term to %" PRIu64, newTerm); + syncNodeBecomeFollower(pSyncNode, tmpBuf); + raftStoreClearVote(pSyncNode->pRaftStore); + + } else { + if (pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) { + syncNodeBecomeFollower(pSyncNode, "step down"); + } + } +} + void syncNodeLeaderChangeRsp(SSyncNode* pSyncNode) { syncRespCleanRsp(pSyncNode->pSyncRespMgr); } void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { @@ -2171,6 +2362,9 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { pSyncNode->pFsm->FpBecomeFollowerCb(pSyncNode->pFsm); } + // min match index + pSyncNode->minMatchIndex = SYNC_INDEX_INVALID; + // trace log do { int32_t debugStrLen = strlen(debugStr); @@ -2237,6 +2431,9 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { pSyncNode->pMatchIndex->index[i] = SYNC_INDEX_INVALID; } + // init peer mgr + syncNodePeerStateInit(pSyncNode); + // update sender private term SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId)); if (pMySender != NULL) { @@ -2259,11 +2456,17 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { // start heartbeat timer syncNodeStartHeartbeatTimer(pSyncNode); + // send heartbeat right now + syncNodeHeartbeatPeers(pSyncNode); + // call back if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeLeaderCb != NULL) { pSyncNode->pFsm->FpBecomeLeaderCb(pSyncNode->pFsm); } + // min match index + pSyncNode->minMatchIndex = SYNC_INDEX_INVALID; + // trace log do { int32_t debugStrLen = strlen(debugStr); @@ -2282,7 +2485,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - // ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); + ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); syncNodeBecomeLeader(pSyncNode, "candidate to leader"); syncNodeLog2("==state change syncNodeCandidate2Leader==", pSyncNode); @@ -2290,6 +2493,21 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { // Raft 3.6.2 Committing entries from previous terms syncNodeAppendNoop(pSyncNode); syncMaybeAdvanceCommitIndex(pSyncNode); + + if (pSyncNode->replicaNum > 1) { + syncNodeReplicate(pSyncNode); + } +} + +bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); } + +int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) { + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + pSyncNode->peerStates[i].lastSendIndex = SYNC_INDEX_INVALID; + pSyncNode->peerStates[i].lastSendTime = 0; + } + + return 0; } void syncNodeFollower2Candidate(SSyncNode* pSyncNode) { @@ -2475,35 +2693,35 @@ int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex // for debug -------------- void syncNodePrint(SSyncNode* pObj) { char* serialized = syncNode2Str(pObj); - printf("syncNodePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncNodePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncNodePrint2(char* s, SSyncNode* pObj) { char* serialized = syncNode2Str(pObj); - printf("syncNodePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncNodePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncNodeLog(SSyncNode* pObj) { char* serialized = syncNode2Str(pObj); - sTraceLong("syncNodeLog | len:%lu | %s", strlen(serialized), serialized); + sTraceLong("syncNodeLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncNodeLog2(char* s, SSyncNode* pObj) { if (gRaftDetailLog) { char* serialized = syncNode2Str(pObj); - sTraceLong("syncNodeLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("syncNodeLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } void syncNodeLog3(char* s, SSyncNode* pObj) { char* serialized = syncNode2Str(pObj); - sTraceLong("syncNodeLog3 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("syncNodeLog3 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } @@ -2565,7 +2783,7 @@ static void syncNodeEqElectTimer(void* param, void* tmrId) { syncTimeoutDestroy(pSyncMsg); // reset timer ms - if (syncEnvIsStart()) { + if (syncEnvIsStart() && pSyncNode->electBaseLine > 0) { pSyncNode->electTimerMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine); taosTmrReset(syncNodeEqElectTimer, pSyncNode->electTimerMS, pSyncNode, gSyncEnv->pTimerManager, &pSyncNode->pElectTimer); @@ -2619,6 +2837,67 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) { } } +static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) { + SSyncHbTimerData* pData = (SSyncHbTimerData*)param; + SSyncNode* pSyncNode = pData->pSyncNode; + SSyncTimer* pSyncTimer = pData->pTimer; + + if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { + return; + } + + //syncNodeEventLog(pSyncNode, "eq peer hb timer"); + + int64_t timerLogicClock = atomic_load_64(&pSyncTimer->logicClock); + int64_t msgLogicClock = atomic_load_64(&pData->logicClock); + + if (pSyncNode->replicaNum > 1) { + if (timerLogicClock == msgLogicClock) { + SyncHeartbeat* pSyncMsg = syncHeartbeatBuild(pSyncNode->vgId); + pSyncMsg->srcId = pSyncNode->myRaftId; + pSyncMsg->destId = pData->destId; + pSyncMsg->term = pSyncNode->pRaftStore->currentTerm; + pSyncMsg->commitIndex = pSyncNode->commitIndex; + pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode); + pSyncMsg->privateTerm = 0; + + SRpcMsg rpcMsg; + syncHeartbeat2RpcMsg(pSyncMsg, &rpcMsg); + +// eq msg +#if 0 + if (pSyncNode->FpEqCtrlMsg != NULL) { + int32_t code = pSyncNode->FpEqCtrlMsg(pSyncNode->msgcb, &rpcMsg); + if (code != 0) { + sError("vgId:%d, sync ctrl enqueue timer msg error, code:%d", pSyncNode->vgId, code); + rpcFreeCont(rpcMsg.pCont); + syncHeartbeatDestroy(pSyncMsg); + return; + } + } else { + sError("vgId:%d, enqueue ctrl msg cb ptr (i.e. FpEqMsg) not set.", pSyncNode->vgId); + } +#endif + + // send msg + syncNodeSendHeartbeat(pSyncNode, &(pSyncMsg->destId), pSyncMsg); + + syncHeartbeatDestroy(pSyncMsg); + + if (syncEnvIsStart()) { + taosTmrReset(syncNodeEqPeerHeartbeatTimer, pSyncTimer->timerMS, pData, gSyncEnv->pTimerManager, + &pSyncTimer->pTimer); + } else { + sError("sync env is stop, syncNodeEqHeartbeatTimer"); + } + + } else { + sTrace("==syncNodeEqPeerHeartbeatTimer== timerLogicClock:%" PRIu64 ", msgLogicClock:%" PRIu64 "", timerLogicClock, + msgLogicClock); + } + } +} + static int32_t syncNodeEqNoop(SSyncNode* ths) { int32_t ret = 0; ASSERT(ths->state == TAOS_SYNC_STATE_LEADER); @@ -2676,10 +2955,9 @@ static int32_t syncNodeAppendNoop(SSyncNode* ths) { if (ths->state == TAOS_SYNC_STATE_LEADER) { int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); if (code != 0) { - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); + syncNodeErrorLog(ths, "append noop error"); return -1; } - syncNodeReplicate(ths, false); } if (h) { @@ -2738,13 +3016,15 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg) { SRpcMsg rpcMsg; syncHeartbeatReply2RpcMsg(pMsgReply, &rpcMsg); +#if 1 if (pMsg->term >= ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) { - syncNodeBecomeFollower(ths, "become follower by hb"); + syncNodeStepDown(ths, pMsg->term); } +#endif - if (pMsg->term == ths->pRaftStore->currentTerm) { - // sInfo("vgId:%d, heartbeat reset timer", ths->vgId); + if (pMsg->term == ths->pRaftStore->currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) { syncNodeResetElectTimer(ths); + ths->minMatchIndex = pMsg->minMatchIndex; #if 0 if (ths->state == TAOS_SYNC_STATE_FOLLOWER) { @@ -2785,10 +3065,12 @@ int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg) { // /\ UNCHANGED <> // -int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex) { + +int32_t syncNodeOnClientRequest(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex) { + syncNodeEventLog(ths, "on client request"); + int32_t ret = 0; int32_t code = 0; - syncClientRequestLog2("==syncNodeOnClientRequestCb==", pMsg); SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); SyncTerm term = ths->pRaftStore->currentTerm; @@ -2803,16 +3085,13 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncI code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); if (code != 0) { // del resp mgr, call FpCommitCb - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); + ASSERT(0); return -1; } // if mulit replica, start replicate right now if (ths->replicaNum > 1) { - syncNodeReplicate(ths, false); - - // pre commit - syncNodePreCommit(ths, pEntry, 0); + syncNodeReplicate(ths); } // if only myself, maybe commit right now @@ -2838,61 +3117,6 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncI return ret; } -int32_t syncNodeOnClientRequestBatchCb(SSyncNode* ths, SyncClientRequestBatch* pMsg) { - int32_t code = 0; - - if (ths->state != TAOS_SYNC_STATE_LEADER) { - // call FpCommitCb, delete resp mgr - return -1; - } - - SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); - SyncTerm term = ths->pRaftStore->currentTerm; - - int32_t raftMetaArrayLen = sizeof(SRaftMeta) * pMsg->dataCount; - int32_t rpcArrayLen = sizeof(SRpcMsg) * pMsg->dataCount; - SRaftMeta* raftMetaArr = (SRaftMeta*)(pMsg->data); - SRpcMsg* msgArr = (SRpcMsg*)((char*)(pMsg->data) + raftMetaArrayLen); - for (int32_t i = 0; i < pMsg->dataCount; ++i) { - SSyncRaftEntry* pEntry = syncEntryBuild(msgArr[i].contLen); - ASSERT(pEntry != NULL); - - pEntry->originalRpcType = msgArr[i].msgType; - pEntry->seqNum = raftMetaArr[i].seqNum; - pEntry->isWeak = raftMetaArr[i].isWeak; - pEntry->term = term; - pEntry->index = index; - memcpy(pEntry->data, msgArr[i].pCont, msgArr[i].contLen); - ASSERT(msgArr[i].contLen == pEntry->dataLen); - - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); - if (code != 0) { - sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno)); - // del resp mgr, call FpCommitCb - return -1; - } - - // update rpc msg conn apply.index - msgArr[i].info.conn.applyIndex = pEntry->index; - } - - // fsync once - SSyncLogStoreData* pData = ths->pLogStore->data; - SWal* pWal = pData->pWal; - walFsync(pWal, false); - - if (ths->replicaNum > 1) { - // if multi replica, start replicate right now - syncNodeReplicate(ths, false); - - } else if (ths->replicaNum == 1) { - // one replica - syncMaybeAdvanceCommitIndex(ths); - } - - return 0; -} - const char* syncStr(ESyncState state) { switch (state) { case TAOS_SYNC_STATE_FOLLOWER: @@ -2942,7 +3166,7 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p do { char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "do leader transfer, index:%" PRId64 "", pEntry->index); + snprintf(logBuf, sizeof(logBuf), "do leader transfer, index:%" PRId64, pEntry->index); syncNodeEventLog(ths, logBuf); } while (0); @@ -3081,11 +3305,10 @@ static int32_t syncNodeProposeConfigChangeFinish(SSyncNode* ths, SyncReconfigFin } bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) { - return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType)); - // return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1); + return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1); } -int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { +int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { if (beginIndex > endIndex) { return 0; } @@ -3121,10 +3344,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); } else { code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry); - if (code != 0) { - sError("vgId:%d, failed to get log entry since %s. index:%" PRId64 "", ths->vgId, tstrerror(terrno), i); - return -1; - } + ASSERT(code == 0); ASSERT(pEntry != NULL); } @@ -3134,8 +3354,7 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, // user commit if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) { bool internalExecute = true; - if ((ths->replicaNum == 1) && ths->restoreFinish) { - // if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) { + if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) { internalExecute = false; } @@ -3240,6 +3459,42 @@ SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) return pSender; } +SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) { + SSyncTimer* pTimer = NULL; + for (int i = 0; i < ths->replicaNum; ++i) { + if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) { + pTimer = &((ths->peerHeartbeatTimerArr)[i]); + } + } + return pTimer; +} + +SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId) { + SPeerState* pState = NULL; + for (int i = 0; i < ths->replicaNum; ++i) { + if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) { + pState = &((ths->peerStates)[i]); + } + } + return pState; +} + +bool syncNodeNeedSendAppendEntries(SSyncNode* ths, const SRaftId* pDestId, const SyncAppendEntries* pMsg) { + SPeerState* pState = syncNodeGetPeerState(ths, pDestId); + if (pState == NULL) { + return false; + } + + SyncIndex sendIndex = pMsg->prevLogIndex + 1; + int64_t tsNow = taosGetTimestampMs(); + + if (pState->lastSendIndex == sendIndex && tsNow - pState->lastSendTime < SYNC_APPEND_ENTRIES_TIMEOUT_MS) { + return false; + } + + return true; +} + bool syncNodeCanChange(SSyncNode* pSyncNode) { if (pSyncNode->changing) { sError("sync cannot change"); @@ -3265,6 +3520,25 @@ bool syncNodeCanChange(SSyncNode* pSyncNode) { return true; } +const char* syncTimerTypeStr(enum ESyncTimeoutType timerType) { + if (timerType == SYNC_TIMEOUT_PING) { + return "ping"; + } else if (timerType == SYNC_TIMEOUT_ELECTION) { + return "elect"; + } else if (timerType == SYNC_TIMEOUT_HEARTBEAT) { + return "heartbeat"; + } else { + return "unknown"; + } +} + +void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* s) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "recv sync-timer {type:%s, lc:%" PRIu64 ", ms:%d, data:%p}, %s", + syncTimerTypeStr(pMsg->timeoutType), pMsg->logicClock, pMsg->timerMS, pMsg->data, s); + syncNodeEventLog(pSyncNode, logBuf); +} + void syncLogSendRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s) { char host[64]; uint16_t port; @@ -3393,8 +3667,9 @@ void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, const syncUtilU642Addr(pMsg->destId.addr, host, sizeof(host), &port); char logBuf[256]; snprintf(logBuf, sizeof(logBuf), - "send sync-heartbeat from %s:%d {term:%" PRIu64 ", cmt:%" PRIu64 ", pterm:%" PRIu64 "}, %s", host, port, - pMsg->term, pMsg->commitIndex, pMsg->privateTerm, s); + "send sync-heartbeat to %s:%d {term:%" PRIu64 ", cmt:%" PRId64 ", min-match:%" PRId64 ", pterm:%" PRIu64 + "}, %s", + host, port, pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->privateTerm, s); syncNodeEventLog(pSyncNode, logBuf); } @@ -3404,8 +3679,9 @@ void syncLogRecvHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, const syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); char logBuf[256]; snprintf(logBuf, sizeof(logBuf), - "recv sync-heartbeat from %s:%d {term:%" PRIu64 ", cmt:%" PRIu64 ", pterm:%" PRIu64 "}, %s", host, port, - pMsg->term, pMsg->commitIndex, pMsg->privateTerm, s); + "recv sync-heartbeat from %s:%d {term:%" PRIu64 ", cmt:%" PRId64 ", min-match:%" PRId64 ", pterm:%" PRIu64 + "}, %s", + host, port, pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->privateTerm, s); syncNodeEventLog(pSyncNode, logBuf); } @@ -3427,4 +3703,4 @@ void syncLogRecvHeartbeatReply(SSyncNode* pSyncNode, const SyncHeartbeatReply* p snprintf(logBuf, sizeof(logBuf), "recv sync-heartbeat-reply from %s:%d {term:%" PRIu64 ", pterm:%" PRIu64 "}, %s", host, port, pMsg->term, pMsg->privateTerm, s); syncNodeEventLog(pSyncNode, logBuf); -} +} \ No newline at end of file diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 7c871d0542..9de3fde389 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -133,28 +133,28 @@ char* syncRpcMsg2Str(SRpcMsg* pRpcMsg) { // for debug ---------------------- void syncRpcMsgPrint(SRpcMsg* pMsg) { char* serialized = syncRpcMsg2Str(pMsg); - printf("syncRpcMsgPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncRpcMsgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncRpcMsgPrint2(char* s, SRpcMsg* pMsg) { char* serialized = syncRpcMsg2Str(pMsg); - printf("syncRpcMsgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncRpcMsgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncRpcMsgLog(SRpcMsg* pMsg) { char* serialized = syncRpcMsg2Str(pMsg); - sTrace("syncRpcMsgLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncRpcMsgLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncRpcMsgLog2(char* s, SRpcMsg* pMsg) { if (gRaftDetailLog) { char* serialized = syncRpcMsg2Str(pMsg); - sTrace("syncRpcMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncRpcMsgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -271,21 +271,21 @@ void syncTimeoutPrint(const SyncTimeout* pMsg) { void syncTimeoutPrint2(char* s, const SyncTimeout* pMsg) { char* serialized = syncTimeout2Str(pMsg); - printf("syncTimeoutPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncTimeoutPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncTimeoutLog(const SyncTimeout* pMsg) { char* serialized = syncTimeout2Str(pMsg); - sTrace("syncTimeoutLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncTimeoutLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncTimeoutLog2(char* s, const SyncTimeout* pMsg) { if (gRaftDetailLog) { char* serialized = syncTimeout2Str(pMsg); - sTrace("syncTimeoutLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncTimeoutLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -526,28 +526,28 @@ char* syncPing2Str(const SyncPing* pMsg) { // for debug ---------------------- void syncPingPrint(const SyncPing* pMsg) { char* serialized = syncPing2Str(pMsg); - printf("syncPingPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncPingPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncPingPrint2(char* s, const SyncPing* pMsg) { char* serialized = syncPing2Str(pMsg); - printf("syncPingPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncPingPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncPingLog(const SyncPing* pMsg) { char* serialized = syncPing2Str(pMsg); - sTrace("syncPingLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncPingLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncPingLog2(char* s, const SyncPing* pMsg) { if (gRaftDetailLog) { char* serialized = syncPing2Str(pMsg); - sTrace("syncPingLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncPingLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -931,28 +931,28 @@ char* syncClientRequest2Str(const SyncClientRequest* pMsg) { // for debug ---------------------- void syncClientRequestPrint(const SyncClientRequest* pMsg) { char* serialized = syncClientRequest2Str(pMsg); - printf("syncClientRequestPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncClientRequestPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncClientRequestPrint2(char* s, const SyncClientRequest* pMsg) { char* serialized = syncClientRequest2Str(pMsg); - printf("syncClientRequestPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncClientRequestPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncClientRequestLog(const SyncClientRequest* pMsg) { char* serialized = syncClientRequest2Str(pMsg); - sTrace("syncClientRequestLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncClientRequestLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncClientRequestLog2(char* s, const SyncClientRequest* pMsg) { if (gRaftDetailLog) { char* serialized = syncClientRequest2Str(pMsg); - sTrace("syncClientRequestLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncClientRequestLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -1101,28 +1101,28 @@ char* syncClientRequestBatch2Str(const SyncClientRequestBatch* pMsg) { // for debug ---------------------- void syncClientRequestBatchPrint(const SyncClientRequestBatch* pMsg) { char* serialized = syncClientRequestBatch2Str(pMsg); - printf("syncClientRequestBatchPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncClientRequestBatchPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncClientRequestBatchPrint2(char* s, const SyncClientRequestBatch* pMsg) { char* serialized = syncClientRequestBatch2Str(pMsg); - printf("syncClientRequestBatchPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncClientRequestBatchPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncClientRequestBatchLog(const SyncClientRequestBatch* pMsg) { char* serialized = syncClientRequestBatch2Str(pMsg); - sTrace("syncClientRequestBatchLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncClientRequestBatchLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncClientRequestBatchLog2(char* s, const SyncClientRequestBatch* pMsg) { if (gRaftDetailLog) { char* serialized = syncClientRequestBatch2Str(pMsg); - sTraceLong("syncClientRequestBatchLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("syncClientRequestBatchLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -1252,28 +1252,28 @@ char* syncRequestVote2Str(const SyncRequestVote* pMsg) { // for debug ---------------------- void syncRequestVotePrint(const SyncRequestVote* pMsg) { char* serialized = syncRequestVote2Str(pMsg); - printf("syncRequestVotePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncRequestVotePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncRequestVotePrint2(char* s, const SyncRequestVote* pMsg) { char* serialized = syncRequestVote2Str(pMsg); - printf("syncRequestVotePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncRequestVotePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncRequestVoteLog(const SyncRequestVote* pMsg) { char* serialized = syncRequestVote2Str(pMsg); - sTrace("syncRequestVoteLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncRequestVoteLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncRequestVoteLog2(char* s, const SyncRequestVote* pMsg) { if (gRaftDetailLog) { char* serialized = syncRequestVote2Str(pMsg); - sTrace("syncRequestVoteLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncRequestVoteLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -1400,28 +1400,28 @@ char* syncRequestVoteReply2Str(const SyncRequestVoteReply* pMsg) { // for debug ---------------------- void syncRequestVoteReplyPrint(const SyncRequestVoteReply* pMsg) { char* serialized = syncRequestVoteReply2Str(pMsg); - printf("syncRequestVoteReplyPrint | len:%ld | %s \n", strlen(serialized), serialized); + printf("syncRequestVoteReplyPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncRequestVoteReplyPrint2(char* s, const SyncRequestVoteReply* pMsg) { char* serialized = syncRequestVoteReply2Str(pMsg); - printf("syncRequestVoteReplyPrint2 | len:%ld | %s | %s \n", strlen(serialized), s, serialized); + printf("syncRequestVoteReplyPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncRequestVoteReplyLog(const SyncRequestVoteReply* pMsg) { char* serialized = syncRequestVoteReply2Str(pMsg); - sTrace("syncRequestVoteReplyLog | len:%ld | %s", strlen(serialized), serialized); + sTrace("syncRequestVoteReplyLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncRequestVoteReplyLog2(char* s, const SyncRequestVoteReply* pMsg) { if (gRaftDetailLog) { char* serialized = syncRequestVoteReply2Str(pMsg); - sTrace("syncRequestVoteReplyLog2 | len:%ld | %s | %s", strlen(serialized), s, serialized); + sTrace("syncRequestVoteReplyLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -1571,28 +1571,28 @@ char* syncAppendEntries2Str(const SyncAppendEntries* pMsg) { // for debug ---------------------- void syncAppendEntriesPrint(const SyncAppendEntries* pMsg) { char* serialized = syncAppendEntries2Str(pMsg); - printf("syncAppendEntriesPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncAppendEntriesPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncAppendEntriesPrint2(char* s, const SyncAppendEntries* pMsg) { char* serialized = syncAppendEntries2Str(pMsg); - printf("syncAppendEntriesPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncAppendEntriesPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncAppendEntriesLog(const SyncAppendEntries* pMsg) { char* serialized = syncAppendEntries2Str(pMsg); - sTrace("syncAppendEntriesLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncAppendEntriesLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncAppendEntriesLog2(char* s, const SyncAppendEntries* pMsg) { if (gRaftDetailLog) { char* serialized = syncAppendEntries2Str(pMsg); - sTrace("syncAppendEntriesLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncAppendEntriesLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -1810,28 +1810,28 @@ char* syncAppendEntriesBatch2Str(const SyncAppendEntriesBatch* pMsg) { // for debug ---------------------- void syncAppendEntriesBatchPrint(const SyncAppendEntriesBatch* pMsg) { char* serialized = syncAppendEntriesBatch2Str(pMsg); - printf("syncAppendEntriesBatchPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncAppendEntriesBatchPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncAppendEntriesBatchPrint2(char* s, const SyncAppendEntriesBatch* pMsg) { char* serialized = syncAppendEntriesBatch2Str(pMsg); - printf("syncAppendEntriesBatchPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncAppendEntriesBatchPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncAppendEntriesBatchLog(const SyncAppendEntriesBatch* pMsg) { char* serialized = syncAppendEntriesBatch2Str(pMsg); - sTrace("syncAppendEntriesBatchLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncAppendEntriesBatchLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncAppendEntriesBatchLog2(char* s, const SyncAppendEntriesBatch* pMsg) { if (gRaftDetailLog) { char* serialized = syncAppendEntriesBatch2Str(pMsg); - sTraceLong("syncAppendEntriesBatchLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("syncAppendEntriesBatchLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -1966,28 +1966,28 @@ char* syncAppendEntriesReply2Str(const SyncAppendEntriesReply* pMsg) { // for debug ---------------------- void syncAppendEntriesReplyPrint(const SyncAppendEntriesReply* pMsg) { char* serialized = syncAppendEntriesReply2Str(pMsg); - printf("syncAppendEntriesReplyPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncAppendEntriesReplyPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncAppendEntriesReplyPrint2(char* s, const SyncAppendEntriesReply* pMsg) { char* serialized = syncAppendEntriesReply2Str(pMsg); - printf("syncAppendEntriesReplyPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncAppendEntriesReplyPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncAppendEntriesReplyLog(const SyncAppendEntriesReply* pMsg) { char* serialized = syncAppendEntriesReply2Str(pMsg); - sTrace("syncAppendEntriesReplyLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncAppendEntriesReplyLog | len:%d| %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncAppendEntriesReplyLog2(char* s, const SyncAppendEntriesReply* pMsg) { if (gRaftDetailLog) { char* serialized = syncAppendEntriesReply2Str(pMsg); - sTrace("syncAppendEntriesReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncAppendEntriesReplyLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -2119,28 +2119,28 @@ char* syncHeartbeat2Str(const SyncHeartbeat* pMsg) { void syncHeartbeatPrint(const SyncHeartbeat* pMsg) { char* serialized = syncHeartbeat2Str(pMsg); - printf("syncHeartbeatPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncHeartbeatPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncHeartbeatPrint2(char* s, const SyncHeartbeat* pMsg) { char* serialized = syncHeartbeat2Str(pMsg); - printf("syncHeartbeatPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncHeartbeatPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncHeartbeatLog(const SyncHeartbeat* pMsg) { char* serialized = syncHeartbeat2Str(pMsg); - sTrace("syncHeartbeatLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncHeartbeatLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncHeartbeatLog2(char* s, const SyncHeartbeat* pMsg) { if (gRaftDetailLog) { char* serialized = syncHeartbeat2Str(pMsg); - sTrace("syncHeartbeatLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncHeartbeatLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -2273,28 +2273,28 @@ char* syncHeartbeatReply2Str(const SyncHeartbeatReply* pMsg) { void syncHeartbeatReplyPrint(const SyncHeartbeatReply* pMsg) { char* serialized = syncHeartbeatReply2Str(pMsg); - printf("syncHeartbeatReplyPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncHeartbeatReplyPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncHeartbeatReplyPrint2(char* s, const SyncHeartbeatReply* pMsg) { char* serialized = syncHeartbeatReply2Str(pMsg); - printf("syncHeartbeatReplyPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncHeartbeatReplyPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncHeartbeatReplyLog(const SyncHeartbeatReply* pMsg) { char* serialized = syncHeartbeatReply2Str(pMsg); - sTrace("syncHeartbeatReplyLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncHeartbeatReplyLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncHeartbeatReplyLog2(char* s, const SyncHeartbeatReply* pMsg) { if (gRaftDetailLog) { char* serialized = syncHeartbeatReply2Str(pMsg); - sTrace("syncHeartbeatReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncHeartbeatReplyLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -2426,28 +2426,28 @@ char* syncApplyMsg2Str(const SyncApplyMsg* pMsg) { // for debug ---------------------- void syncApplyMsgPrint(const SyncApplyMsg* pMsg) { char* serialized = syncApplyMsg2Str(pMsg); - printf("syncApplyMsgPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncApplyMsgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncApplyMsgPrint2(char* s, const SyncApplyMsg* pMsg) { char* serialized = syncApplyMsg2Str(pMsg); - printf("syncApplyMsgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncApplyMsgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncApplyMsgLog(const SyncApplyMsg* pMsg) { char* serialized = syncApplyMsg2Str(pMsg); - sTrace("ssyncApplyMsgLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("ssyncApplyMsgLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncApplyMsgLog2(char* s, const SyncApplyMsg* pMsg) { if (gRaftDetailLog) { char* serialized = syncApplyMsg2Str(pMsg); - sTrace("syncApplyMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncApplyMsgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -2603,28 +2603,28 @@ char* syncSnapshotSend2Str(const SyncSnapshotSend* pMsg) { // for debug ---------------------- void syncSnapshotSendPrint(const SyncSnapshotSend* pMsg) { char* serialized = syncSnapshotSend2Str(pMsg); - printf("syncSnapshotSendPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncSnapshotSendPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncSnapshotSendPrint2(char* s, const SyncSnapshotSend* pMsg) { char* serialized = syncSnapshotSend2Str(pMsg); - printf("syncSnapshotSendPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncSnapshotSendPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncSnapshotSendLog(const SyncSnapshotSend* pMsg) { char* serialized = syncSnapshotSend2Str(pMsg); - sTrace("syncSnapshotSendLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncSnapshotSendLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncSnapshotSendLog2(char* s, const SyncSnapshotSend* pMsg) { if (gRaftDetailLog) { char* serialized = syncSnapshotSend2Str(pMsg); - sTrace("syncSnapshotSendLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncSnapshotSendLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -2763,28 +2763,28 @@ char* syncSnapshotRsp2Str(const SyncSnapshotRsp* pMsg) { // for debug ---------------------- void syncSnapshotRspPrint(const SyncSnapshotRsp* pMsg) { char* serialized = syncSnapshotRsp2Str(pMsg); - printf("syncSnapshotRspPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncSnapshotRspPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg) { char* serialized = syncSnapshotRsp2Str(pMsg); - printf("syncSnapshotRspPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncSnapshotRspPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg) { char* serialized = syncSnapshotRsp2Str(pMsg); - sTrace("syncSnapshotRspLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncSnapshotRspLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg) { if (gRaftDetailLog) { char* serialized = syncSnapshotRsp2Str(pMsg); - sTrace("syncSnapshotRspLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncSnapshotRspLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -2925,28 +2925,28 @@ char* syncLeaderTransfer2Str(const SyncLeaderTransfer* pMsg) { // for debug ---------------------- void syncLeaderTransferPrint(const SyncLeaderTransfer* pMsg) { char* serialized = syncLeaderTransfer2Str(pMsg); - printf("syncLeaderTransferPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncLeaderTransferPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncLeaderTransferPrint2(char* s, const SyncLeaderTransfer* pMsg) { char* serialized = syncLeaderTransfer2Str(pMsg); - printf("syncLeaderTransferPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncLeaderTransferPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncLeaderTransferLog(const SyncLeaderTransfer* pMsg) { char* serialized = syncLeaderTransfer2Str(pMsg); - sTrace("syncLeaderTransferLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncLeaderTransferLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncLeaderTransferLog2(char* s, const SyncLeaderTransfer* pMsg) { if (gRaftDetailLog) { char* serialized = syncLeaderTransfer2Str(pMsg); - sTrace("syncLeaderTransferLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncLeaderTransferLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -3054,28 +3054,28 @@ char* syncReconfigFinish2Str(const SyncReconfigFinish* pMsg) { // for debug ---------------------- void syncReconfigFinishPrint(const SyncReconfigFinish* pMsg) { char* serialized = syncReconfigFinish2Str(pMsg); - printf("syncReconfigFinishPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncReconfigFinishPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncReconfigFinishPrint2(char* s, const SyncReconfigFinish* pMsg) { char* serialized = syncReconfigFinish2Str(pMsg); - printf("syncReconfigFinishPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncReconfigFinishPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncReconfigFinishLog(const SyncReconfigFinish* pMsg) { char* serialized = syncReconfigFinish2Str(pMsg); - sTrace("syncReconfigFinishLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncReconfigFinishLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg) { if (gRaftDetailLog) { char* serialized = syncReconfigFinish2Str(pMsg); - sTrace("syncReconfigFinishLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncReconfigFinishLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index cf5bd24899..bac4825c50 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -447,85 +447,85 @@ int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg) { // for debug ---------------------- void syncCfgPrint(SSyncCfg *pCfg) { char *serialized = syncCfg2Str(pCfg); - printf("syncCfgPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("syncCfgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void syncCfgPrint2(char *s, SSyncCfg *pCfg) { char *serialized = syncCfg2Str(pCfg); - printf("syncCfgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("syncCfgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void syncCfgLog(SSyncCfg *pCfg) { char *serialized = syncCfg2Str(pCfg); - sTrace("syncCfgLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("syncCfgLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void syncCfgLog2(char *s, SSyncCfg *pCfg) { char *serialized = syncCfg2Str(pCfg); - sTrace("syncCfgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncCfgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } void syncCfgLog3(char *s, SSyncCfg *pCfg) { char *serialized = syncCfg2SimpleStr(pCfg); - sTrace("syncCfgLog3 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("syncCfgLog3 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } void raftCfgPrint(SRaftCfg *pCfg) { char *serialized = raftCfg2Str(pCfg); - printf("raftCfgPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("raftCfgPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void raftCfgPrint2(char *s, SRaftCfg *pCfg) { char *serialized = raftCfg2Str(pCfg); - printf("raftCfgPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("raftCfgPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void raftCfgLog(SRaftCfg *pCfg) { char *serialized = raftCfg2Str(pCfg); - sTrace("raftCfgLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("raftCfgLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void raftCfgLog2(char *s, SRaftCfg *pCfg) { char *serialized = raftCfg2Str(pCfg); - sTrace("raftCfgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("raftCfgLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } // --------- void raftCfgIndexPrint(SRaftCfgIndex *pCfg) { char *serialized = raftCfgIndex2Str(pCfg); - printf("raftCfgIndexPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("raftCfgIndexPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void raftCfgIndexPrint2(char *s, SRaftCfgIndex *pCfg) { char *serialized = raftCfgIndex2Str(pCfg); - printf("raftCfgIndexPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("raftCfgIndexPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void raftCfgIndexLog(SRaftCfgIndex *pCfg) { char *serialized = raftCfgIndex2Str(pCfg); - sTrace("raftCfgIndexLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("raftCfgIndexLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void raftCfgIndexLog2(char *s, SRaftCfgIndex *pCfg) { char *serialized = raftCfgIndex2Str(pCfg); - sTrace("raftCfgIndexLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("raftCfgIndexLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } diff --git a/source/libs/sync/src/syncRaftEntry.c b/source/libs/sync/src/syncRaftEntry.c index 818ffa57a6..940aaca055 100644 --- a/source/libs/sync/src/syncRaftEntry.c +++ b/source/libs/sync/src/syncRaftEntry.c @@ -418,28 +418,28 @@ char* raftCache2Str(SRaftEntryHashCache* pCache) { void raftCachePrint(SRaftEntryHashCache* pCache) { char* serialized = raftCache2Str(pCache); - printf("raftCachePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("raftCachePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void raftCachePrint2(char* s, SRaftEntryHashCache* pCache) { char* serialized = raftCache2Str(pCache); - printf("raftCachePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("raftCachePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void raftCacheLog(SRaftEntryHashCache* pCache) { char* serialized = raftCache2Str(pCache); - sTrace("raftCacheLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("raftCacheLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void raftCacheLog2(char* s, SRaftEntryHashCache* pCache) { if (gRaftDetailLog) { char* serialized = raftCache2Str(pCache); - sTraceLong("raftCacheLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("raftCacheLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -677,28 +677,28 @@ char* raftEntryCache2Str(SRaftEntryCache* pObj) { void raftEntryCachePrint(SRaftEntryCache* pObj) { char* serialized = raftEntryCache2Str(pObj); - printf("raftEntryCachePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("raftEntryCachePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void raftEntryCachePrint2(char* s, SRaftEntryCache* pObj) { char* serialized = raftEntryCache2Str(pObj); - printf("raftEntryCachePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("raftEntryCachePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void raftEntryCacheLog(SRaftEntryCache* pObj) { char* serialized = raftEntryCache2Str(pObj); - sTrace("raftEntryCacheLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("raftEntryCacheLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void raftEntryCacheLog2(char* s, SRaftEntryCache* pObj) { if (gRaftDetailLog) { char* serialized = raftEntryCache2Str(pObj); - sTraceLong("raftEntryCacheLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("raftEntryCacheLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 3e806a0fb7..23d076cfbc 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -33,21 +33,11 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEn static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); static bool raftLogExist(struct SSyncLogStore* pLogStore, SyncIndex index); +static int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); +static SyncIndex raftlogCommitIndex(SSyncLogStore* pLogStore); -// private function static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry); -//------------------------------- -// log[0 .. n] -static SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore); -static SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore); -static SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore); -static SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index); -static int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); -static int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex); -static int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); -static SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore); - //------------------------------- SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { SSyncLogStore* pLogStore = taosMemoryMalloc(sizeof(SSyncLogStore)); @@ -74,14 +64,8 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { pData->pWalHandle = walOpenReader(pData->pWal, NULL); ASSERT(pData->pWalHandle != NULL); - pLogStore->appendEntry = logStoreAppendEntry; - pLogStore->getEntry = logStoreGetEntry; - pLogStore->truncate = logStoreTruncate; - pLogStore->getLastIndex = logStoreLastIndex; - pLogStore->getLastTerm = logStoreLastTerm; - pLogStore->updateCommitIndex = logStoreUpdateCommitIndex; - pLogStore->getCommitIndex = logStoreGetCommitIndex; - + pLogStore->syncLogUpdateCommitIndex = raftLogUpdateCommitIndex; + pLogStore->syncLogCommitIndex = raftlogCommitIndex; pLogStore->syncLogRestoreFromSnapshot = raftLogRestoreFromSnapshot; pLogStore->syncLogBeginIndex = raftLogBeginIndex; pLogStore->syncLogEndIndex = raftLogEndIndex; @@ -234,6 +218,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr snprintf(logBuf, sizeof(logBuf), "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", pEntry->index, err, err, errStr, sysErr, sysErrStr); syncNodeErrorLog(pData->pSyncNode, logBuf); + + ASSERT(0); return -1; } pEntry->index = index; @@ -277,11 +263,15 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, do { char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - index, err, err, errStr, sysErr, sysErrStr); if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // syncNodeEventLog(pData->pSyncNode, logBuf); + snprintf(logBuf, sizeof(logBuf), + "wal read not exist, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, err, err, + errStr, sysErr, sysErrStr); + syncNodeEventLog(pData->pSyncNode, logBuf); + } else { + snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", + index, err, err, errStr, sysErr, sysErrStr); syncNodeErrorLog(pData->pSyncNode, logBuf); } } while (0); @@ -372,157 +362,7 @@ static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** pp return -1; } -//------------------------------- -// log[0 .. n] - -int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { - SSyncLogStoreData* pData = pLogStore->data; - SWal* pWal = pData->pWal; - - SyncIndex index = 0; - SWalSyncInfo syncMeta = {0}; - syncMeta.isWeek = pEntry->isWeak; - syncMeta.seqNum = pEntry->seqNum; - syncMeta.term = pEntry->term; - - index = walAppendLog(pWal, pEntry->originalRpcType, syncMeta, pEntry->data, pEntry->dataLen); - if (index < 0) { - int32_t err = terrno; - const char* errStr = tstrerror(err); - int32_t sysErr = errno; - const char* sysErrStr = strerror(errno); - - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pEntry->index, err, err, errStr, sysErr, sysErrStr); - syncNodeErrorLog(pData->pSyncNode, logBuf); - - ASSERT(0); - return -1; - } - pEntry->index = index; - - do { - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), "write2 index:%" PRId64 ", type:%s, origin type:%s", pEntry->index, - TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType)); - syncNodeEventLog(pData->pSyncNode, eventLog); - } while (0); - - return 0; -} - -SSyncRaftEntry* logStoreGetEntryWithoutLock(SSyncLogStore* pLogStore, SyncIndex index) { - SSyncLogStoreData* pData = pLogStore->data; - SWal* pWal = pData->pWal; - - if (index >= SYNC_INDEX_BEGIN && index <= logStoreLastIndex(pLogStore)) { - // SWalReadHandle* pWalHandle = walOpenReadHandle(pWal); - SWalReader* pWalHandle = pData->pWalHandle; - ASSERT(pWalHandle != NULL); - - int32_t code = walReadVer(pWalHandle, index); - // int32_t code = walReadVerCached(pWalHandle, index); - if (code != 0) { - int32_t err = terrno; - const char* errStr = tstrerror(err); - int32_t sysErr = errno; - const char* sysErrStr = strerror(errno); - - do { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - index, err, err, errStr, sysErr, sysErrStr); - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // syncNodeEventLog(pData->pSyncNode, logBuf); - } else { - syncNodeErrorLog(pData->pSyncNode, logBuf); - } - } while (0); - - sError("failed to read ver since %s. index:%" PRId64 "", tstrerror(terrno), index); - return NULL; - } - - SSyncRaftEntry* pEntry = syncEntryBuild(pWalHandle->pHead->head.bodyLen); - ASSERT(pEntry != NULL); - - pEntry->msgType = TDMT_SYNC_CLIENT_REQUEST; - pEntry->originalRpcType = pWalHandle->pHead->head.msgType; - pEntry->seqNum = pWalHandle->pHead->head.syncMeta.seqNum; - pEntry->isWeak = pWalHandle->pHead->head.syncMeta.isWeek; - pEntry->term = pWalHandle->pHead->head.syncMeta.term; - pEntry->index = index; - ASSERT(pEntry->dataLen == pWalHandle->pHead->head.bodyLen); - memcpy(pEntry->data, pWalHandle->pHead->head.body, pWalHandle->pHead->head.bodyLen); - - /* - int32_t saveErr = terrno; - walCloseReadHandle(pWalHandle); - terrno = saveErr; - */ - - return pEntry; - - } else { - return NULL; - } -} - -SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index) { - SSyncLogStoreData* pData = pLogStore->data; - SSyncRaftEntry *pEntry = NULL; - - taosThreadMutexLock(&pData->mutex); - pEntry = logStoreGetEntryWithoutLock(pLogStore, index); - taosThreadMutexUnlock(&pData->mutex); - return pEntry; -} - -int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex) { - SSyncLogStoreData* pData = pLogStore->data; - SWal* pWal = pData->pWal; - // ASSERT(walRollback(pWal, fromIndex) == 0); - int32_t code = walRollback(pWal, fromIndex); - if (code != 0) { - int32_t err = terrno; - const char* errStr = tstrerror(err); - int32_t sysErr = errno; - const char* sysErrStr = strerror(errno); - sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", - pData->pSyncNode->vgId, fromIndex, err, err, errStr, sysErr, sysErrStr); - - ASSERT(0); - } - - // event log - do { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "wal truncate, from-index:%" PRId64, fromIndex); - syncNodeEventLog(pData->pSyncNode, logBuf); - } while (0); - - return 0; -} - -SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore) { - SSyncLogStoreData* pData = pLogStore->data; - SWal* pWal = pData->pWal; - SyncIndex lastIndex = walGetLastVer(pWal); - return lastIndex; -} - -SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore) { - SyncTerm lastTerm = 0; - SSyncRaftEntry* pLastEntry = logStoreGetLastEntry(pLogStore); - if (pLastEntry != NULL) { - lastTerm = pLastEntry->term; - taosMemoryFree(pLastEntry); - } - return lastTerm; -} - -int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { +int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; // ASSERT(walCommit(pWal, index) == 0); @@ -540,23 +380,11 @@ int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { return 0; } -SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore) { +SyncIndex raftlogCommitIndex(SSyncLogStore* pLogStore) { SSyncLogStoreData* pData = pLogStore->data; return pData->pSyncNode->commitIndex; } -SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore) { - SSyncLogStoreData* pData = pLogStore->data; - SWal* pWal = pData->pWal; - SyncIndex lastIndex = walGetLastVer(pWal); - - SSyncRaftEntry* pEntry = NULL; - if (lastIndex > 0) { - pEntry = logStoreGetEntry(pLogStore, lastIndex); - } - return pEntry; -} - cJSON* logStore2Json(SSyncLogStore* pLogStore) { char u64buf[128] = {0}; SSyncLogStoreData* pData = (SSyncLogStoreData*)pLogStore->data; @@ -595,7 +423,9 @@ cJSON* logStore2Json(SSyncLogStore* pLogStore) { if (!raftLogIsEmpty(pLogStore)) { for (SyncIndex i = beginIndex; i <= endIndex; ++i) { - SSyncRaftEntry* pEntry = logStoreGetEntry(pLogStore, i); + SSyncRaftEntry* pEntry = NULL; + raftLogGetEntry(pLogStore, i, &pEntry); + cJSON_AddItemToArray(pEntries, syncEntry2Json(pEntry)); syncEntryDestory(pEntry); } @@ -675,14 +505,14 @@ SyncIndex logStoreWalCommitVer(SSyncLogStore* pLogStore) { // for debug ----------------- void logStorePrint(SSyncLogStore* pLogStore) { char* serialized = logStore2Str(pLogStore); - printf("logStorePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("logStorePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void logStorePrint2(char* s, SSyncLogStore* pLogStore) { char* serialized = logStore2Str(pLogStore); - printf("logStorePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("logStorePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } @@ -690,7 +520,7 @@ void logStorePrint2(char* s, SSyncLogStore* pLogStore) { void logStoreLog(SSyncLogStore* pLogStore) { if (gRaftDetailLog) { char* serialized = logStore2Str(pLogStore); - sTraceLong("logStoreLog | len:%lu | %s", strlen(serialized), serialized); + sTraceLong("logStoreLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } } @@ -698,7 +528,7 @@ void logStoreLog(SSyncLogStore* pLogStore) { void logStoreLog2(char* s, SSyncLogStore* pLogStore) { if (gRaftDetailLog) { char* serialized = logStore2Str(pLogStore); - sTraceLong("logStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTraceLong("logStoreLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } @@ -706,28 +536,28 @@ void logStoreLog2(char* s, SSyncLogStore* pLogStore) { // for debug ----------------- void logStoreSimplePrint(SSyncLogStore* pLogStore) { char* serialized = logStoreSimple2Str(pLogStore); - printf("logStoreSimplePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("logStoreSimplePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void logStoreSimplePrint2(char* s, SSyncLogStore* pLogStore) { char* serialized = logStoreSimple2Str(pLogStore); - printf("logStoreSimplePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("logStoreSimplePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void logStoreSimpleLog(SSyncLogStore* pLogStore) { char* serialized = logStoreSimple2Str(pLogStore); - sTrace("logStoreSimpleLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("logStoreSimpleLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void logStoreSimpleLog2(char* s, SSyncLogStore* pLogStore) { if (gRaftDetailLog) { char* serialized = logStoreSimple2Str(pLogStore); - sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("logStoreSimpleLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } } diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index a714a2c403..dcc4e1f133 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -226,25 +226,25 @@ char *raftStore2Str(SRaftStore *pRaftStore) { // for debug ------------------- void raftStorePrint(SRaftStore *pObj) { char *serialized = raftStore2Str(pObj); - printf("raftStorePrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("raftStorePrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void raftStorePrint2(char *s, SRaftStore *pObj) { char *serialized = raftStore2Str(pObj); - printf("raftStorePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("raftStorePrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void raftStoreLog(SRaftStore *pObj) { char *serialized = raftStore2Str(pObj); - sTrace("raftStoreLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("raftStoreLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void raftStoreLog2(char *s, SRaftStore *pObj) { char *serialized = raftStore2Str(pObj); - sTrace("raftStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("raftStoreLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index 886f7ad199..e040310e15 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -47,138 +47,75 @@ // msource |-> i, // mdest |-> j]) // /\ UNCHANGED <> -// -int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER); - syncIndexMgrLog2("==syncNodeAppendEntriesPeers== pNextIndex", pSyncNode->pNextIndex); - syncIndexMgrLog2("==syncNodeAppendEntriesPeers== pMatchIndex", pSyncNode->pMatchIndex); - logStoreSimpleLog2("==syncNodeAppendEntriesPeers==", pSyncNode->pLogStore); +int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId) { + // next index + SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SRaftId* pDestId = &(pSyncNode->peersId[i]); + // maybe start snapshot + SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); + SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); + if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, + nextIndex, logStartIndex, logEndIndex); + syncNodeEventLog(pSyncNode, logBuf); - // set prevLogIndex - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - - SyncIndex preLogIndex = nextIndex - 1; - - // set preLogTerm - SyncTerm preLogTerm = 0; - if (preLogIndex >= SYNC_INDEX_BEGIN) { - SSyncRaftEntry* pPreEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, preLogIndex); - ASSERT(pPreEntry != NULL); - - preLogTerm = pPreEntry->term; - syncEntryDestory(pPreEntry); - } - - // batch optimized - // SyncIndex lastIndex = syncUtilMinIndex(pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore), nextIndex); - - SyncAppendEntries* pMsg = NULL; - SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, nextIndex); - if (pEntry != NULL) { - pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId); - ASSERT(pMsg != NULL); - - // add pEntry into msg - uint32_t len; - char* serialized = syncEntrySerialize(pEntry, &len); - ASSERT(len == pEntry->bytes); - memcpy(pMsg->data, serialized, len); - - taosMemoryFree(serialized); - syncEntryDestory(pEntry); - - } else { - // maybe overflow, send empty record - pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId); - ASSERT(pMsg != NULL); - } - - ASSERT(pMsg != NULL); - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = *pDestId; - pMsg->term = pSyncNode->pRaftStore->currentTerm; - pMsg->prevLogIndex = preLogIndex; - pMsg->prevLogTerm = preLogTerm; - pMsg->commitIndex = pSyncNode->commitIndex; - - syncAppendEntriesLog2("==syncNodeAppendEntriesPeers==", pMsg); - - // send AppendEntries - syncNodeAppendEntries(pSyncNode, pDestId, pMsg); - syncAppendEntriesDestroy(pMsg); + // start snapshot + int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId); + ASSERT(code == 0); + return 0; } - return ret; -} - -int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, SyncIndex nextIndex) { - int32_t ret = 0; - // pre index, pre term SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); - if (preLogTerm == SYNC_TERM_INVALID) { - SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1; - // SyncIndex newNextIndex = nextIndex + 1; - syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex); - syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID); - sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64 - ", match-index:%d, raftid:%" PRId64, - pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr); - return -1; - } + // prepare entry + SyncAppendEntries* pMsg = NULL; - // entry pointer array - SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE]; - memset(entryPArr, 0, sizeof(entryPArr)); + SSyncRaftEntry* pEntry; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); - // get entry batch - int32_t getCount = 0; - SyncIndex getEntryIndex = nextIndex; - for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) { - SSyncRaftEntry* pEntry = NULL; - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry); - if (code == 0) { - ASSERT(pEntry != NULL); - entryPArr[i] = pEntry; - getCount++; - getEntryIndex++; + if (code == 0) { + ASSERT(pEntry != NULL); + + pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId); + ASSERT(pMsg != NULL); + + // add pEntry into msg + uint32_t len; + char* serialized = syncEntrySerialize(pEntry, &len); + ASSERT(len == pEntry->bytes); + memcpy(pMsg->data, serialized, len); + + taosMemoryFree(serialized); + syncEntryDestory(pEntry); + + } else { + if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { + // no entry in log + pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId); + ASSERT(pMsg != NULL); } else { - break; - } - } + do { + char host[64]; + uint16_t port; + syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - // event log - do { - char logBuf[128]; - char host[64]; - uint16_t port; - syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "replicate to %s:%d error, next-index:%" PRId64, host, port, nextIndex); + syncNodeErrorLog(pSyncNode, logBuf); + } while (0); - // build msg - SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId); - ASSERT(pMsg != NULL); - - // free entries - for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) { - SSyncRaftEntry* pEntry = entryPArr[i]; - if (pEntry != NULL) { - syncEntryDestory(pEntry); - entryPArr[i] = NULL; + syncAppendEntriesDestroy(pMsg); + return -1; } } // prepare msg + ASSERT(pMsg != NULL); pMsg->srcId = pSyncNode->myRaftId; pMsg->destId = *pDestId; pMsg->term = pSyncNode->pRaftStore->currentTerm; @@ -186,293 +123,69 @@ int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, Syn pMsg->prevLogTerm = preLogTerm; pMsg->commitIndex = pSyncNode->commitIndex; pMsg->privateTerm = 0; - pMsg->dataCount = getCount; + // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); // send msg - syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg); + syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, pMsg); + syncAppendEntriesDestroy(pMsg); - // speed up - if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) { - ret = 1; - -#if 0 - do { - char logBuf[128]; - char host[64]; - uint16_t port; - syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); -#endif - } - - syncAppendEntriesBatchDestroy(pMsg); - - return ret; + return 0; } -int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) { +int32_t syncNodeReplicate(SSyncNode* pSyncNode) { if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { return -1; } - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SRaftId* pDestId = &(pSyncNode->peersId[i]); - - // next index - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - ret = syncNodeAppendEntriesOnePeer(pSyncNode, pDestId, nextIndex); - } - - return ret; -} - -#if 0 -int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) { - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - return -1; - } + syncNodeEventLog(pSyncNode, "do replicate"); int32_t ret = 0; for (int i = 0; i < pSyncNode->peersNum; ++i) { SRaftId* pDestId = &(pSyncNode->peersId[i]); - - // next index - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - - // pre index, pre term - SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); - SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); - if (preLogTerm == SYNC_TERM_INVALID) { - SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1; - // SyncIndex newNextIndex = nextIndex + 1; - - syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex); - syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID); - sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64 - ", match-index:%d, raftid:%" PRId64, - pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr); - return -1; - } - - // entry pointer array - SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE]; - memset(entryPArr, 0, sizeof(entryPArr)); - - // get entry batch - int32_t getCount = 0; - SyncIndex getEntryIndex = nextIndex; - for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) { - SSyncRaftEntry* pEntry = NULL; - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry); - if (code == 0) { - ASSERT(pEntry != NULL); - entryPArr[i] = pEntry; - getCount++; - getEntryIndex++; - - } else { - break; - } - } - - // event log - do { - char logBuf[128]; - char host[64]; - uint16_t port; + ret = syncNodeReplicateOne(pSyncNode, pDestId); + if (ret != 0) { + char host[64]; + int16_t port; syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); - - // build msg - SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId); - ASSERT(pMsg != NULL); - - // free entries - for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) { - SSyncRaftEntry* pEntry = entryPArr[i]; - if (pEntry != NULL) { - syncEntryDestory(pEntry); - entryPArr[i] = NULL; - } + sError("vgId:%d, do append entries error for %s:%d", pSyncNode->vgId, host, port); } - - // prepare msg - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = *pDestId; - pMsg->term = pSyncNode->pRaftStore->currentTerm; - pMsg->prevLogIndex = preLogIndex; - pMsg->prevLogTerm = preLogTerm; - pMsg->commitIndex = pSyncNode->commitIndex; - pMsg->privateTerm = 0; - pMsg->dataCount = getCount; - - // send msg - syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg); - - // speed up - if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) { - ret = 1; - -#if 0 - do { - char logBuf[128]; - char host[64]; - uint16_t port; - syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port); - snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); -#endif - } - - syncAppendEntriesBatchDestroy(pMsg); } - return ret; + return 0; } -#endif - -int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER); - - syncIndexMgrLog2("begin append entries peers pNextIndex:", pSyncNode->pNextIndex); - syncIndexMgrLog2("begin append entries peers pMatchIndex:", pSyncNode->pMatchIndex); - logStoreSimpleLog2("begin append entries peers LogStore:", pSyncNode->pLogStore); +int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) { int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SRaftId* pDestId = &(pSyncNode->peersId[i]); + syncLogSendAppendEntries(pSyncNode, pMsg, ""); - // next index - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); + SRpcMsg rpcMsg; + syncAppendEntries2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(destRaftId, pSyncNode, &rpcMsg); - // pre index, pre term - SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); - SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); - if (preLogTerm == SYNC_TERM_INVALID) { - SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1; - // SyncIndex newNextIndex = nextIndex + 1; + SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId); + ASSERT(pState != NULL); - syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex); - syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID); - sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64 - ", match-index:%d, raftid:%" PRId64, - pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr); - - return -1; - } - - // prepare entry - SyncAppendEntries* pMsg = NULL; - - SSyncRaftEntry* pEntry; - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); - - if (code == 0) { - ASSERT(pEntry != NULL); - - pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId); - ASSERT(pMsg != NULL); - - // add pEntry into msg - uint32_t len; - char* serialized = syncEntrySerialize(pEntry, &len); - ASSERT(len == pEntry->bytes); - memcpy(pMsg->data, serialized, len); - - taosMemoryFree(serialized); - syncEntryDestory(pEntry); - - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // no entry in log - pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId); - ASSERT(pMsg != NULL); - - } else { - syncNodeLog3("", pSyncNode); - ASSERT(0); - } - } - - // prepare msg - ASSERT(pMsg != NULL); - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = *pDestId; - pMsg->term = pSyncNode->pRaftStore->currentTerm; - pMsg->prevLogIndex = preLogIndex; - pMsg->prevLogTerm = preLogTerm; - pMsg->commitIndex = pSyncNode->commitIndex; - pMsg->privateTerm = 0; - // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); - - // send msg - syncNodeAppendEntries(pSyncNode, pDestId, pMsg); - syncAppendEntriesDestroy(pMsg); + if (pMsg->dataLen > 0) { + pState->lastSendIndex = pMsg->prevLogIndex + 1; + pState->lastSendTime = taosGetTimestampMs(); } return ret; } -int32_t syncNodeReplicate(SSyncNode* pSyncNode, bool isTimer) { - // start replicate +int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) { int32_t ret = 0; - - switch (pSyncNode->pRaftCfg->snapshotStrategy) { - case SYNC_STRATEGY_NO_SNAPSHOT: - ret = syncNodeAppendEntriesPeers(pSyncNode); - break; - - case SYNC_STRATEGY_STANDARD_SNAPSHOT: - ret = syncNodeAppendEntriesPeersSnapshot(pSyncNode); - break; - - case SYNC_STRATEGY_WAL_FIRST: - ret = syncNodeAppendEntriesPeersSnapshot2(pSyncNode); - break; - - default: - ret = syncNodeAppendEntriesPeers(pSyncNode); - break; - } - - // start delay - int64_t timeNow = taosGetTimestampMs(); - int64_t startDelay = timeNow - pSyncNode->startTime; - - // replicate delay - int64_t replicateDelay = timeNow - pSyncNode->lastReplicateTime; - pSyncNode->lastReplicateTime = timeNow; - - if (ret > 0 && isTimer && startDelay > SYNC_SPEED_UP_AFTER_MS) { - // speed up replicate - int32_t ms = - pSyncNode->heartbeatTimerMS < SYNC_SPEED_UP_HB_TIMER ? pSyncNode->heartbeatTimerMS : SYNC_SPEED_UP_HB_TIMER; - syncNodeRestartNowHeartbeatTimerMS(pSyncNode, ms); - -#if 0 - do { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "replicate speed up"); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); -#endif + if (syncNodeNeedSendAppendEntries(pSyncNode, destRaftId, pMsg)) { + ret = syncNodeSendAppendEntries(pSyncNode, destRaftId, pMsg); } else { - syncNodeRestartHeartbeatTimer(pSyncNode); + char logBuf[128]; + char host[64]; + int16_t port; + syncUtilU642Addr(destRaftId->addr, host, sizeof(host), &port); -#if 0 - do { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "replicate slow down"); - syncNodeEventLog(pSyncNode, logBuf); - } while (0); -#endif + snprintf(logBuf, sizeof(logBuf), "do not repcate to %s:%d for index:%" PRId64, host, port, pMsg->prevLogIndex + 1); + syncNodeEventLog(pSyncNode, logBuf); } return ret; @@ -488,12 +201,34 @@ int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, c return ret; } -int32_t syncNodeAppendEntriesBatch(SSyncNode* pSyncNode, const SRaftId* destRaftId, - const SyncAppendEntriesBatch* pMsg) { - syncLogSendAppendEntriesBatch(pSyncNode, pMsg, ""); +int32_t syncNodeSendHeartbeat(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncHeartbeat* pMsg) { + int32_t ret = 0; + syncLogSendHeartbeat(pSyncNode, pMsg, ""); SRpcMsg rpcMsg; - syncAppendEntriesBatch2RpcMsg(pMsg, &rpcMsg); - syncNodeSendMsgById(destRaftId, pSyncNode, &rpcMsg); + syncHeartbeat2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSyncNode, &rpcMsg); + return ret; +} + +int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode) { + for (int32_t i = 0; i < pSyncNode->peersNum; ++i) { + SyncHeartbeat* pSyncMsg = syncHeartbeatBuild(pSyncNode->vgId); + pSyncMsg->srcId = pSyncNode->myRaftId; + pSyncMsg->destId = pSyncNode->peersId[i]; + pSyncMsg->term = pSyncNode->pRaftStore->currentTerm; + pSyncMsg->commitIndex = pSyncNode->commitIndex; + pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode); + pSyncMsg->privateTerm = 0; + + SRpcMsg rpcMsg; + syncHeartbeat2RpcMsg(pSyncMsg, &rpcMsg); + + // send msg + syncNodeSendHeartbeat(pSyncNode, &(pSyncMsg->destId), pSyncMsg); + + syncHeartbeatDestroy(pSyncMsg); + } + return 0; } \ No newline at end of file diff --git a/source/libs/sync/src/syncRequestVote.c b/source/libs/sync/src/syncRequestVote.c index 122a81930b..074e4fca64 100644 --- a/source/libs/sync/src/syncRequestVote.c +++ b/source/libs/sync/src/syncRequestVote.c @@ -42,65 +42,6 @@ // m) // /\ UNCHANGED <> // -int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvRequestVote(ths, pMsg, "maybe replica already dropped"); - return -1; - } - - bool logOK = (pMsg->lastLogTerm > ths->pLogStore->getLastTerm(ths->pLogStore)) || - ((pMsg->lastLogTerm == ths->pLogStore->getLastTerm(ths->pLogStore)) && - (pMsg->lastLogIndex >= ths->pLogStore->getLastIndex(ths->pLogStore))); - - // maybe update term - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncNodeUpdateTerm(ths, pMsg->term); -#if 0 - if (logOK) { - syncNodeUpdateTerm(ths, pMsg->term); - } else { - syncNodeUpdateTermWithoutStepDown(ths, pMsg->term); - } -#endif - } - ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); - - bool grant = (pMsg->term == ths->pRaftStore->currentTerm) && logOK && - ((!raftStoreHasVoted(ths->pRaftStore)) || (syncUtilSameId(&(ths->pRaftStore->voteFor), &(pMsg->srcId)))); - if (grant) { - // maybe has already voted for pMsg->srcId - // vote again, no harm - raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); - - // forbid elect for this round - syncNodeResetElectTimer(ths); - } - - // send msg - SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId); - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->pRaftStore->currentTerm; - pReply->voteGranted = grant; - - // trace log - do { - char logBuf[32]; - snprintf(logBuf, sizeof(logBuf), "grant:%d", pReply->voteGranted); - syncLogRecvRequestVote(ths, pMsg, logBuf); - syncLogSendRequestVoteReply(ths, pReply, ""); - } while (0); - - SRpcMsg rpcMsg; - syncRequestVoteReply2RpcMsg(pReply, &rpcMsg); - syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); - syncRequestVoteReplyDestroy(pReply); - - return ret; -} static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pMsg) { SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode); @@ -157,12 +98,12 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM return false; } -int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) { +int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg) { int32_t ret = 0; // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvRequestVote(ths, pMsg, "maybe replica already dropped"); + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { + syncLogRecvRequestVote(ths, pMsg, "not in my config"); return -1; } @@ -170,14 +111,8 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) { // maybe update term if (pMsg->term > ths->pRaftStore->currentTerm) { - syncNodeUpdateTerm(ths, pMsg->term); -#if 0 - if (logOK) { - syncNodeUpdateTerm(ths, pMsg->term); - } else { - syncNodeUpdateTermWithoutStepDown(ths, pMsg->term); - } -#endif + syncNodeStepDown(ths, pMsg->term); + // syncNodeUpdateTerm(ths, pMsg->term); } ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); @@ -188,6 +123,9 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) { // vote again, no harm raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); + // candidate ? + syncNodeStepDown(ths, ths->pRaftStore->currentTerm); + // forbid elect for this round syncNodeResetElectTimer(ths); } diff --git a/source/libs/sync/src/syncRequestVoteReply.c b/source/libs/sync/src/syncRequestVoteReply.c index ff91315de7..a9c3256258 100644 --- a/source/libs/sync/src/syncRequestVoteReply.c +++ b/source/libs/sync/src/syncRequestVoteReply.c @@ -37,68 +37,12 @@ // /\ Discard(m) // /\ UNCHANGED <> // -int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) { +int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg) { int32_t ret = 0; // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvRequestVoteReply(ths, pMsg, "maybe replica already dropped"); - return -1; - } - - // drop stale response - if (pMsg->term < ths->pRaftStore->currentTerm) { - syncLogRecvRequestVoteReply(ths, pMsg, "drop stale response"); - return -1; - } - - // ASSERT(!(pMsg->term > ths->pRaftStore->currentTerm)); - // no need this code, because if I receive reply.term, then I must have sent for that term. - // if (pMsg->term > ths->pRaftStore->currentTerm) { - // syncNodeUpdateTerm(ths, pMsg->term); - // } - - if (pMsg->term > ths->pRaftStore->currentTerm) { - syncLogRecvRequestVoteReply(ths, pMsg, "error term"); - return -1; - } - - syncLogRecvRequestVoteReply(ths, pMsg, ""); - ASSERT(pMsg->term == ths->pRaftStore->currentTerm); - - // This tallies votes even when the current state is not Candidate, - // but they won't be looked at, so it doesn't matter. - if (ths->state == TAOS_SYNC_STATE_CANDIDATE) { - votesRespondAdd(ths->pVotesRespond, pMsg); - if (pMsg->voteGranted) { - // add vote - voteGrantedVote(ths->pVotesGranted, pMsg); - - // maybe to leader - if (voteGrantedMajority(ths->pVotesGranted)) { - if (!ths->pVotesGranted->toLeader) { - syncNodeCandidate2Leader(ths); - - // prevent to leader again! - ths->pVotesGranted->toLeader = true; - } - } - } else { - ; - // do nothing - // UNCHANGED <> - } - } - - return 0; -} - -int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { - syncLogRecvRequestVoteReply(ths, pMsg, "maybe replica already dropped"); + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { + syncLogRecvRequestVoteReply(ths, pMsg, "not in my config"); return -1; } @@ -116,6 +60,7 @@ int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteRepl if (pMsg->term > ths->pRaftStore->currentTerm) { syncLogRecvRequestVoteReply(ths, pMsg, "error term"); + syncNodeStepDown(ths, pMsg->term); return -1; } diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index 30b29d335c..88af5746d4 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -136,7 +136,7 @@ void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { while (pStub) { size_t len; - void *key = taosHashGetKey(pStub, &len); + void * key = taosHashGetKey(pStub, &len); uint64_t *pSeqNum = (uint64_t *)key; sum++; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 6167d41141..a7bafa9f28 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -41,6 +41,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI } memset(pSender, 0, sizeof(*pSender)); + int64_t timeNow = taosGetTimestampMs(); + pSender->start = false; pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; @@ -51,7 +53,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->pSyncNode = pSyncNode; pSender->replicaIndex = replicaIndex; pSender->term = pSyncNode->pRaftStore->currentTerm; - pSender->privateTerm = taosGetTimestampMs() + 100; + pSender->privateTerm = timeNow + 100; + pSender->startTime = timeNow; pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &(pSender->snapshot)); pSender->finish = false; } else { @@ -402,6 +405,24 @@ char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event) { return s; } +int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { + // calculate index + + syncNodeEventLog(pSyncNode, "start snapshot ..."); + + SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, pDestId); + if (pSender == NULL) { + // create sender + } else { + // if is same + // return 0; + } + + // send begin msg + + return 0; +} + // ------------------------------------- SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId) { bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) && @@ -721,7 +742,7 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event) // condition 3, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close // condition 4, got data, update ack // -int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { +int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // get receiver SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; bool needRsp = false; @@ -834,7 +855,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // condition 2 sender receives ack, set seq = ack + 1, send msg from seq // condition 3 sender receives error msg, just print error log // -int32_t syncNodeOnSnapshotRspCb(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { +int32_t syncNodeOnSnapshotReply(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { // if already drop replica, do not process if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId)) && pSyncNode->state == TAOS_SYNC_STATE_LEADER) { sError("vgId:%d, recv sync-snapshot-rsp, maybe replica already dropped", pSyncNode->vgId); diff --git a/source/libs/sync/src/syncTimeout.c b/source/libs/sync/src/syncTimeout.c index fd7443222f..17c8c14136 100644 --- a/source/libs/sync/src/syncTimeout.c +++ b/source/libs/sync/src/syncTimeout.c @@ -16,6 +16,7 @@ #include "syncTimeout.h" #include "syncElection.h" #include "syncRaftCfg.h" +#include "syncRaftLog.h" #include "syncReplication.h" #include "syncRespMgr.h" @@ -60,12 +61,36 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) { int32_t syncNodeTimerRoutine(SSyncNode* ths) { syncNodeEventLog(ths, "timer routines"); - if (ths->vgId == 1) { + // timer replicate + syncNodeReplicate(ths); + + // clean mnode index + if (syncNodeIsMnode(ths)) { syncNodeCleanConfigIndex(ths); } + // end timeout wal snapshot + int64_t timeNow = taosGetTimestampMs(); + if (timeNow - ths->snapshottingIndex > SYNC_DEL_WAL_MS && + atomic_load_64(&ths->snapshottingIndex) != SYNC_INDEX_INVALID) { + SSyncLogStoreData* pData = ths->pLogStore->data; + int32_t code = walEndSnapshot(pData->pWal); + if (code != 0) { + sError("vgId:%d, wal snapshot end error since:%s", ths->vgId, terrstr(terrno)); + return -1; + } else { + do { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "wal snapshot end, index:%" PRId64, atomic_load_64(&ths->snapshottingIndex)); + syncNodeEventLog(ths, logBuf); + } while (0); + + atomic_store_64(&ths->snapshottingIndex, SYNC_INDEX_INVALID); + } + } + #if 0 - if (ths->vgId != 1) { + if (!syncNodeIsMnode(ths)) { syncRespClean(ths->pSyncRespMgr); } #endif @@ -73,9 +98,9 @@ int32_t syncNodeTimerRoutine(SSyncNode* ths) { return 0; } -int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) { +int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg) { int32_t ret = 0; - syncTimeoutLog2("==syncNodeOnTimeoutCb==", pMsg); + syncLogRecvTimer(ths, pMsg, ""); if (pMsg->timeoutType == SYNC_TIMEOUT_PING) { if (atomic_load_64(&ths->pingTimerLogicClockUser) <= pMsg->logicClock) { @@ -84,28 +109,30 @@ int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) { // syncNodePingAll(ths); // syncNodePingPeers(ths); - // sTrace("vgId:%d, sync timeout, type:ping count:%d", ths->vgId, ths->pingTimerCounter); syncNodeTimerRoutine(ths); } } else if (pMsg->timeoutType == SYNC_TIMEOUT_ELECTION) { if (atomic_load_64(&ths->electTimerLogicClockUser) <= pMsg->logicClock) { ++(ths->electTimerCounter); - sTrace("vgId:%d, sync timer, type:election count:%" PRId64 ", electTimerLogicClockUser:%" PRId64 "", ths->vgId, + sTrace("vgId:%d, sync timer, type:election count:%" PRIu64 ", lc-user:%" PRIu64, ths->vgId, ths->electTimerCounter, ths->electTimerLogicClockUser); + syncNodeElect(ths); } } else if (pMsg->timeoutType == SYNC_TIMEOUT_HEARTBEAT) { if (atomic_load_64(&ths->heartbeatTimerLogicClockUser) <= pMsg->logicClock) { ++(ths->heartbeatTimerCounter); - sTrace("vgId:%d, sync timer, type:replicate count:%" PRId64 ", heartbeatTimerLogicClockUser:%" PRId64 "", - ths->vgId, ths->heartbeatTimerCounter, ths->heartbeatTimerLogicClockUser); - syncNodeReplicate(ths, true); + sTrace("vgId:%d, sync timer, type:replicate count:%" PRIu64 ", lc-user:%" PRIu64, ths->vgId, + ths->heartbeatTimerCounter, ths->heartbeatTimerLogicClockUser); + + // syncNodeReplicate(ths, true); } + } else { - sError("vgId:%d, unknown timeout-type:%d", ths->vgId, pMsg->timeoutType); + sError("vgId:%d, recv unknown timer-type:%d", ths->vgId, pMsg->timeoutType); } return ret; -} +} \ No newline at end of file diff --git a/source/libs/sync/src/syncVoteMgr.c b/source/libs/sync/src/syncVoteMgr.c index e10041d976..39d62b957a 100644 --- a/source/libs/sync/src/syncVoteMgr.c +++ b/source/libs/sync/src/syncVoteMgr.c @@ -138,27 +138,27 @@ char *voteGranted2Str(SVotesGranted *pVotesGranted) { // for debug ------------------- void voteGrantedPrint(SVotesGranted *pObj) { char *serialized = voteGranted2Str(pObj); - printf("voteGrantedPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("voteGrantedPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void voteGrantedPrint2(char *s, SVotesGranted *pObj) { char *serialized = voteGranted2Str(pObj); - printf("voteGrantedPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("voteGrantedPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void voteGrantedLog(SVotesGranted *pObj) { char *serialized = voteGranted2Str(pObj); - sTrace("voteGrantedLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("voteGrantedLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void voteGrantedLog2(char *s, SVotesGranted *pObj) { char *serialized = voteGranted2Str(pObj); - sTrace("voteGrantedLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("voteGrantedLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } @@ -267,26 +267,26 @@ char *votesRespond2Str(SVotesRespond *pVotesRespond) { // for debug ------------------- void votesRespondPrint(SVotesRespond *pObj) { char *serialized = votesRespond2Str(pObj); - printf("votesRespondPrint | len:%lu | %s \n", strlen(serialized), serialized); + printf("votesRespondPrint | len:%d | %s \n", (int32_t)strlen(serialized), serialized); fflush(NULL); taosMemoryFree(serialized); } void votesRespondPrint2(char *s, SVotesRespond *pObj) { char *serialized = votesRespond2Str(pObj); - printf("votesRespondPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + printf("votesRespondPrint2 | len:%d | %s | %s \n", (int32_t)strlen(serialized), s, serialized); fflush(NULL); taosMemoryFree(serialized); } void votesRespondLog(SVotesRespond *pObj) { char *serialized = votesRespond2Str(pObj); - sTrace("votesRespondLog | len:%lu | %s", strlen(serialized), serialized); + sTrace("votesRespondLog | len:%d | %s", (int32_t)strlen(serialized), serialized); taosMemoryFree(serialized); } void votesRespondLog2(char *s, SVotesRespond *pObj) { char *serialized = votesRespond2Str(pObj); - sTrace("votesRespondLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + sTrace("votesRespondLog2 | len:%d | %s | %s", (int32_t)strlen(serialized), s, serialized); taosMemoryFree(serialized); } diff --git a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp index c523fbc1c3..95677e592b 100644 --- a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp +++ b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp @@ -237,8 +237,8 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal* gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; - gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend; - gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp; + gSyncIO->FpOnSyncSnapshot = pSyncNode->FpOnSnapshot; + gSyncIO->FpOnSyncSnapshotReply = pSyncNode->FpOnSnapshotReply; gSyncIO->pSyncNode = pSyncNode; syncNodeRelease(pSyncNode); diff --git a/source/libs/sync/test/syncEncodeTest.cpp b/source/libs/sync/test/syncEncodeTest.cpp index 4016f07442..8b209c4c9e 100644 --- a/source/libs/sync/test/syncEncodeTest.cpp +++ b/source/libs/sync/test/syncEncodeTest.cpp @@ -181,8 +181,11 @@ int main(int argc, char **argv) { SSyncNode *pSyncNode = syncNodeInit(); assert(pSyncNode != NULL); SSyncRaftEntry *pEntry = pMsg4; - pSyncNode->pLogStore->appendEntry(pSyncNode->pLogStore, pEntry); - SSyncRaftEntry *pEntry2 = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, pEntry->index); + pSyncNode->pLogStore->syncLogAppendEntry(pSyncNode->pLogStore, pEntry); + + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, pEntry->index, &pEntry); + ASSERT(code == 0); + syncEntryLog2((char *)"==pEntry2==", pEntry2); // step5 diff --git a/source/libs/sync/test/syncHeartbeatReplyTest.cpp b/source/libs/sync/test/syncHeartbeatReplyTest.cpp index f5519fe6d4..1fac03652b 100644 --- a/source/libs/sync/test/syncHeartbeatReplyTest.cpp +++ b/source/libs/sync/test/syncHeartbeatReplyTest.cpp @@ -36,7 +36,7 @@ void test1() { void test2() { SyncHeartbeatReply *pMsg = createMsg(); uint32_t len = pMsg->bytes; - char *serialized = (char *)taosMemoryMalloc(len); + char * serialized = (char *)taosMemoryMalloc(len); syncHeartbeatReplySerialize(pMsg, serialized, len); SyncHeartbeatReply *pMsg2 = syncHeartbeatReplyBuild(1000); syncHeartbeatReplyDeserialize(serialized, len, pMsg2); @@ -50,7 +50,7 @@ void test2() { void test3() { SyncHeartbeatReply *pMsg = createMsg(); uint32_t len; - char *serialized = syncHeartbeatReplySerialize2(pMsg, &len); + char * serialized = syncHeartbeatReplySerialize2(pMsg, &len); SyncHeartbeatReply *pMsg2 = syncHeartbeatReplyDeserialize2(serialized, len); syncHeartbeatReplyLog2((char *)"test3: syncHeartbeatReplySerialize3 -> syncHeartbeatReplyDeserialize2 ", pMsg2); diff --git a/source/libs/sync/test/syncHeartbeatTest.cpp b/source/libs/sync/test/syncHeartbeatTest.cpp index 577af33b9c..b0c0554355 100644 --- a/source/libs/sync/test/syncHeartbeatTest.cpp +++ b/source/libs/sync/test/syncHeartbeatTest.cpp @@ -35,7 +35,7 @@ void test1() { void test2() { SyncHeartbeat *pMsg = createMsg(); uint32_t len = pMsg->bytes; - char *serialized = (char *)taosMemoryMalloc(len); + char * serialized = (char *)taosMemoryMalloc(len); syncHeartbeatSerialize(pMsg, serialized, len); SyncHeartbeat *pMsg2 = syncHeartbeatBuild(789); syncHeartbeatDeserialize(serialized, len, pMsg2); @@ -49,7 +49,7 @@ void test2() { void test3() { SyncHeartbeat *pMsg = createMsg(); uint32_t len; - char *serialized = syncHeartbeatSerialize2(pMsg, &len); + char * serialized = syncHeartbeatSerialize2(pMsg, &len); SyncHeartbeat *pMsg2 = syncHeartbeatDeserialize2(serialized, len); syncHeartbeatLog2((char *)"test3: syncHeartbeatSerialize2 -> syncHeartbeatDeserialize2 ", pMsg2); diff --git a/source/libs/sync/test/syncLogStoreTest.cpp b/source/libs/sync/test/syncLogStoreTest.cpp index 9cb8194aa7..9ff0ed2089 100644 --- a/source/libs/sync/test/syncLogStoreTest.cpp +++ b/source/libs/sync/test/syncLogStoreTest.cpp @@ -52,7 +52,7 @@ void cleanup() { void logStoreTest() { pLogStore = logStoreCreate(pSyncNode); assert(pLogStore); - assert(pLogStore->getLastIndex(pLogStore) == SYNC_INDEX_INVALID); + assert(pLogStore->syncLogLastIndex(pLogStore) == SYNC_INDEX_INVALID); logStoreLog2((char*)"logStoreTest", pLogStore); @@ -65,22 +65,20 @@ void logStoreTest() { pEntry->seqNum = 3; pEntry->isWeak = true; pEntry->term = 100 + i; - pEntry->index = pLogStore->getLastIndex(pLogStore) + 1; + pEntry->index = pLogStore->syncLogLastIndex(pLogStore) + 1; snprintf(pEntry->data, dataLen, "value%d", i); syncEntryLog2((char*)"==write entry== :", pEntry); - pLogStore->appendEntry(pLogStore, pEntry); + pLogStore->syncLogAppendEntry(pLogStore, pEntry); syncEntryDestory(pEntry); if (i == 0) { - assert(pLogStore->getLastIndex(pLogStore) == SYNC_INDEX_BEGIN); + assert(pLogStore->syncLogLastIndex(pLogStore) == SYNC_INDEX_BEGIN); } } logStoreLog2((char*)"after appendEntry", pLogStore); - - pLogStore->truncate(pLogStore, 3); + pLogStore->syncLogTruncate(pLogStore, 3); logStoreLog2((char*)"after truncate 3", pLogStore); - logStoreDestory(pLogStore); } diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp index e718d37376..1cdecfe5b3 100644 --- a/source/libs/sync/test/syncTestTool.cpp +++ b/source/libs/sync/test/syncTestTool.cpp @@ -266,14 +266,12 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal* gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout; gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest; - gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; - - gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend; - gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp; + gSyncIO->FpOnSyncSnapshot = pSyncNode->FpOnSnapshot; + gSyncIO->FpOnSyncSnapshotReply = pSyncNode->FpOnSnapshotReply; gSyncIO->pSyncNode = pSyncNode; syncNodeRelease(pSyncNode); diff --git a/source/libs/transport/src/trans.c b/source/libs/transport/src/trans.c index b649b1e6c1..756a8ff2cf 100644 --- a/source/libs/transport/src/trans.c +++ b/source/libs/transport/src/trans.c @@ -69,6 +69,7 @@ void* rpcOpen(const SRpcInit* pInit) { pRpc->idleTime = pInit->idleTime; pRpc->tcphandle = (*taosInitHandle[pRpc->connType])(ip, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc); + if (pRpc->tcphandle == NULL) { taosMemoryFree(pRpc); return NULL; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 48b3097449..126b0b638e 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -439,12 +439,14 @@ void cliHandleExceptImpl(SCliConn* pConn, int32_t code) { tDebug("%s conn %p construct ahandle %p by %s", CONN_GET_INST_LABEL(pConn), pConn, transMsg.info.ahandle, TMSG_INFO(transMsg.msgType)); if (transMsg.info.ahandle == NULL) { - transMsg.info.ahandle = transCtxDumpBrokenlinkVal(&pConn->ctx, (int32_t*)&(transMsg.msgType)); + int32_t msgType = 0; + transMsg.info.ahandle = transCtxDumpBrokenlinkVal(&pConn->ctx, &msgType); + transMsg.msgType = msgType; tDebug("%s conn %p construct ahandle %p due to brokenlink", CONN_GET_INST_LABEL(pConn), pConn, transMsg.info.ahandle); } } else { - transMsg.info.ahandle = (pMsg->type != Release && pCtx) ? pCtx->ahandle : NULL; + transMsg.info.ahandle = (pMsg != NULL && pMsg->type != Release && pCtx) ? pCtx->ahandle : NULL; } if (pCtx == NULL || pCtx->pSem == NULL) { @@ -1078,9 +1080,6 @@ static void cliPrepareCb(uv_prepare_t* handle) { QUEUE_REMOVE(h); SCliMsg* pMsg = QUEUE_DATA(h, SCliMsg, q); - if (pMsg == NULL) { - continue; - } (*cliAsyncHandle[pMsg->type])(pMsg, thrd); count++; } diff --git a/source/os/src/osDir.c b/source/os/src/osDir.c index 2902f90f7b..ca99742773 100644 --- a/source/os/src/osDir.c +++ b/source/os/src/osDir.c @@ -388,8 +388,15 @@ char *taosDirEntryBaseName(char *name) { _splitpath(name, NULL, NULL, Filename1, Ext1); return name + (strlen(name) - strlen(Filename1) - strlen(Ext1)); #else - char *pPoint = strchr(name, '.'); - if (pPoint != NULL) pPoint = 0; + if (name == NULL || (name[0] == '/' && name[1] == '\0')) return name; + char *pPoint = strrchr(name, '/'); + if (pPoint != NULL) { + if (*(pPoint + 1) == '\0') { + *pPoint = '\0'; + return taosDirEntryBaseName(name); + } + return pPoint + 1; + } return name; #endif } diff --git a/source/os/src/osSocket.c b/source/os/src/osSocket.c index fc943d4d76..fd5bde90ba 100644 --- a/source/os/src/osSocket.c +++ b/source/os/src/osSocket.c @@ -73,6 +73,7 @@ typedef struct TdEpoll { EpollFd fd; } * TdEpollPtr, TdEpoll; +#if 0 int32_t taosSendto(TdSocketPtr pSocket, void *buf, int len, unsigned int flags, const struct sockaddr *dest_addr, int addrlen) { if (pSocket == NULL || pSocket->fd < 0) { @@ -84,6 +85,7 @@ int32_t taosSendto(TdSocketPtr pSocket, void *buf, int len, unsigned int flags, return sendto(pSocket->fd, buf, len, flags, dest_addr, addrlen); #endif } + int32_t taosWriteSocket(TdSocketPtr pSocket, void *buf, int len) { if (pSocket == NULL || pSocket->fd < 0) { return -1; @@ -114,6 +116,8 @@ int32_t taosReadFromSocket(TdSocketPtr pSocket, void *buf, int32_t len, int32_t } return recvfrom(pSocket->fd, buf, len, flags, destAddr, addrLen); } +#endif // endif 0 + int32_t taosCloseSocketNoCheck1(SocketFd fd) { #ifdef WINDOWS return closesocket(fd); @@ -121,6 +125,7 @@ int32_t taosCloseSocketNoCheck1(SocketFd fd) { return close(fd); #endif } + int32_t taosCloseSocket(TdSocketPtr *ppSocket) { int32_t code; if (ppSocket == NULL || *ppSocket == NULL || (*ppSocket)->fd < 0) { @@ -131,6 +136,8 @@ int32_t taosCloseSocket(TdSocketPtr *ppSocket) { taosMemoryFree(*ppSocket); return code; } + +#if 0 int32_t taosCloseSocketServer(TdSocketServerPtr *ppSocketServer) { int32_t code; if (ppSocketServer == NULL || *ppSocketServer == NULL || (*ppSocketServer)->fd < 0) { @@ -216,20 +223,6 @@ int32_t taosShutDownSocketServerRDWR(TdSocketServerPtr pSocketServer) { #endif } -void taosWinSocketInit() { -#ifdef WINDOWS - static char flag = 0; - if (flag == 0) { - WORD wVersionRequested; - WSADATA wsaData; - wVersionRequested = MAKEWORD(1, 1); - if (WSAStartup(wVersionRequested, &wsaData) == 0) { - flag = 1; - } - } -#else -#endif -} int32_t taosSetNonblocking(TdSocketPtr pSocket, int32_t on) { if (pSocket == NULL || pSocket->fd < 0) { return -1; @@ -262,6 +255,8 @@ int32_t taosSetNonblocking(TdSocketPtr pSocket, int32_t on) { #endif return 0; } +#endif // endif 0 + int32_t taosSetSockOpt(TdSocketPtr pSocket, int32_t level, int32_t optname, void *optval, int32_t optlen) { if (pSocket == NULL || pSocket->fd < 0) { return -1; @@ -296,6 +291,8 @@ int32_t taosSetSockOpt(TdSocketPtr pSocket, int32_t level, int32_t optname, void return setsockopt(pSocket->fd, level, optname, optval, (int)optlen); #endif } + +#if 0 int32_t taosGetSockOpt(TdSocketPtr pSocket, int32_t level, int32_t optname, void *optval, int32_t *optlen) { if (pSocket == NULL || pSocket->fd < 0) { return -1; @@ -307,6 +304,9 @@ int32_t taosGetSockOpt(TdSocketPtr pSocket, int32_t level, int32_t optname, void return getsockopt(pSocket->fd, level, optname, optval, (int *)optlen); #endif } + +#endif + uint32_t taosInetAddr(const char *ipAddr) { #ifdef WINDOWS uint32_t value; @@ -330,6 +330,7 @@ const char *taosInetNtoa(struct in_addr ipInt, char *dstStr, int32_t len) { #define TCP_CONN_TIMEOUT 3000 // conn timeout +#if 0 int32_t taosWriteMsg(TdSocketPtr pSocket, void *buf, int32_t nbytes) { if (pSocket == NULL || pSocket->fd < 0) { return -1; @@ -726,6 +727,7 @@ int taosValidIp(uint32_t ip) { #endif return 0; } +#endif // endif 0 bool taosValidIpAndPort(uint32_t ip, uint16_t port) { struct sockaddr_in serverAdd; @@ -774,6 +776,8 @@ bool taosValidIpAndPort(uint32_t ip, uint16_t port) { return true; // return 0 == taosValidIp(ip) ? true : false; } + +#if 0 TdSocketServerPtr taosOpenTcpServerSocket(uint32_t ip, uint16_t port) { struct sockaddr_in serverAdd; SocketFd fd; @@ -888,6 +892,36 @@ int64_t taosCopyFds(TdSocketPtr pSrcSocket, TdSocketPtr pDestSocket, int64_t len return len; } +// Function converting an IP address string to an uint32_t. +uint32_t ip2uint(const char *const ip_addr) { + char ip_addr_cpy[20]; + char ip[5]; + + tstrncpy(ip_addr_cpy, ip_addr, sizeof(ip_addr_cpy)); + + char *s_start, *s_end; + s_start = ip_addr_cpy; + s_end = ip_addr_cpy; + + int32_t k; + + for (k = 0; *s_start != '\0'; s_start = s_end) { + for (s_end = s_start; *s_end != '.' && *s_end != '\0'; s_end++) { + } + if (*s_end == '.') { + *s_end = '\0'; + s_end++; + } + ip[k++] = (char)atoi(s_start); + } + + ip[k] = '\0'; + + return *((uint32_t *)ip); +} + +#endif // endif 0 + void taosBlockSIGPIPE() { #ifdef WINDOWS // assert(0); @@ -991,34 +1025,6 @@ int32_t taosGetFqdn(char *fqdn) { return 0; } -// Function converting an IP address string to an uint32_t. -uint32_t ip2uint(const char *const ip_addr) { - char ip_addr_cpy[20]; - char ip[5]; - - tstrncpy(ip_addr_cpy, ip_addr, sizeof(ip_addr_cpy)); - - char *s_start, *s_end; - s_start = ip_addr_cpy; - s_end = ip_addr_cpy; - - int32_t k; - - for (k = 0; *s_start != '\0'; s_start = s_end) { - for (s_end = s_start; *s_end != '.' && *s_end != '\0'; s_end++) { - } - if (*s_end == '.') { - *s_end = '\0'; - s_end++; - } - ip[k++] = (char)atoi(s_start); - } - - ip[k] = '\0'; - - return *((uint32_t *)ip); -} - void tinet_ntoa(char *ipstr, uint32_t ip) { sprintf(ipstr, "%d.%d.%d.%d", ip & 0xFF, (ip >> 8) & 0xFF, (ip >> 16) & 0xFF, ip >> 24); } @@ -1039,12 +1045,14 @@ void taosSetMaskSIGPIPE() { #endif } +#if 0 int32_t taosGetSocketName(TdSocketPtr pSocket, struct sockaddr *destAddr, int *addrLen) { if (pSocket == NULL || pSocket->fd < 0) { return -1; } return getsockname(pSocket->fd, destAddr, addrLen); } +#endif // endif 0 /* * Set TCP connection timeout per-socket level. @@ -1080,3 +1088,18 @@ int32_t taosCreateSocketWithTimeout(uint32_t timeout) { return (int)fd; } + +void taosWinSocketInit() { +#ifdef WINDOWS + static char flag = 0; + if (flag == 0) { + WORD wVersionRequested; + WSADATA wsaData; + wVersionRequested = MAKEWORD(1, 1); + if (WSAStartup(wVersionRequested, &wsaData) == 0) { + flag = 1; + } + } +#else +#endif +} diff --git a/source/util/src/tqueue.c b/source/util/src/tqueue.c index 8748f8df3f..f1f926c0b7 100644 --- a/source/util/src/tqueue.c +++ b/source/util/src/tqueue.c @@ -141,14 +141,10 @@ int32_t taosQueueItemSize(STaosQueue *queue) { } int64_t taosQueueMemorySize(STaosQueue *queue) { -#if 1 - return queue->memOfItems; -#else taosThreadMutexLock(&queue->mutex); int64_t memOfItems = queue->memOfItems; taosThreadMutexUnlock(&queue->mutex); return memOfItems; -#endif } void *taosAllocateQitem(int32_t size, EQItype itype) { diff --git a/tests/pytest/util/dnodes.py b/tests/pytest/util/dnodes.py index 4bcbe190d5..b9504875e5 100644 --- a/tests/pytest/util/dnodes.py +++ b/tests/pytest/util/dnodes.py @@ -131,7 +131,7 @@ class TDDnode: "qDebugFlag": "143", "rpcDebugFlag": "143", "tmrDebugFlag": "131", - "uDebugFlag": "131", + "uDebugFlag": "143", "sDebugFlag": "143", "wDebugFlag": "143", "numOfLogLines": "100000000", diff --git a/tests/script/tsim/mnode/basic5.sim b/tests/script/tsim/mnode/basic5.sim index 16e8fa3dfa..e96b193b83 100644 --- a/tests/script/tsim/mnode/basic5.sim +++ b/tests/script/tsim/mnode/basic5.sim @@ -183,7 +183,7 @@ $x = 0 step71: $x = $x + 1 sleep 1000 - if $x == 10 then + if $x == 50 then return -1 endi sql select * from information_schema.ins_dnodes diff --git a/tests/script/tsim/parser/where.sim b/tests/script/tsim/parser/where.sim index e242dea8ec..c1660883b6 100644 --- a/tests/script/tsim/parser/where.sim +++ b/tests/script/tsim/parser/where.sim @@ -87,17 +87,17 @@ if $rows != 2 then return -1 endi -print $tbPrefix -$tb = $tbPrefix . 0 -if $data00 != wh_tb1 then - print expect wh_tb1, actual:$data00 - return -1 -endi -$tb = $tbPrefix . 1 -if $data10 != wh_tb0 then - print expect wh_tb0, actual:$data00 - return -1 -endi +#print $tbPrefix +#$tb = $tbPrefix . 0 +#if $data00 != wh_tb1 then +# print expect wh_tb1, actual:$data00 +# return -1 +#endi +#$tb = $tbPrefix . 1 +#if $data10 != wh_tb0 then +# print expect wh_tb0, actual:$data00 +# return -1 +#endi ## select specified columns diff --git a/tests/script/tsim/sync/sync2-test.sim b/tests/script/tsim/sync/sync2-test.sim new file mode 100644 index 0000000000..4f6bf3f9f1 --- /dev/null +++ b/tests/script/tsim/sync/sync2-test.sim @@ -0,0 +1,175 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 +system sh/deploy.sh -n dnode3 -i 3 +system sh/deploy.sh -n dnode4 -i 4 + +system sh/cfg.sh -n dnode1 -c supportVnodes -v 0 + +system sh/exec.sh -n dnode1 -s start +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode3 -s start +system sh/exec.sh -n dnode4 -s start + +sql connect +sql create dnode $hostname port 7200 +sql create dnode $hostname port 7300 +sql create dnode $hostname port 7400 + +$x = 0 +step1: + $x = $x + 1 + sleep 1000 + if $x == 10 then + print ====> dnode not ready! + return -1 + endi +sql select * from information_schema.ins_dnodes +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 +if $rows != 4 then + return -1 +endi +if $data(1)[4] != ready then + goto step1 +endi +if $data(2)[4] != ready then + goto step1 +endi +if $data(3)[4] != ready then + goto step1 +endi +if $data(4)[4] != ready then + goto step1 +endi + +$replica = 3 +$vgroups = 1 + +print ============= create database +sql create database db replica $replica vgroups $vgroups + +$loop_cnt = 0 +check_db_ready: +$loop_cnt = $loop_cnt + 1 +sleep 200 +if $loop_cnt == 100 then + print ====> db not ready! + return -1 +endi +sql select * from information_schema.ins_databases +print ===> rows: $rows +print $data[2][0] $data[2][1] $data[2][2] $data[2][3] $data[2][4] $data[2][5] $data[2][6] $data[2][7] $data[2][8] $data[2][9] $data[2][6] $data[2][11] $data[2][12] $data[2][13] $data[2][14] $data[2][15] $data[2][16] $data[2][17] $data[2][18] $data[2][19] +if $rows != 3 then + return -1 +endi +if $data[2][15] != ready then + goto check_db_ready +endi + +sql use db + +$loop_cnt = 0 +check_vg_ready: +$loop_cnt = $loop_cnt + 1 +sleep 200 +if $loop_cnt == 300 then + print ====> vgroups not ready! + return -1 +endi + +sql show vgroups +print ===> rows: $rows +print $data[0][0] $data[0][1] $data[0][2] $data[0][3] $data[0][4] $data[0][5] $data[0][6] $data[0][7] $data[0][8] $data[0][9] $data[0][10] $data[0][11] + +if $rows != $vgroups then + return -1 +endi + +if $data[0][4] == leader then + if $data[0][6] == follower then + if $data[0][8] == follower then + print ---- vgroup $data[0][0] leader locate on dnode $data[0][3] + endi + endi +elif $data[0][6] == leader then + if $data[0][4] == follower then + if $data[0][8] == follower then + print ---- vgroup $data[0][0] leader locate on dnode $data[0][5] + endi + endi +elif $data[0][8] == leader then + if $data[0][4] == follower then + if $data[0][6] == follower then + print ---- vgroup $data[0][0] leader locate on dnode $data[0][7] + endi + endi +else + goto check_vg_ready +endi + + + + + +#return 0 + + + + + +vg_ready: +print ====> create stable/child table +sql create table stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int) + + + + +#return 0 + + + +sql show stables +if $rows != 1 then + return -1 +endi + +sql create table ct1 using stb tags(1000) + + + + + + + + +print ====> step1 insert 1000 records +$N = 1000 +$count = 0 +while $count < $N + $ms = 1591200000000 + $count + sql insert into ct1 values( $ms , $count , 2.1, 3.1) + $count = $count + 1 +endw + +print ====> step2 sleep 20s, checking data +sleep 20000 + + +print ====> step3 sleep 30s, kill leader +sleep 30000 + +print ====> step4 insert 1000 records +$N = 1000 +$count = 0 +while $count < $N + $ms = 1591201000000 + $count + sql insert into ct1 values( $ms , $count , 2.1, 3.1) + $count = $count + 1 +endw + +print ====> step5 sleep 20s, checking data +sleep 20000 + diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index a89b41cac6..a095bdf045 100644 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -249,8 +249,8 @@ python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 5 - python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 6 -M 3 -C 5 # BUG python3 ./test.py -f 6-cluster/5dnode3mnodeStopInsert.py -python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5 -python3 test.py -f 6-cluster/5dnode3mnodeStopConnect.py -N 5 -M 3 +# TD-19646 python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5 +# TD-19646 python3 test.py -f 6-cluster/5dnode3mnodeStopConnect.py -N 5 -M 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRecreateMnode.py -N 5 -M 3 python3 ./test.py -f 6-cluster/5dnode3mnodeStopFollowerLeader.py -N 5 -M 3 diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 98d21a4f2c..c6a5e33735 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -17,7 +17,8 @@ IF (TD_WEBSOCKET) PATCH_COMMAND COMMAND git clean -f -d BUILD_COMMAND - COMMAND cargo build --release -p taos-ws-sys + COMMAND cargo update + COMMAND cargo build --release -p taos-ws-sys --features native-tls-vendored COMMAND ./taos-ws-sys/ci/package.sh INSTALL_COMMAND COMMAND cmake -E copy target/libtaosws/${websocket_lib_file} ${CMAKE_BINARY_DIR}/build/lib @@ -36,7 +37,8 @@ IF (TD_WEBSOCKET) PATCH_COMMAND COMMAND git clean -f -d BUILD_COMMAND - COMMAND cargo build --release -p taos-ws-sys + COMMAND cargo update + COMMAND cargo build --release -p taos-ws-sys --features native-tls-vendored COMMAND ./taos-ws-sys/ci/package.sh INSTALL_COMMAND COMMAND cmake -E copy target/libtaosws/${websocket_lib_file} ${CMAKE_BINARY_DIR}/build/lib