homework-jianmu/source/libs/sync/src/syncRaftEntry.c

446 lines
14 KiB
C

/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "syncRaftEntry.h"
#include "syncUtil.h"
#include "tref.h"
SSyncRaftEntry* syncEntryBuild(int32_t dataLen) {
int32_t bytes = sizeof(SSyncRaftEntry) + dataLen;
SSyncRaftEntry* pEntry = taosMemoryMalloc(bytes);
if (pEntry == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pEntry->bytes = bytes;
pEntry->dataLen = dataLen;
pEntry->rid = -1;
return pEntry;
}
SSyncRaftEntry* syncEntryBuildFromClientRequest(const SyncClientRequest* pMsg, SyncTerm term, SyncIndex index) {
SSyncRaftEntry* pEntry = syncEntryBuild(pMsg->dataLen);
if (pEntry == NULL) return NULL;
pEntry->msgType = pMsg->msgType;
pEntry->originalRpcType = pMsg->originalRpcType;
pEntry->seqNum = pMsg->seqNum;
pEntry->isWeak = pMsg->isWeak;
pEntry->term = term;
pEntry->index = index;
memcpy(pEntry->data, pMsg->data, pMsg->dataLen);
return pEntry;
}
SSyncRaftEntry* syncEntryBuildFromRpcMsg(const SRpcMsg* pMsg, SyncTerm term, SyncIndex index) {
SSyncRaftEntry* pEntry = syncEntryBuild(pMsg->contLen);
if (pEntry == NULL) return NULL;
pEntry->msgType = TDMT_SYNC_CLIENT_REQUEST;
pEntry->originalRpcType = pMsg->msgType;
pEntry->seqNum = 0;
pEntry->isWeak = 0;
pEntry->term = term;
pEntry->index = index;
memcpy(pEntry->data, pMsg->pCont, pMsg->contLen);
return pEntry;
}
SSyncRaftEntry* syncEntryBuildFromAppendEntries(const SyncAppendEntries* pMsg) {
SSyncRaftEntry* pEntry = syncEntryBuild((int32_t)(pMsg->dataLen));
if (pEntry == NULL) return NULL;
memcpy(pEntry, pMsg->data, pMsg->dataLen);
return pEntry;
}
SSyncRaftEntry* syncEntryBuildNoop(SyncTerm term, SyncIndex index, int32_t vgId) {
SSyncRaftEntry* pEntry = syncEntryBuild(sizeof(SMsgHead));
if (pEntry == NULL) return NULL;
pEntry->msgType = TDMT_SYNC_CLIENT_REQUEST;
pEntry->originalRpcType = TDMT_SYNC_NOOP;
pEntry->seqNum = 0;
pEntry->isWeak = 0;
pEntry->term = term;
pEntry->index = index;
SMsgHead* pHead = (SMsgHead*)pEntry->data;
pHead->vgId = vgId;
pHead->contLen = sizeof(SMsgHead);
return pEntry;
}
void syncEntryDestory(SSyncRaftEntry* pEntry) {
if (pEntry != NULL) {
sTrace("free entry: %p", pEntry);
taosMemoryFree(pEntry);
}
}
void syncEntry2OriginalRpc(const SSyncRaftEntry* pEntry, SRpcMsg* pRpcMsg) {
pRpcMsg->msgType = pEntry->originalRpcType;
pRpcMsg->contLen = (int32_t)(pEntry->dataLen);
pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen);
memcpy(pRpcMsg->pCont, pEntry->data, pRpcMsg->contLen);
}
SRaftEntryHashCache* raftCacheCreate(SSyncNode* pSyncNode, int32_t maxCount) {
SRaftEntryHashCache* pCache = taosMemoryMalloc(sizeof(SRaftEntryHashCache));
if (pCache == NULL) {
sError("vgId:%d, raft cache create error", pSyncNode->vgId);
return NULL;
}
pCache->pEntryHash =
taosHashInit(sizeof(SyncIndex), taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
if (pCache->pEntryHash == NULL) {
sError("vgId:%d, raft cache create hash error", pSyncNode->vgId);
return NULL;
}
taosThreadMutexInit(&(pCache->mutex), NULL);
pCache->maxCount = maxCount;
pCache->currentCount = 0;
pCache->pSyncNode = pSyncNode;
return pCache;
}
void raftCacheDestroy(SRaftEntryHashCache* pCache) {
if (pCache != NULL) {
taosThreadMutexLock(&pCache->mutex);
taosHashCleanup(pCache->pEntryHash);
taosThreadMutexUnlock(&pCache->mutex);
taosThreadMutexDestroy(&(pCache->mutex));
taosMemoryFree(pCache);
}
}
// success, return 1
// max count, return 0
// error, return -1
int32_t raftCachePutEntry(struct SRaftEntryHashCache* pCache, SSyncRaftEntry* pEntry) {
taosThreadMutexLock(&pCache->mutex);
if (pCache->currentCount >= pCache->maxCount) {
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
taosHashPut(pCache->pEntryHash, &(pEntry->index), sizeof(pEntry->index), pEntry, pEntry->bytes);
++(pCache->currentCount);
sNTrace(pCache->pSyncNode, "raft cache add, type:%s,%d, type2:%s,%d, index:%" PRId64 ", bytes:%d",
TMSG_INFO(pEntry->msgType), pEntry->msgType, TMSG_INFO(pEntry->originalRpcType), pEntry->originalRpcType,
pEntry->index, pEntry->bytes);
taosThreadMutexUnlock(&pCache->mutex);
return 1;
}
// success, return 0
// error, return -1
// not exist, return -1, terrno = TSDB_CODE_WAL_LOG_NOT_EXIST
int32_t raftCacheGetEntry(struct SRaftEntryHashCache* pCache, SyncIndex index, SSyncRaftEntry** ppEntry) {
if (ppEntry == NULL) {
return -1;
}
*ppEntry = NULL;
taosThreadMutexLock(&pCache->mutex);
void* pTmp = taosHashGet(pCache->pEntryHash, &index, sizeof(index));
if (pTmp != NULL) {
SSyncRaftEntry* pEntry = pTmp;
*ppEntry = taosMemoryMalloc(pEntry->bytes);
memcpy(*ppEntry, pTmp, pEntry->bytes);
sNTrace(pCache->pSyncNode, "raft cache get, type:%s,%d, type2:%s,%d, index:%" PRId64,
TMSG_INFO((*ppEntry)->msgType), (*ppEntry)->msgType, TMSG_INFO((*ppEntry)->originalRpcType),
(*ppEntry)->originalRpcType, (*ppEntry)->index);
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
taosThreadMutexUnlock(&pCache->mutex);
terrno = TSDB_CODE_WAL_LOG_NOT_EXIST;
return -1;
}
// success, return 0
// error, return -1
// not exist, return -1, terrno = TSDB_CODE_WAL_LOG_NOT_EXIST
int32_t raftCacheGetEntryP(struct SRaftEntryHashCache* pCache, SyncIndex index, SSyncRaftEntry** ppEntry) {
if (ppEntry == NULL) {
return -1;
}
*ppEntry = NULL;
taosThreadMutexLock(&pCache->mutex);
void* pTmp = taosHashGet(pCache->pEntryHash, &index, sizeof(index));
if (pTmp != NULL) {
SSyncRaftEntry* pEntry = pTmp;
*ppEntry = pEntry;
sNTrace(pCache->pSyncNode, "raft cache get, type:%s,%d, type2:%s,%d, index:%" PRId64,
TMSG_INFO((*ppEntry)->msgType), (*ppEntry)->msgType, TMSG_INFO((*ppEntry)->originalRpcType),
(*ppEntry)->originalRpcType, (*ppEntry)->index);
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
taosThreadMutexUnlock(&pCache->mutex);
terrno = TSDB_CODE_WAL_LOG_NOT_EXIST;
return -1;
}
int32_t raftCacheDelEntry(struct SRaftEntryHashCache* pCache, SyncIndex index) {
taosThreadMutexLock(&pCache->mutex);
taosHashRemove(pCache->pEntryHash, &index, sizeof(index));
--(pCache->currentCount);
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
int32_t raftCacheGetAndDel(struct SRaftEntryHashCache* pCache, SyncIndex index, SSyncRaftEntry** ppEntry) {
if (ppEntry == NULL) {
return -1;
}
*ppEntry = NULL;
taosThreadMutexLock(&pCache->mutex);
void* pTmp = taosHashGet(pCache->pEntryHash, &index, sizeof(index));
if (pTmp != NULL) {
SSyncRaftEntry* pEntry = pTmp;
*ppEntry = taosMemoryMalloc(pEntry->bytes);
memcpy(*ppEntry, pTmp, pEntry->bytes);
sNTrace(pCache->pSyncNode, "raft cache get-and-del, type:%s,%d, type2:%s,%d, index:%" PRId64,
TMSG_INFO((*ppEntry)->msgType), (*ppEntry)->msgType, TMSG_INFO((*ppEntry)->originalRpcType),
(*ppEntry)->originalRpcType, (*ppEntry)->index);
taosHashRemove(pCache->pEntryHash, &index, sizeof(index));
--(pCache->currentCount);
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
taosThreadMutexUnlock(&pCache->mutex);
terrno = TSDB_CODE_WAL_LOG_NOT_EXIST;
return -1;
}
int32_t raftCacheClear(struct SRaftEntryHashCache* pCache) {
taosThreadMutexLock(&pCache->mutex);
taosHashClear(pCache->pEntryHash);
pCache->currentCount = 0;
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
static char* keyFn(const void* pData) {
SSyncRaftEntry* pEntry = (SSyncRaftEntry*)pData;
return (char*)(&(pEntry->index));
}
static int cmpFn(const void* p1, const void* p2) { return memcmp(p1, p2, sizeof(SyncIndex)); }
static void freeRaftEntry(void* param) {
SSyncRaftEntry* pEntry = (SSyncRaftEntry*)param;
syncEntryDestory(pEntry);
}
SRaftEntryCache* raftEntryCacheCreate(SSyncNode* pSyncNode, int32_t maxCount) {
SRaftEntryCache* pCache = taosMemoryMalloc(sizeof(SRaftEntryCache));
if (pCache == NULL) {
sError("vgId:%d, raft cache create error", pSyncNode->vgId);
return NULL;
}
pCache->pSkipList =
tSkipListCreate(MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_BINARY, sizeof(SyncIndex), cmpFn, SL_ALLOW_DUP_KEY, keyFn);
if (pCache->pSkipList == NULL) {
sError("vgId:%d, raft cache create hash error", pSyncNode->vgId);
return NULL;
}
taosThreadMutexInit(&(pCache->mutex), NULL);
pCache->refMgr = taosOpenRef(10, freeRaftEntry);
pCache->maxCount = maxCount;
pCache->currentCount = 0;
pCache->pSyncNode = pSyncNode;
return pCache;
}
void raftEntryCacheDestroy(SRaftEntryCache* pCache) {
if (pCache != NULL) {
taosThreadMutexLock(&pCache->mutex);
tSkipListDestroy(pCache->pSkipList);
if (pCache->refMgr != -1) {
taosCloseRef(pCache->refMgr);
pCache->refMgr = -1;
}
taosThreadMutexUnlock(&pCache->mutex);
taosThreadMutexDestroy(&(pCache->mutex));
taosMemoryFree(pCache);
}
}
// success, return 1
// max count, return 0
// error, return -1
int32_t raftEntryCachePutEntry(struct SRaftEntryCache* pCache, SSyncRaftEntry* pEntry) {
taosThreadMutexLock(&pCache->mutex);
if (pCache->currentCount >= pCache->maxCount) {
taosThreadMutexUnlock(&pCache->mutex);
return 0;
}
SSkipListNode* pSkipListNode = tSkipListPut(pCache->pSkipList, pEntry);
ASSERT(pSkipListNode != NULL);
++(pCache->currentCount);
pEntry->rid = taosAddRef(pCache->refMgr, pEntry);
ASSERT(pEntry->rid >= 0);
sNTrace(pCache->pSyncNode, "raft cache add, type:%s,%d, type2:%s,%d, index:%" PRId64 ", bytes:%d",
TMSG_INFO(pEntry->msgType), pEntry->msgType, TMSG_INFO(pEntry->originalRpcType), pEntry->originalRpcType,
pEntry->index, pEntry->bytes);
taosThreadMutexUnlock(&pCache->mutex);
return 1;
}
// find one, return 1
// not found, return 0
// error, return -1
int32_t raftEntryCacheGetEntry(struct SRaftEntryCache* pCache, SyncIndex index, SSyncRaftEntry** ppEntry) {
ASSERT(ppEntry != NULL);
SSyncRaftEntry* pEntry = NULL;
int32_t code = raftEntryCacheGetEntryP(pCache, index, &pEntry);
if (code == 1) {
int32_t bytes = (int32_t)pEntry->bytes;
*ppEntry = taosMemoryMalloc((int64_t)bytes);
memcpy(*ppEntry, pEntry, pEntry->bytes);
(*ppEntry)->rid = -1;
} else {
*ppEntry = NULL;
}
return code;
}
// find one, return 1
// not found, return 0
// error, return -1
int32_t raftEntryCacheGetEntryP(struct SRaftEntryCache* pCache, SyncIndex index, SSyncRaftEntry** ppEntry) {
taosThreadMutexLock(&pCache->mutex);
SyncIndex index2 = index;
int32_t code = 0;
SArray* entryPArray = tSkipListGet(pCache->pSkipList, (char*)(&index2));
int32_t arraySize = taosArrayGetSize(entryPArray);
if (arraySize == 1) {
SSkipListNode** ppNode = (SSkipListNode**)taosArrayGet(entryPArray, 0);
ASSERT(*ppNode != NULL);
*ppEntry = (SSyncRaftEntry*)SL_GET_NODE_DATA(*ppNode);
taosAcquireRef(pCache->refMgr, (*ppEntry)->rid);
code = 1;
} else if (arraySize == 0) {
code = 0;
} else {
ASSERT(0);
code = -1;
}
taosArrayDestroy(entryPArray);
taosThreadMutexUnlock(&pCache->mutex);
return code;
}
// count = -1, clear all
// count >= 0, clear count
// return -1, error
// return delete count
int32_t raftEntryCacheClear(struct SRaftEntryCache* pCache, int32_t count) {
taosThreadMutexLock(&pCache->mutex);
int32_t returnCnt = 0;
if (count == -1) {
// clear all
SSkipListIterator* pIter = tSkipListCreateIter(pCache->pSkipList);
while (tSkipListIterNext(pIter)) {
SSkipListNode* pNode = tSkipListIterGet(pIter);
ASSERT(pNode != NULL);
SSyncRaftEntry* pEntry = (SSyncRaftEntry*)SL_GET_NODE_DATA(pNode);
syncEntryDestory(pEntry);
++returnCnt;
}
tSkipListDestroyIter(pIter);
tSkipListDestroy(pCache->pSkipList);
pCache->pSkipList =
tSkipListCreate(MAX_SKIP_LIST_LEVEL, TSDB_DATA_TYPE_BINARY, sizeof(SyncIndex), cmpFn, SL_ALLOW_DUP_KEY, keyFn);
ASSERT(pCache->pSkipList != NULL);
} else {
// clear count
int i = 0;
SSkipListIterator* pIter = tSkipListCreateIter(pCache->pSkipList);
SArray* delNodeArray = taosArrayInit(0, sizeof(SSkipListNode*));
// free entry
while (tSkipListIterNext(pIter)) {
SSkipListNode* pNode = tSkipListIterGet(pIter);
ASSERT(pNode != NULL);
if (i++ >= count) {
break;
}
// sDebug("push pNode:%p", pNode);
taosArrayPush(delNodeArray, &pNode);
++returnCnt;
SSyncRaftEntry* pEntry = (SSyncRaftEntry*)SL_GET_NODE_DATA(pNode);
// syncEntryDestory(pEntry);
taosRemoveRef(pCache->refMgr, pEntry->rid);
}
tSkipListDestroyIter(pIter);
// delete skiplist node
int32_t arraySize = taosArrayGetSize(delNodeArray);
for (int32_t i = 0; i < arraySize; ++i) {
SSkipListNode** ppNode = taosArrayGet(delNodeArray, i);
// sDebug("get pNode:%p", *ppNode);
tSkipListRemoveNode(pCache->pSkipList, *ppNode);
}
taosArrayDestroy(delNodeArray);
}
pCache->currentCount -= returnCnt;
taosThreadMutexUnlock(&pCache->mutex);
return returnCnt;
}