/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE
#include "vmInt.h"

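// Send an RPC response for a request message, reusing the response payload
// prepared in pMsg->info; a no-op when the message carries no RPC handle.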
static inline void vmSendRsp(SRpcMsg *pMsg, int32_t code) {
  if (pMsg->info.handle == NULL) return;
  SRpcMsg rsp = {
      .code = code,
      .pCont = pMsg->info.rsp,
      .contLen = pMsg->info.rspLen,
      .info = pMsg->info,
  };
  tmsgSendRsp(&rsp);
}

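// Worker callback for the vnode-mgmt queue: dispatches dnode-level vnode
// management requests (create/drop vnode, alter replica, disable write,
// alter hash range), replies to requests, then frees the message.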
static void vmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
  SVnodeMgmt     *pMgmt = pInfo->ahandle;
  int32_t         code = -1;
  const STraceId *trace = &pMsg->info.traceId;

  dGTrace("msg:%p, get from vnode-mgmt queue", pMsg);
  switch (pMsg->msgType) {
    case TDMT_DND_CREATE_VNODE:
      code = vmProcessCreateVnodeReq(pMgmt, pMsg);
      break;
    case TDMT_DND_DROP_VNODE:
      code = vmProcessDropVnodeReq(pMgmt, pMsg);
      break;
    case TDMT_VND_ALTER_REPLICA:
      code = vmProcessAlterVnodeReplicaReq(pMgmt, pMsg);
      break;
    case TDMT_VND_DISABLE_WRITE:
      code = vmProcessDisableVnodeWriteReq(pMgmt, pMsg);
      break;
    case TDMT_VND_ALTER_HASHRANGE:
      code = vmProcessAlterHashRangeReq(pMgmt, pMsg);
      break;
    default:
      terrno = TSDB_CODE_MSG_NOT_PROCESSED;
      dGError("msg:%p, not processed in vnode-mgmt queue", pMsg);
  }

  if (IsReq(pMsg)) {
    if (code != 0) {
      if (terrno != 0) code = terrno;
      dGError("msg:%p, failed to process since %s, type:%s", pMsg, terrstr(code), TMSG_INFO(pMsg->msgType));
    }
    vmSendRsp(pMsg, code);
  }

  dGTrace("msg:%p, is freed, code:0x%x", pMsg, code);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
}

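// Worker callback for the vnode-query queue: hands the message to the vnode's
// query handler and sends an error response only when processing fails.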
static void vmProcessQueryQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
  SVnodeObj      *pVnode = pInfo->ahandle;
  const STraceId *trace = &pMsg->info.traceId;

  dGTrace("vgId:%d, msg:%p get from vnode-query queue", pVnode->vgId, pMsg);
  int32_t code = vnodeProcessQueryMsg(pVnode->pImpl, pMsg);
  if (code != 0) {
    if (terrno != 0) code = terrno;
    dGError("vgId:%d, msg:%p failed to query since %s", pVnode->vgId, pMsg, terrstr(code));
    vmSendRsp(pMsg, code);
  }

  dGTrace("vgId:%d, msg:%p is freed, code:0x%x", pVnode->vgId, pMsg, code);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
}

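// Worker callback for the vnode-stream queue: stream messages are routed
// through the vnode's fetch handler; an error response is sent only on
// failure.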
static void vmProcessStreamQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
  SVnodeObj      *pVnode = pInfo->ahandle;
  const STraceId *trace = &pMsg->info.traceId;

  dGTrace("vgId:%d, msg:%p get from vnode-stream queue", pVnode->vgId, pMsg);
  int32_t code = vnodeProcessFetchMsg(pVnode->pImpl, pMsg, pInfo);
  if (code != 0) {
    if (terrno != 0) code = terrno;
    dGError("vgId:%d, msg:%p failed to process stream msg %s since %s", pVnode->vgId, pMsg, TMSG_INFO(pMsg->msgType),
            terrstr(code));
    vmSendRsp(pMsg, code);
  }

  dGTrace("vgId:%d, msg:%p is freed, code:0x%x", pVnode->vgId, pMsg, code);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
}

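// Batch worker callback for the vnode-fetch queue: drains up to numOfMsgs
// items from the queue, processes each fetch message, and frees it.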
static void vmProcessFetchQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
  SVnodeObj *pVnode = pInfo->ahandle;
  SRpcMsg   *pMsg = NULL;

  for (int32_t i = 0; i < numOfMsgs; ++i) {
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
    const STraceId *trace = &pMsg->info.traceId;
    dGTrace("vgId:%d, msg:%p get from vnode-fetch queue", pVnode->vgId, pMsg);

    int32_t code = vnodeProcessFetchMsg(pVnode->pImpl, pMsg, pInfo);
    if (code != 0) {
      if (terrno != 0) code = terrno;
      dGError("vgId:%d, msg:%p failed to fetch since %s", pVnode->vgId, pMsg, terrstr(code));
      vmSendRsp(pMsg, code);
    }

    dGTrace("vgId:%d, msg:%p is freed, code:0x%x", pVnode->vgId, pMsg, code);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }
}

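// Batch worker callback for the vnode-sync and vnode-sync-rd queues: each
// message is handed to the vnode's sync handler and no response is sent here.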
static void vmProcessSyncQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
  SVnodeObj *pVnode = pInfo->ahandle;
  SRpcMsg   *pMsg = NULL;

  for (int32_t i = 0; i < numOfMsgs; ++i) {
    if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
    const STraceId *trace = &pMsg->info.traceId;
    dGTrace("vgId:%d, msg:%p get from vnode-sync queue", pVnode->vgId, pMsg);

    int32_t code = vnodeProcessSyncMsg(pVnode->pImpl, pMsg, NULL);  // no response here
    dGTrace("vgId:%d, msg:%p is freed, code:0x%x", pVnode->vgId, pMsg, code);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }
}

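// Send an immediate RPC response carrying the current terrno, if the message
// has an RPC handle to respond on.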
static void vmSendResponse(SRpcMsg *pMsg) {
  if (pMsg->info.handle) {
    SRpcMsg rsp = {.info = pMsg->info, .code = terrno};
    rpcSendResponse(&rsp);
  }
}

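// Validate the message head, locate the target vnode by vgId, and route the
// message into the queue that matches qtype. Write messages are additionally
// gated on disk space, the storage grant, and the vnode's disable flag.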
static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtype) {
  const STraceId *trace = &pMsg->info.traceId;
  if (pMsg->contLen < sizeof(SMsgHead)) {
    dGError("invalid rpc msg with no msg head at pCont. pMsg:%p, type:%s, contLen:%d", pMsg, TMSG_INFO(pMsg->msgType),
            pMsg->contLen);
    return -1;
  }

  SMsgHead *pHead = pMsg->pCont;
  int32_t   code = 0;

  pHead->contLen = ntohl(pHead->contLen);
  pHead->vgId = ntohl(pHead->vgId);

  SVnodeObj *pVnode = vmAcquireVnode(pMgmt, pHead->vgId);
  if (pVnode == NULL) {
    dGError("vgId:%d, msg:%p failed to put into vnode queue since %s, type:%s qtype:%d contLen:%d", pHead->vgId, pMsg,
            terrstr(), TMSG_INFO(pMsg->msgType), qtype, pHead->contLen);
    terrno = (terrno != 0) ? terrno : -1;
    return terrno;
  }

  switch (qtype) {
    case QUERY_QUEUE:
      code = vnodePreprocessQueryMsg(pVnode->pImpl, pMsg);
      if (code) {
        dError("vgId:%d, msg:%p preprocess query msg failed since %s", pVnode->vgId, pMsg, terrstr(code));
      } else {
        dGTrace("vgId:%d, msg:%p put into vnode-query queue", pVnode->vgId, pMsg);
        taosWriteQitem(pVnode->pQueryQ, pMsg);
      }
      break;
    case STREAM_QUEUE:
      dGTrace("vgId:%d, msg:%p put into vnode-stream queue", pVnode->vgId, pMsg);
      if (pMsg->msgType == TDMT_STREAM_TASK_DISPATCH) {
        vnodeEnqueueStreamMsg(pVnode->pImpl, pMsg);
      } else {
        taosWriteQitem(pVnode->pStreamQ, pMsg);
      }
      break;
    case FETCH_QUEUE:
      dGTrace("vgId:%d, msg:%p put into vnode-fetch queue", pVnode->vgId, pMsg);
      taosWriteQitem(pVnode->pFetchQ, pMsg);
      break;
    case WRITE_QUEUE:
      if (!osDataSpaceSufficient()) {
        terrno = TSDB_CODE_NO_ENOUGH_DISKSPACE;
        code = terrno;
        dError("vgId:%d, msg:%p put into vnode-write queue failed since %s", pVnode->vgId, pMsg, terrstr(code));
        break;
      }
      if (pMsg->msgType == TDMT_VND_SUBMIT && (grantCheck(TSDB_GRANT_STORAGE) != TSDB_CODE_SUCCESS)) {
        terrno = TSDB_CODE_VND_NO_WRITE_AUTH;
        code = terrno;
        dDebug("vgId:%d, msg:%p put into vnode-write queue failed since %s", pVnode->vgId, pMsg, terrstr(code));
        break;
      }
      if (pMsg->msgType != TDMT_VND_ALTER_CONFIRM && pVnode->disable) {
        dDebug("vgId:%d, msg:%p put into vnode-write queue failed since the vnode is disabled", pVnode->vgId, pMsg);
        terrno = TSDB_CODE_VND_STOPPED;
        break;
      }
      dGTrace("vgId:%d, msg:%p put into vnode-write queue", pVnode->vgId, pMsg);
      taosWriteQitem(pVnode->pWriteW.queue, pMsg);
      break;
    case SYNC_QUEUE:
      dGTrace("vgId:%d, msg:%p put into vnode-sync queue", pVnode->vgId, pMsg);
      taosWriteQitem(pVnode->pSyncW.queue, pMsg);
      break;
    case SYNC_RD_QUEUE:
      dGTrace("vgId:%d, msg:%p put into vnode-sync-rd queue", pVnode->vgId, pMsg);
      taosWriteQitem(pVnode->pSyncRdW.queue, pMsg);
      break;
    case APPLY_QUEUE:
      dGTrace("vgId:%d, msg:%p put into vnode-apply queue", pVnode->vgId, pMsg);
      taosWriteQitem(pVnode->pApplyW.queue, pMsg);
      break;
    default:
      code = -1;
      terrno = TSDB_CODE_INVALID_PARA;
      break;
  }

  vmReleaseVnode(pMgmt, pVnode);
  return code;
}

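// Thin wrappers that route a queued message into a specific vnode queue type.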
int32_t vmPutMsgToSyncRdQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_RD_QUEUE); }

int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); }

int32_t vmPutMsgToWriteQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, WRITE_QUEUE); }

int32_t vmPutMsgToQueryQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, QUERY_QUEUE); }

int32_t vmPutMsgToFetchQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, FETCH_QUEUE); }

int32_t vmPutMsgToStreamQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, STREAM_QUEUE); }

int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
  const STraceId *trace = &pMsg->info.traceId;
  dGTrace("msg:%p, put into vnode-mgmt queue", pMsg);
  taosWriteQitem(pMgmt->mgmtWorker.queue, pMsg);
  return 0;
}

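// Convert a raw RPC message into a queue item (copying the SRpcMsg envelope
// and taking over pCont from the caller), then enqueue it; on failure the
// content and the queue item are released here.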
int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) {
  if (pRpc->contLen < sizeof(SMsgHead)) {
    dError("invalid rpc msg with no msg head at pCont. pRpc:%p, type:%s, len:%d", pRpc, TMSG_INFO(pRpc->msgType),
           pRpc->contLen);
    rpcFreeCont(pRpc->pCont);
    pRpc->pCont = NULL;
    return -1;
  }

  SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg), RPC_QITEM, pRpc->contLen);
  if (pMsg == NULL) {
    rpcFreeCont(pRpc->pCont);
    pRpc->pCont = NULL;
    return -1;
  }

  SMsgHead *pHead = pRpc->pCont;
  dTrace("vgId:%d, msg:%p is created, type:%s len:%d", pHead->vgId, pMsg, TMSG_INFO(pRpc->msgType), pRpc->contLen);

  pHead->contLen = htonl(pHead->contLen);
  pHead->vgId = htonl(pHead->vgId);
  memcpy(pMsg, pRpc, sizeof(SRpcMsg));
  pRpc->pCont = NULL;

  int32_t code = vmPutMsgToQueue(pMgmt, pMsg, qtype);
  if (code != 0) {
    dTrace("msg:%p, is freed", pMsg);
    rpcFreeCont(pMsg->pCont);
    taosFreeQitem(pMsg);
  }

  return code;
}

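// Return the number of items currently pending in one of a vnode's queues,
// or 0 when the vnode cannot be acquired or the queue type is not tracked.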
int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) {
  int32_t    size = -1;
  SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId);
  if (pVnode != NULL) {
    switch (qtype) {
      case WRITE_QUEUE:
        size = taosQueueItemSize(pVnode->pWriteW.queue);
        break;
      case SYNC_QUEUE:
        size = taosQueueItemSize(pVnode->pSyncW.queue);
        break;
      case APPLY_QUEUE:
        size = taosQueueItemSize(pVnode->pApplyW.queue);
        break;
      case QUERY_QUEUE:
        size = taosQueueItemSize(pVnode->pQueryQ);
        break;
      case FETCH_QUEUE:
        size = taosQueueItemSize(pVnode->pFetchQ);
        break;
      case STREAM_QUEUE:
        size = taosQueueItemSize(pVnode->pStreamQ);
        break;
      default:
        break;
    }
    vmReleaseVnode(pMgmt, pVnode);
  }
  if (size < 0) {
    dTrace("vgId:%d, can't get size from queue since %s, qtype:%d", vgId, terrstr(), qtype);
    size = 0;
  }
  return size;
}

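// Create the per-vnode workers and queues: single-threaded write, sync,
// sync-rd, and apply workers owned by the vnode, plus query, stream, and
// fetch queues backed by the shared worker pools in SVnodeMgmt.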
int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) {
  SMultiWorkerCfg wcfg = {.max = 1, .name = "vnode-write", .fp = (FItems)vnodeProposeWriteMsg, .param = pVnode->pImpl};
  SMultiWorkerCfg scfg = {.max = 1, .name = "vnode-sync", .fp = (FItems)vmProcessSyncQueue, .param = pVnode};
  SMultiWorkerCfg sccfg = {.max = 1, .name = "vnode-sync-rd", .fp = (FItems)vmProcessSyncQueue, .param = pVnode};
  SMultiWorkerCfg acfg = {.max = 1, .name = "vnode-apply", .fp = (FItems)vnodeApplyWriteMsg, .param = pVnode->pImpl};
  (void)tMultiWorkerInit(&pVnode->pWriteW, &wcfg);
  (void)tMultiWorkerInit(&pVnode->pSyncW, &scfg);
  (void)tMultiWorkerInit(&pVnode->pSyncRdW, &sccfg);
  (void)tMultiWorkerInit(&pVnode->pApplyW, &acfg);

  pVnode->pQueryQ = tQWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FItem)vmProcessQueryQueue);
  pVnode->pStreamQ = tAutoQWorkerAllocQueue(&pMgmt->streamPool, pVnode, (FItem)vmProcessStreamQueue);
  pVnode->pFetchQ = tWWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FItems)vmProcessFetchQueue);

  if (pVnode->pWriteW.queue == NULL || pVnode->pSyncW.queue == NULL || pVnode->pSyncRdW.queue == NULL ||
      pVnode->pApplyW.queue == NULL || pVnode->pQueryQ == NULL || pVnode->pStreamQ == NULL || pVnode->pFetchQ == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  dInfo("vgId:%d, write-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pWriteW.queue,
        pVnode->pWriteW.queue->threadId);
  dInfo("vgId:%d, sync-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pSyncW.queue,
        pVnode->pSyncW.queue->threadId);
  dInfo("vgId:%d, sync-rd-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pSyncRdW.queue,
        pVnode->pSyncRdW.queue->threadId);
  dInfo("vgId:%d, apply-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pApplyW.queue,
        pVnode->pApplyW.queue->threadId);
  dInfo("vgId:%d, query-queue:%p is alloced", pVnode->vgId, pVnode->pQueryQ);
  dInfo("vgId:%d, fetch-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pFetchQ,
        pVnode->pFetchQ->threadId);
  dInfo("vgId:%d, stream-queue:%p is alloced", pVnode->vgId, pVnode->pStreamQ);
  return 0;
}

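// Release the pool-backed query, stream, and fetch queues of a vnode.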
void vmFreeQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) {
  tQWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ);
  tAutoQWorkerFreeQueue(&pMgmt->streamPool, pVnode->pStreamQ);
  tWWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ);
  pVnode->pQueryQ = NULL;
  pVnode->pStreamQ = NULL;
  pVnode->pFetchQ = NULL;
  dDebug("vgId:%d, queue is freed", pVnode->vgId);
}

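// Initialize the shared worker pools (query, stream, fetch) and the single
// vnode-mgmt worker; pool sizes come from the tsNumOfVnode*Threads and
// tsRatioOfVnodeStreamThreads configuration values.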
int32_t vmStartWorker(SVnodeMgmt *pMgmt) {
  SQWorkerPool *pQPool = &pMgmt->queryPool;
  pQPool->name = "vnode-query";
  pQPool->min = tsNumOfVnodeQueryThreads;
  pQPool->max = tsNumOfVnodeQueryThreads;
  if (tQWorkerInit(pQPool) != 0) return -1;

  SAutoQWorkerPool *pStreamPool = &pMgmt->streamPool;
  pStreamPool->name = "vnode-stream";
  pStreamPool->ratio = tsRatioOfVnodeStreamThreads;
  if (tAutoQWorkerInit(pStreamPool) != 0) return -1;

  SWWorkerPool *pFPool = &pMgmt->fetchPool;
  pFPool->name = "vnode-fetch";
  pFPool->max = tsNumOfVnodeFetchThreads;
  if (tWWorkerInit(pFPool) != 0) return -1;

  SSingleWorkerCfg mgmtCfg = {
      .min = 1,
      .max = 1,
      .name = "vnode-mgmt",
      .fp = (FItem)vmProcessMgmtQueue,
      .param = pMgmt,
  };
  if (tSingleWorkerInit(&pMgmt->mgmtWorker, &mgmtCfg) != 0) return -1;

  dDebug("vnode workers are initialized");
  return 0;
}

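// Tear down the shared query, stream, and fetch worker pools.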
void vmStopWorker(SVnodeMgmt *pMgmt) {
  tQWorkerCleanup(&pMgmt->queryPool);
  tAutoQWorkerCleanup(&pMgmt->streamPool);
  tWWorkerCleanup(&pMgmt->fetchPool);
  dDebug("vnode workers are closed");
}