From 7f3b15020cd2a82eecfb9503016d67e8431f5a1a Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 11 Apr 2022 18:13:55 +0800 Subject: [PATCH 1/4] fix(cluster): get monitor info in multi-process mode --- include/libs/monitor/monitor.h | 7 ++- include/os/osSysinfo.h | 2 +- source/dnode/mgmt/dm/dmMonitor.c | 19 +++---- source/dnode/mgmt/qm/qmWorker.c | 2 +- source/dnode/mnode/impl/src/mndDnode.c | 2 + source/libs/monitor/src/monMain.c | 3 + source/libs/monitor/src/monMsg.c | 14 ++--- source/os/src/osSysinfo.c | 76 +++++++++++--------------- tests/script/tmp/monitor.sim | 35 ++++++++++++ 9 files changed, 94 insertions(+), 66 deletions(-) create mode 100644 tests/script/tmp/monitor.sim diff --git a/include/libs/monitor/monitor.h b/include/libs/monitor/monitor.h index f5080fbe7b..af0580674d 100644 --- a/include/libs/monitor/monitor.h +++ b/include/libs/monitor/monitor.h @@ -78,6 +78,9 @@ typedef struct { typedef struct { float uptime; // day int8_t has_mnode; + int8_t has_qnode; + int8_t has_snode; + int8_t has_bnode; SMonDiskDesc logdir; SMonDiskDesc tempdir; } SMonDnodeInfo; @@ -134,8 +137,8 @@ typedef struct { typedef struct { int32_t expire_time; - int32_t timeseries_used; - int32_t timeseries_total; + int64_t timeseries_used; + int64_t timeseries_total; } SMonGrantInfo; typedef struct { diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index 022f11bb0e..c009bcf350 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -39,7 +39,7 @@ int32_t taosGetEmail(char *email, int32_t maxLen); int32_t taosGetOsReleaseName(char *releaseName, int32_t maxLen); int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuCores(float *numOfCores); -int32_t taosGetCpuUsage(double *cpu_system, double *cpu_engine); +void taosGetCpuUsage(double *cpu_system, double *cpu_engine); int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB); diff --git a/source/dnode/mgmt/dm/dmMonitor.c b/source/dnode/mgmt/dm/dmMonitor.c index fb5855070c..6edf85106b 100644 --- a/source/dnode/mgmt/dm/dmMonitor.c +++ b/source/dnode/mgmt/dm/dmMonitor.c @@ -25,11 +25,10 @@ static void dmGetMonitorBasicInfo(SDnode *pDnode, SMonBasicInfo *pInfo) { static void dmGetMonitorDnodeInfo(SDnode *pDnode, SMonDnodeInfo *pInfo) { pInfo->uptime = (taosGetTimestampMs() - pDnode->rebootTime) / (86400000.0f); - SMgmtWrapper *pWrapper = dndAcquireWrapper(pDnode, MNODE); - if (pWrapper != NULL) { - pInfo->has_mnode = pWrapper->required; - dndReleaseWrapper(pWrapper); - } + pInfo->has_mnode = pDnode->wrappers[MNODE].required; + pInfo->has_qnode = pDnode->wrappers[QNODE].required; + pInfo->has_snode = pDnode->wrappers[SNODE].required; + pInfo->has_bnode = pDnode->wrappers[BNODE].required; tstrncpy(pInfo->logdir.name, tsLogDir, sizeof(pInfo->logdir.name)); pInfo->logdir.size = tsLogSpace.size; tstrncpy(pInfo->tempdir.name, tsTempDir, sizeof(pInfo->tempdir.name)); @@ -65,7 +64,7 @@ void dmSendMonitorReport(SDnode *pDnode) { bool getFromAPI = !tsMultiProcess; pWrapper = &pDnode->wrappers[MNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { mmGetMonitorInfo(pWrapper, &mmInfo); dndReleaseWrapper(pWrapper); } @@ -82,7 +81,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[VNODES]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { vmGetMonitorInfo(pWrapper, &vmInfo); dndReleaseWrapper(pWrapper); } @@ -99,7 +98,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[QNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { qmGetMonitorInfo(pWrapper, &qmInfo); dndReleaseWrapper(pWrapper); } @@ -116,7 +115,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[SNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { smGetMonitorInfo(pWrapper, &smInfo); dndReleaseWrapper(pWrapper); } @@ -133,7 +132,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[BNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { bmGetMonitorInfo(pWrapper, &bmInfo); dndReleaseWrapper(pWrapper); } diff --git a/source/dnode/mgmt/qm/qmWorker.c b/source/dnode/mgmt/qm/qmWorker.c index 974052cdf6..6b27af4fbd 100644 --- a/source/dnode/mgmt/qm/qmWorker.c +++ b/source/dnode/mgmt/qm/qmWorker.c @@ -32,7 +32,7 @@ static void qmProcessMonitorQueue(SQueueInfo *pInfo, SNodeMsg *pMsg) { SRpcMsg *pRpc = &pMsg->rpcMsg; int32_t code = -1; - if (pMsg->rpcMsg.msgType == TDMT_MON_SM_INFO) { + if (pMsg->rpcMsg.msgType == TDMT_MON_QM_INFO) { code = qmProcessGetMonQmInfoReq(pMgmt->pWrapper, pMsg); } else { terrno = TSDB_CODE_MSG_NOT_PROCESSED; diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 86ec49127a..30ddfbe44c 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -330,9 +330,11 @@ static int32_t mndProcessStatusReq(SNodeMsg *pReq) { SVgObj *pVgroup = mndAcquireVgroup(pMnode, pVload->vgId); if (pVgroup != NULL) { + mInfo("------>0 vgId:%d, numOfTimeSeries:%" PRId64, pVgroup->vgId, pVload->numOfTimeSeries); if (pVload->role == TAOS_SYNC_STATE_LEADER) { pVgroup->numOfTables = pVload->numOfTables; pVgroup->numOfTimeSeries = pVload->numOfTimeSeries; + mInfo("------>1 vgId:%d, numOfTimeSeries:%" PRId64, pVgroup->vgId, pVgroup->numOfTimeSeries); pVgroup->totalStorage = pVload->totalStorage; pVgroup->compStorage = pVload->compStorage; pVgroup->pointsWritten = pVload->pointsWritten; diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index c90b1f58e8..4e8e1e3b62 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -375,6 +375,9 @@ static void monGenDnodeJson(SMonInfo *pMonitor) { tjsonAddDoubleToObject(pJson, "vnodes_num", pStat->totalVnodes); tjsonAddDoubleToObject(pJson, "masters", pStat->masterNum); tjsonAddDoubleToObject(pJson, "has_mnode", pInfo->has_mnode); + tjsonAddDoubleToObject(pJson, "has_qnode", pInfo->has_qnode); + tjsonAddDoubleToObject(pJson, "has_snode", pInfo->has_snode); + tjsonAddDoubleToObject(pJson, "has_bnode", pInfo->has_bnode); } static void monGenDiskJson(SMonInfo *pMonitor) { diff --git a/source/libs/monitor/src/monMsg.c b/source/libs/monitor/src/monMsg.c index 3aafcf071d..adacbf479b 100644 --- a/source/libs/monitor/src/monMsg.c +++ b/source/libs/monitor/src/monMsg.c @@ -194,9 +194,9 @@ int32_t tDecodeSMonVgroupInfo(SCoder *decoder, SMonVgroupInfo *pInfo) { if (tDecodeCStrTo(decoder, desc.database_name) < 0) return -1; if (tDecodeCStrTo(decoder, desc.status) < 0) return -1; for (int32_t j = 0; j < TSDB_MAX_REPLICA; ++j) { - SMonVnodeDesc vdesc = {0}; - if (tDecodeI32(decoder, &vdesc.dnode_id) < 0) return -1; - if (tDecodeCStrTo(decoder, vdesc.vnode_role) < 0) return -1; + SMonVnodeDesc *pVDesc = &desc.vnodes[j]; + if (tDecodeI32(decoder, &pVDesc->dnode_id) < 0) return -1; + if (tDecodeCStrTo(decoder, pVDesc->vnode_role) < 0) return -1; } taosArrayPush(pInfo->vgroups, &desc); } @@ -205,15 +205,15 @@ int32_t tDecodeSMonVgroupInfo(SCoder *decoder, SMonVgroupInfo *pInfo) { int32_t tEncodeSMonGrantInfo(SCoder *encoder, const SMonGrantInfo *pInfo) { if (tEncodeI32(encoder, pInfo->expire_time) < 0) return -1; - if (tEncodeI32(encoder, pInfo->timeseries_used) < 0) return -1; - if (tEncodeI32(encoder, pInfo->timeseries_total) < 0) return -1; + if (tEncodeI64(encoder, pInfo->timeseries_used) < 0) return -1; + if (tEncodeI64(encoder, pInfo->timeseries_total) < 0) return -1; return 0; } int32_t tDecodeSMonGrantInfo(SCoder *decoder, SMonGrantInfo *pInfo) { if (tDecodeI32(decoder, &pInfo->expire_time) < 0) return -1; - if (tDecodeI32(decoder, &pInfo->timeseries_used) < 0) return -1; - if (tDecodeI32(decoder, &pInfo->timeseries_total) < 0) return -1; + if (tDecodeI64(decoder, &pInfo->timeseries_used) < 0) return -1; + if (tDecodeI64(decoder, &pInfo->timeseries_total) < 0) return -1; return 0; } diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 26de26ab67..4ffbc13fb3 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -369,53 +369,33 @@ int32_t taosGetCpuCores(float *numOfCores) { #endif } -int32_t taosGetCpuUsage(double *cpu_system, double *cpu_engine) { -#if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) +void taosGetCpuUsage(double *cpu_system, double *cpu_engine) { + static int64_t lastSysUsed = 0; + static int64_t lastSysTotal = 0; + static int64_t lastProcTotal = 0; + static int64_t curSysUsed = 0; + static int64_t curSysTotal = 0; + static int64_t curProcTotal = 0; + *cpu_system = 0; *cpu_engine = 0; - return 0; -#elif defined(_TD_DARWIN_64) - *cpu_system = 0; - *cpu_engine = 0; - return 0; -#else - static uint64_t lastSysUsed = 0; - static uint64_t lastSysTotal = 0; - static uint64_t lastProcTotal = 0; - SysCpuInfo sysCpu; - ProcCpuInfo procCpu; - if (taosGetSysCpuInfo(&sysCpu) != 0) { - return -1; + SysCpuInfo sysCpu = {0}; + ProcCpuInfo procCpu = {0}; + if (taosGetSysCpuInfo(&sysCpu) == 0 && taosGetProcCpuInfo(&procCpu) == 0) { + curSysUsed = sysCpu.user + sysCpu.nice + sysCpu.system; + curSysTotal = curSysUsed + sysCpu.idle; + curProcTotal = procCpu.utime + procCpu.stime + procCpu.cutime + procCpu.cstime; + + if (curSysTotal > lastSysTotal && curSysUsed >= lastSysUsed && curProcTotal >= lastProcTotal) { + *cpu_engine = (curSysUsed - lastSysUsed) / (double)(curSysTotal - lastSysTotal) * 100; + *cpu_system = (curProcTotal - lastProcTotal) / (double)(curSysTotal - lastSysTotal) * 100; + } + + lastSysUsed = curSysUsed; + lastSysTotal = curSysTotal; + lastProcTotal = curProcTotal; } - if (taosGetProcCpuInfo(&procCpu) != 0) { - return -1; - } - - uint64_t curSysUsed = sysCpu.user + sysCpu.nice + sysCpu.system; - uint64_t curSysTotal = curSysUsed + sysCpu.idle; - uint64_t curProcTotal = procCpu.utime + procCpu.stime + procCpu.cutime + procCpu.cstime; - - if (lastSysUsed == 0 || lastSysTotal == 0 || lastProcTotal == 0) { - lastSysUsed = curSysUsed > 1 ? curSysUsed : 1; - lastSysTotal = curSysTotal > 1 ? curSysTotal : 1; - lastProcTotal = curProcTotal > 1 ? curProcTotal : 1; - return -1; - } - - if (curSysTotal == lastSysTotal) { - return -1; - } - - *cpu_engine = (curSysUsed - lastSysUsed) / (double)(curSysTotal - lastSysTotal) * 100; - *cpu_system = (curProcTotal - lastProcTotal) / (double)(curSysTotal - lastSysTotal) * 100; - - lastSysUsed = curSysUsed; - lastSysTotal = curSysTotal; - lastProcTotal = curProcTotal; - - return 0; -#endif } int32_t taosGetTotalMemory(int64_t *totalKB) { @@ -618,7 +598,6 @@ void taosGetProcIODelta(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, i static int64_t last_wchars = 0; static int64_t last_read_bytes = 0; static int64_t last_write_bytes = 0; - static int64_t cur_rchars = 0; static int64_t cur_wchars = 0; static int64_t cur_read_bytes = 0; @@ -632,6 +611,11 @@ void taosGetProcIODelta(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, i last_wchars = cur_wchars; last_read_bytes = cur_read_bytes; last_write_bytes = cur_write_bytes; + } else { + *rchars = 0; + *wchars = 0; + *read_bytes = 0; + *write_bytes = 0; } } @@ -693,7 +677,6 @@ int32_t taosGetCardInfo(int64_t *receive_bytes, int64_t *transmit_bytes) { void taosGetCardInfoDelta(int64_t *receive_bytes, int64_t *transmit_bytes) { static int64_t last_receive_bytes = 0; static int64_t last_transmit_bytes = 0; - static int64_t cur_receive_bytes = 0; static int64_t cur_transmit_bytes = 0; if (taosGetCardInfo(&cur_receive_bytes, &cur_transmit_bytes) == 0) { @@ -701,6 +684,9 @@ void taosGetCardInfoDelta(int64_t *receive_bytes, int64_t *transmit_bytes) { *transmit_bytes = cur_transmit_bytes - last_transmit_bytes; last_receive_bytes = cur_receive_bytes; last_transmit_bytes = cur_transmit_bytes; + } else { + *receive_bytes = 0; + *transmit_bytes = 0; } } diff --git a/tests/script/tmp/monitor.sim b/tests/script/tmp/monitor.sim new file mode 100644 index 0000000000..ba98bec2d0 --- /dev/null +++ b/tests/script/tmp/monitor.sim @@ -0,0 +1,35 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c monitorfqdn -v localhost +system sh/cfg.sh -n dnode1 -c monitorport -v 80 +system sh/cfg.sh -n dnode1 -c monitorInterval -v 1 +system sh/cfg.sh -n dnode1 -c monitorComp -v 1 +#system sh/cfg.sh -n dnode1 -c supportVnodes -v 128 + +system sh/exec.sh -n dnode1 -s start +sql connect + +print =============== show dnodes +sleep 2000 +sql create database db vgroups 2; +sleep 2000 + +print =============== create drop qnode 1 +sql create qnode on dnode 1 +sql create snode on dnode 1 +sql create bnode on dnode 1 + +return +print =============== restart +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start + + +return +system sh/deploy.sh -n dnode2 -i 2 +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode2 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s start + +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s stop -x SIGINT From 34286624f35df6a5763a51612334553c7bae2509 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 11 Apr 2022 19:47:24 +0800 Subject: [PATCH 2/4] fix(cluster): delete redundant logs --- source/dnode/mnode/impl/src/mndDnode.c | 2 -- source/libs/monitor/src/monMain.c | 2 +- source/libs/transport/src/thttp.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 30ddfbe44c..86ec49127a 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -330,11 +330,9 @@ static int32_t mndProcessStatusReq(SNodeMsg *pReq) { SVgObj *pVgroup = mndAcquireVgroup(pMnode, pVload->vgId); if (pVgroup != NULL) { - mInfo("------>0 vgId:%d, numOfTimeSeries:%" PRId64, pVgroup->vgId, pVload->numOfTimeSeries); if (pVload->role == TAOS_SYNC_STATE_LEADER) { pVgroup->numOfTables = pVload->numOfTables; pVgroup->numOfTimeSeries = pVload->numOfTimeSeries; - mInfo("------>1 vgId:%d, numOfTimeSeries:%" PRId64, pVgroup->vgId, pVgroup->numOfTimeSeries); pVgroup->totalStorage = pVload->totalStorage; pVgroup->compStorage = pVload->compStorage; pVgroup->pointsWritten = pVload->pointsWritten; diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index 4e8e1e3b62..3ece089a28 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -533,7 +533,7 @@ void monSendReport() { if (pCont != NULL) { EHttpCompFlag flag = tsMonitor.cfg.comp ? HTTP_GZIP : HTTP_FLAT; if (taosSendHttpReport(tsMonitor.cfg.server, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != 0) { - uError("failed to send monitor msg since %s", terrstr()); + uError("failed to send monitor msg"); } taosMemoryFree(pCont); } diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index cd1fbf8e0e..c747e69339 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -164,8 +164,8 @@ int32_t taosSendHttpReport(const char* server, uint16_t port, char* pCont, int32 wb[1] = uv_buf_init((char*)pCont, contLen); connect->data = wb; - uv_tcp_connect(connect, &socket_tcp, (const struct sockaddr*)&dest, clientConnCb); terrno = 0; + uv_tcp_connect(connect, &socket_tcp, (const struct sockaddr*)&dest, clientConnCb); uv_run(loop, UV_RUN_DEFAULT); uv_loop_close(loop); taosMemoryFree(connect); From d98e87de5ac05cecfcc542832d91201df2f65516 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 12 Apr 2022 13:20:34 +0800 Subject: [PATCH 3/4] fix(cluster): the log is printed twice in multi-process mode --- source/os/src/osProc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/os/src/osProc.c b/source/os/src/osProc.c index 1cdd41ad78..b6de638ac2 100644 --- a/source/os/src/osProc.c +++ b/source/os/src/osProc.c @@ -24,7 +24,7 @@ int32_t taosNewProc(char **args) { if (pid == 0) { args[0] = tsProcPath; // close(STDIN_FILENO); - close(STDOUT_FILENO); + // close(STDOUT_FILENO); // close(STDERR_FILENO); return execvp(tsProcPath, args); } else { From 0094f30140f20dca2f7a6a062743918068ea2e7b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 12 Apr 2022 15:27:17 +0800 Subject: [PATCH 4/4] refact(cluster): modify node management code --- source/dnode/mgmt/bm/bmHandle.c | 3 +- source/dnode/mgmt/inc/bmInt.h | 2 +- source/dnode/mgmt/inc/dndInt.h | 1 + source/dnode/mgmt/main/dndExec.c | 195 ++++++++++++++++++------------- source/dnode/mgmt/mm/mmHandle.c | 1 + source/dnode/mgmt/qm/qmHandle.c | 3 +- source/dnode/mgmt/sm/smHandle.c | 3 +- 7 files changed, 126 insertions(+), 82 deletions(-) diff --git a/source/dnode/mgmt/bm/bmHandle.c b/source/dnode/mgmt/bm/bmHandle.c index 645a1d09c2..d110592603 100644 --- a/source/dnode/mgmt/bm/bmHandle.c +++ b/source/dnode/mgmt/bm/bmHandle.c @@ -58,7 +58,7 @@ int32_t bmProcessCreateReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { dError("failed to create bnode since %s, input:%d cur:%d", terrstr(), createReq.dnodeId, pDnode->dnodeId); return -1; } else { - return bmOpen(pWrapper); + return dndOpenNode(pWrapper); } } @@ -77,6 +77,7 @@ int32_t bmProcessDropReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { dError("failed to drop bnode since %s", terrstr()); return -1; } else { + // dndCloseNode(pWrapper); return bmDrop(pWrapper); } } diff --git a/source/dnode/mgmt/inc/bmInt.h b/source/dnode/mgmt/inc/bmInt.h index 84a6a53e99..3158fe7d34 100644 --- a/source/dnode/mgmt/inc/bmInt.h +++ b/source/dnode/mgmt/inc/bmInt.h @@ -27,7 +27,7 @@ extern "C" { typedef struct SBnodeMgmt { SBnode *pBnode; SDnode *pDnode; - SMgmtWrapper *pWrapper; + SMgmtWrapper *pWrapper; const char *path; SMultiWorker writeWorker; SSingleWorker monitorWorker; diff --git a/source/dnode/mgmt/inc/dndInt.h b/source/dnode/mgmt/inc/dndInt.h index 7faf1e4276..a38fe87b59 100644 --- a/source/dnode/mgmt/inc/dndInt.h +++ b/source/dnode/mgmt/inc/dndInt.h @@ -126,6 +126,7 @@ typedef struct SDnode { int32_t numOfDisks; uint16_t serverPort; bool dropped; + EProcType procType; EDndType ntype; EDndStatus status; EDndEvent event; diff --git a/source/dnode/mgmt/main/dndExec.c b/source/dnode/mgmt/main/dndExec.c index 6c0d0456c9..51569d2ed4 100644 --- a/source/dnode/mgmt/main/dndExec.c +++ b/source/dnode/mgmt/main/dndExec.c @@ -27,46 +27,42 @@ static bool dndRequireNode(SMgmtWrapper *pWrapper) { return required; } -int32_t dndOpenNode(SMgmtWrapper *pWrapper) { - if (taosMkDir(pWrapper->path) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("node:%s, failed to create dir:%s since %s", pWrapper->name, pWrapper->path, terrstr()); +static int32_t dndInitNodeProc(SMgmtWrapper *pWrapper) { + int32_t shmsize = tsMnodeShmSize; + if (pWrapper->ntype == VNODES) { + shmsize = tsVnodeShmSize; + } else if (pWrapper->ntype == QNODE) { + shmsize = tsQnodeShmSize; + } else if (pWrapper->ntype == SNODE) { + shmsize = tsSnodeShmSize; + } else if (pWrapper->ntype == MNODE) { + shmsize = tsMnodeShmSize; + } else if (pWrapper->ntype == BNODE) { + shmsize = tsBnodeShmSize; + } else { return -1; } - if ((*pWrapper->fp.openFp)(pWrapper) != 0) { - dError("node:%s, failed to open since %s", pWrapper->name, terrstr()); + if (taosCreateShm(&pWrapper->shm, pWrapper->ntype, shmsize) != 0) { + terrno = TAOS_SYSTEM_ERROR(terrno); + dError("node:%s, failed to create shm size:%d since %s", pWrapper->name, shmsize, terrstr()); + return -1; + } + dInfo("node:%s, shm:%d is created, size:%d", pWrapper->name, pWrapper->shm.id, shmsize); + + SProcCfg cfg = dndGenProcCfg(pWrapper); + cfg.isChild = false; + pWrapper->procType = PROC_PARENT; + pWrapper->pProc = taosProcInit(&cfg); + if (pWrapper->pProc == NULL) { + dError("node:%s, failed to create proc since %s", pWrapper->name, terrstr()); return -1; } - dDebug("node:%s, has been opened", pWrapper->name); - pWrapper->deployed = true; return 0; } -void dndCloseNode(SMgmtWrapper *pWrapper) { - dDebug("node:%s, mgmt start to close", pWrapper->name); - pWrapper->required = false; - taosWLockLatch(&pWrapper->latch); - if (pWrapper->deployed) { - (*pWrapper->fp.closeFp)(pWrapper); - pWrapper->deployed = false; - } - taosWUnLockLatch(&pWrapper->latch); - - while (pWrapper->refCount > 0) { - taosMsleep(10); - } - - if (pWrapper->pProc) { - taosProcCleanup(pWrapper->pProc); - pWrapper->pProc = NULL; - } - dDebug("node:%s, mgmt has been closed", pWrapper->name); -} - - -static int32_t dndNewProc(SMgmtWrapper *pWrapper, EDndType n) { +static int32_t dndNewNodeProc(SMgmtWrapper *pWrapper, EDndType n) { char tstr[8] = {0}; char *args[6] = {0}; snprintf(tstr, sizeof(tstr), "%d", n); @@ -88,6 +84,86 @@ static int32_t dndNewProc(SMgmtWrapper *pWrapper, EDndType n) { return 0; } +static int32_t dndRunNodeProc(SMgmtWrapper *pWrapper) { + if (pWrapper->pDnode->ntype == NODE_MAX) { + dInfo("node:%s, should be started manually", pWrapper->name); + } else { + if (dndNewNodeProc(pWrapper, pWrapper->ntype) != 0) { + return -1; + } + } + + if (taosProcRun(pWrapper->pProc) != 0) { + dError("node:%s, failed to run proc since %s", pWrapper->name, terrstr()); + return -1; + } + + return 0; +} + +static int32_t dndOpenNodeImp(SMgmtWrapper *pWrapper) { + if (taosMkDir(pWrapper->path) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + dError("node:%s, failed to create dir:%s since %s", pWrapper->name, pWrapper->path, terrstr()); + return -1; + } + + if ((*pWrapper->fp.openFp)(pWrapper) != 0) { + dError("node:%s, failed to open since %s", pWrapper->name, terrstr()); + return -1; + } + + dDebug("node:%s, has been opened", pWrapper->name); + pWrapper->deployed = true; + return 0; +} + +int32_t dndOpenNode(SMgmtWrapper *pWrapper) { + SDnode *pDnode = pWrapper->pDnode; + if (pDnode->procType == PROC_SINGLE) { + return dndOpenNodeImp(pWrapper); + } else if (pDnode->procType == PROC_PARENT) { + if (dndInitNodeProc(pWrapper) != 0) return -1; + if (dndWriteShmFile(pDnode) != 0) return -1; + if (dndRunNodeProc(pWrapper) != 0) return -1; + } + return 0; +} + +static void dndCloseNodeImp(SMgmtWrapper *pWrapper) { + dDebug("node:%s, mgmt start to close", pWrapper->name); + pWrapper->required = false; + taosWLockLatch(&pWrapper->latch); + if (pWrapper->deployed) { + (*pWrapper->fp.closeFp)(pWrapper); + pWrapper->deployed = false; + } + taosWUnLockLatch(&pWrapper->latch); + + while (pWrapper->refCount > 0) { + taosMsleep(10); + } + + if (pWrapper->pProc) { + taosProcCleanup(pWrapper->pProc); + pWrapper->pProc = NULL; + } + dDebug("node:%s, mgmt has been closed", pWrapper->name); +} + +void dndCloseNode(SMgmtWrapper *pWrapper) { + if (pWrapper->pDnode->procType == PROC_PARENT) { + if (pWrapper->procId > 0 && taosProcExist(pWrapper->procId)) { + dInfo("node:%s, send kill signal to the child process:%d", pWrapper->name, pWrapper->procId); + taosKillProc(pWrapper->procId); + dInfo("node:%s, wait for child process:%d to stop", pWrapper->name, pWrapper->procId); + taosWaitProc(pWrapper->procId); + dInfo("node:%s, child process:%d is stopped", pWrapper->name, pWrapper->procId); + } + } + dndCloseNodeImp(pWrapper); +} + static void dndProcessProcHandle(void *handle) { dWarn("handle:%p, the child process dies and send an offline rsp", handle); SRpcMsg rpcMsg = {.handle = handle, .code = TSDB_CODE_NODE_OFFLINE}; @@ -96,13 +172,14 @@ static void dndProcessProcHandle(void *handle) { static int32_t dndRunInSingleProcess(SDnode *pDnode) { dInfo("dnode run in single process"); + pDnode->procType = PROC_SINGLE; for (EDndType n = DNODE; n < NODE_MAX; ++n) { SMgmtWrapper *pWrapper = &pDnode->wrappers[n]; pWrapper->required = dndRequireNode(pWrapper); if (!pWrapper->required) continue; - if (dndOpenNode(pWrapper) != 0) { + if (dndOpenNodeImp(pWrapper) != 0) { dError("node:%s, failed to start since %s", pWrapper->name, terrstr()); return -1; } @@ -136,8 +213,10 @@ static int32_t dndRunInSingleProcess(SDnode *pDnode) { static int32_t dndRunInParentProcess(SDnode *pDnode) { dInfo("dnode run in parent process"); + pDnode->procType = PROC_PARENT; + SMgmtWrapper *pDWrapper = &pDnode->wrappers[DNODE]; - if (dndOpenNode(pDWrapper) != 0) { + if (dndOpenNodeImp(pDWrapper) != 0) { dError("node:%s, failed to start since %s", pDWrapper->name, terrstr()); return -1; } @@ -146,36 +225,7 @@ static int32_t dndRunInParentProcess(SDnode *pDnode) { SMgmtWrapper *pWrapper = &pDnode->wrappers[n]; pWrapper->required = dndRequireNode(pWrapper); if (!pWrapper->required) continue; - - int32_t shmsize = tsMnodeShmSize; - if (n == VNODES) { - shmsize = tsVnodeShmSize; - } else if (n == QNODE) { - shmsize = tsQnodeShmSize; - } else if (n == SNODE) { - shmsize = tsSnodeShmSize; - } else if (n == MNODE) { - shmsize = tsMnodeShmSize; - } else if (n == BNODE) { - shmsize = tsBnodeShmSize; - } else { - } - - if (taosCreateShm(&pWrapper->shm, n, shmsize) != 0) { - terrno = TAOS_SYSTEM_ERROR(terrno); - dError("node:%s, failed to create shm size:%d since %s", pWrapper->name, shmsize, terrstr()); - return -1; - } - dInfo("node:%s, shm:%d is created, size:%d", pWrapper->name, pWrapper->shm.id, shmsize); - - SProcCfg cfg = dndGenProcCfg(pWrapper); - cfg.isChild = false; - pWrapper->procType = PROC_PARENT; - pWrapper->pProc = taosProcInit(&cfg); - if (pWrapper->pProc == NULL) { - dError("node:%s, failed to create proc since %s", pWrapper->name, terrstr()); - return -1; - } + if (dndInitNodeProc(pWrapper) != 0) return -1; } if (dndWriteShmFile(pDnode) != 0) { @@ -186,19 +236,7 @@ static int32_t dndRunInParentProcess(SDnode *pDnode) { for (EDndType n = DNODE + 1; n < NODE_MAX; ++n) { SMgmtWrapper *pWrapper = &pDnode->wrappers[n]; if (!pWrapper->required) continue; - - if (pDnode->ntype == NODE_MAX) { - dInfo("node:%s, should be started manually", pWrapper->name); - } else { - if (dndNewProc(pWrapper, n) != 0) { - return -1; - } - } - - if (taosProcRun(pWrapper->pProc) != 0) { - dError("node:%s, failed to run proc since %s", pWrapper->name, terrstr()); - return -1; - } + if (dndRunNodeProc(pWrapper) != 0) return -1; } dndSetStatus(pDnode, DND_STAT_RUNNING); @@ -239,7 +277,7 @@ static int32_t dndRunInParentProcess(SDnode *pDnode) { if (pWrapper->procId <= 0 || !taosProcExist(pWrapper->procId)) { dWarn("node:%s, process:%d is killed and needs to be restarted", pWrapper->name, pWrapper->procId); taosProcCloseHandles(pWrapper->pProc, dndProcessProcHandle); - dndNewProc(pWrapper, n); + dndNewNodeProc(pWrapper, n); } } } @@ -253,6 +291,7 @@ static int32_t dndRunInParentProcess(SDnode *pDnode) { static int32_t dndRunInChildProcess(SDnode *pDnode) { SMgmtWrapper *pWrapper = &pDnode->wrappers[pDnode->ntype]; dInfo("%s run in child process", pWrapper->name); + pDnode->procType = PROC_CHILD; pWrapper->required = dndRequireNode(pWrapper); if (!pWrapper->required) { @@ -264,7 +303,7 @@ static int32_t dndRunInChildProcess(SDnode *pDnode) { tmsgSetDefaultMsgCb(&msgCb); pWrapper->procType = PROC_CHILD; - if (dndOpenNode(pWrapper) != 0) { + if (dndOpenNodeImp(pWrapper) != 0) { dError("node:%s, failed to start since %s", pWrapper->name, terrstr()); return -1; } diff --git a/source/dnode/mgmt/mm/mmHandle.c b/source/dnode/mgmt/mm/mmHandle.c index 6ad0b8c0ed..63240c3224 100644 --- a/source/dnode/mgmt/mm/mmHandle.c +++ b/source/dnode/mgmt/mm/mmHandle.c @@ -80,6 +80,7 @@ int32_t mmProcessDropReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { dError("failed to drop mnode since %s", terrstr()); return -1; } else { + // dndCloseNode(pWrapper); return mmDrop(pWrapper); } } diff --git a/source/dnode/mgmt/qm/qmHandle.c b/source/dnode/mgmt/qm/qmHandle.c index 96fc338529..4fda72759a 100644 --- a/source/dnode/mgmt/qm/qmHandle.c +++ b/source/dnode/mgmt/qm/qmHandle.c @@ -58,7 +58,7 @@ int32_t qmProcessCreateReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { dError("failed to create qnode since %s", terrstr()); return -1; } else { - return qmOpen(pWrapper); + return dndOpenNode(pWrapper); } } @@ -77,6 +77,7 @@ int32_t qmProcessDropReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { dError("failed to drop qnode since %s", terrstr()); return -1; } else { + // dndCloseNode(pWrapper); return qmDrop(pWrapper); } } diff --git a/source/dnode/mgmt/sm/smHandle.c b/source/dnode/mgmt/sm/smHandle.c index 36345cf490..5b30dc04bc 100644 --- a/source/dnode/mgmt/sm/smHandle.c +++ b/source/dnode/mgmt/sm/smHandle.c @@ -58,7 +58,7 @@ int32_t smProcessCreateReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { dError("failed to create snode since %s", terrstr()); return -1; } else { - return smOpen(pWrapper); + return dndOpenNode(pWrapper); } } @@ -78,6 +78,7 @@ int32_t smProcessDropReq(SMgmtWrapper *pWrapper, SNodeMsg *pMsg) { return -1; } else { return smDrop(pWrapper); + // return dndCloseNode(pWrapper); } }