From 7f3b15020cd2a82eecfb9503016d67e8431f5a1a Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 11 Apr 2022 18:13:55 +0800 Subject: [PATCH] fix(cluster): get monitor info in multi-process mode --- include/libs/monitor/monitor.h | 7 ++- include/os/osSysinfo.h | 2 +- source/dnode/mgmt/dm/dmMonitor.c | 19 +++---- source/dnode/mgmt/qm/qmWorker.c | 2 +- source/dnode/mnode/impl/src/mndDnode.c | 2 + source/libs/monitor/src/monMain.c | 3 + source/libs/monitor/src/monMsg.c | 14 ++--- source/os/src/osSysinfo.c | 76 +++++++++++--------------- tests/script/tmp/monitor.sim | 35 ++++++++++++ 9 files changed, 94 insertions(+), 66 deletions(-) create mode 100644 tests/script/tmp/monitor.sim diff --git a/include/libs/monitor/monitor.h b/include/libs/monitor/monitor.h index f5080fbe7b..af0580674d 100644 --- a/include/libs/monitor/monitor.h +++ b/include/libs/monitor/monitor.h @@ -78,6 +78,9 @@ typedef struct { typedef struct { float uptime; // day int8_t has_mnode; + int8_t has_qnode; + int8_t has_snode; + int8_t has_bnode; SMonDiskDesc logdir; SMonDiskDesc tempdir; } SMonDnodeInfo; @@ -134,8 +137,8 @@ typedef struct { typedef struct { int32_t expire_time; - int32_t timeseries_used; - int32_t timeseries_total; + int64_t timeseries_used; + int64_t timeseries_total; } SMonGrantInfo; typedef struct { diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index 022f11bb0e..c009bcf350 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -39,7 +39,7 @@ int32_t taosGetEmail(char *email, int32_t maxLen); int32_t taosGetOsReleaseName(char *releaseName, int32_t maxLen); int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuCores(float *numOfCores); -int32_t taosGetCpuUsage(double *cpu_system, double *cpu_engine); +void taosGetCpuUsage(double *cpu_system, double *cpu_engine); int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB); diff --git a/source/dnode/mgmt/dm/dmMonitor.c b/source/dnode/mgmt/dm/dmMonitor.c index fb5855070c..6edf85106b 100644 --- a/source/dnode/mgmt/dm/dmMonitor.c +++ b/source/dnode/mgmt/dm/dmMonitor.c @@ -25,11 +25,10 @@ static void dmGetMonitorBasicInfo(SDnode *pDnode, SMonBasicInfo *pInfo) { static void dmGetMonitorDnodeInfo(SDnode *pDnode, SMonDnodeInfo *pInfo) { pInfo->uptime = (taosGetTimestampMs() - pDnode->rebootTime) / (86400000.0f); - SMgmtWrapper *pWrapper = dndAcquireWrapper(pDnode, MNODE); - if (pWrapper != NULL) { - pInfo->has_mnode = pWrapper->required; - dndReleaseWrapper(pWrapper); - } + pInfo->has_mnode = pDnode->wrappers[MNODE].required; + pInfo->has_qnode = pDnode->wrappers[QNODE].required; + pInfo->has_snode = pDnode->wrappers[SNODE].required; + pInfo->has_bnode = pDnode->wrappers[BNODE].required; tstrncpy(pInfo->logdir.name, tsLogDir, sizeof(pInfo->logdir.name)); pInfo->logdir.size = tsLogSpace.size; tstrncpy(pInfo->tempdir.name, tsTempDir, sizeof(pInfo->tempdir.name)); @@ -65,7 +64,7 @@ void dmSendMonitorReport(SDnode *pDnode) { bool getFromAPI = !tsMultiProcess; pWrapper = &pDnode->wrappers[MNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { mmGetMonitorInfo(pWrapper, &mmInfo); dndReleaseWrapper(pWrapper); } @@ -82,7 +81,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[VNODES]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { vmGetMonitorInfo(pWrapper, &vmInfo); dndReleaseWrapper(pWrapper); } @@ -99,7 +98,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[QNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { qmGetMonitorInfo(pWrapper, &qmInfo); dndReleaseWrapper(pWrapper); } @@ -116,7 +115,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[SNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { smGetMonitorInfo(pWrapper, &smInfo); dndReleaseWrapper(pWrapper); } @@ -133,7 +132,7 @@ void dmSendMonitorReport(SDnode *pDnode) { pWrapper = &pDnode->wrappers[BNODE]; if (getFromAPI) { - if (dndMarkWrapper(pWrapper) != 0) { + if (dndMarkWrapper(pWrapper) == 0) { bmGetMonitorInfo(pWrapper, &bmInfo); dndReleaseWrapper(pWrapper); } diff --git a/source/dnode/mgmt/qm/qmWorker.c b/source/dnode/mgmt/qm/qmWorker.c index 974052cdf6..6b27af4fbd 100644 --- a/source/dnode/mgmt/qm/qmWorker.c +++ b/source/dnode/mgmt/qm/qmWorker.c @@ -32,7 +32,7 @@ static void qmProcessMonitorQueue(SQueueInfo *pInfo, SNodeMsg *pMsg) { SRpcMsg *pRpc = &pMsg->rpcMsg; int32_t code = -1; - if (pMsg->rpcMsg.msgType == TDMT_MON_SM_INFO) { + if (pMsg->rpcMsg.msgType == TDMT_MON_QM_INFO) { code = qmProcessGetMonQmInfoReq(pMgmt->pWrapper, pMsg); } else { terrno = TSDB_CODE_MSG_NOT_PROCESSED; diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 86ec49127a..30ddfbe44c 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -330,9 +330,11 @@ static int32_t mndProcessStatusReq(SNodeMsg *pReq) { SVgObj *pVgroup = mndAcquireVgroup(pMnode, pVload->vgId); if (pVgroup != NULL) { + mInfo("------>0 vgId:%d, numOfTimeSeries:%" PRId64, pVgroup->vgId, pVload->numOfTimeSeries); if (pVload->role == TAOS_SYNC_STATE_LEADER) { pVgroup->numOfTables = pVload->numOfTables; pVgroup->numOfTimeSeries = pVload->numOfTimeSeries; + mInfo("------>1 vgId:%d, numOfTimeSeries:%" PRId64, pVgroup->vgId, pVgroup->numOfTimeSeries); pVgroup->totalStorage = pVload->totalStorage; pVgroup->compStorage = pVload->compStorage; pVgroup->pointsWritten = pVload->pointsWritten; diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index c90b1f58e8..4e8e1e3b62 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -375,6 +375,9 @@ static void monGenDnodeJson(SMonInfo *pMonitor) { tjsonAddDoubleToObject(pJson, "vnodes_num", pStat->totalVnodes); tjsonAddDoubleToObject(pJson, "masters", pStat->masterNum); tjsonAddDoubleToObject(pJson, "has_mnode", pInfo->has_mnode); + tjsonAddDoubleToObject(pJson, "has_qnode", pInfo->has_qnode); + tjsonAddDoubleToObject(pJson, "has_snode", pInfo->has_snode); + tjsonAddDoubleToObject(pJson, "has_bnode", pInfo->has_bnode); } static void monGenDiskJson(SMonInfo *pMonitor) { diff --git a/source/libs/monitor/src/monMsg.c b/source/libs/monitor/src/monMsg.c index 3aafcf071d..adacbf479b 100644 --- a/source/libs/monitor/src/monMsg.c +++ b/source/libs/monitor/src/monMsg.c @@ -194,9 +194,9 @@ int32_t tDecodeSMonVgroupInfo(SCoder *decoder, SMonVgroupInfo *pInfo) { if (tDecodeCStrTo(decoder, desc.database_name) < 0) return -1; if (tDecodeCStrTo(decoder, desc.status) < 0) return -1; for (int32_t j = 0; j < TSDB_MAX_REPLICA; ++j) { - SMonVnodeDesc vdesc = {0}; - if (tDecodeI32(decoder, &vdesc.dnode_id) < 0) return -1; - if (tDecodeCStrTo(decoder, vdesc.vnode_role) < 0) return -1; + SMonVnodeDesc *pVDesc = &desc.vnodes[j]; + if (tDecodeI32(decoder, &pVDesc->dnode_id) < 0) return -1; + if (tDecodeCStrTo(decoder, pVDesc->vnode_role) < 0) return -1; } taosArrayPush(pInfo->vgroups, &desc); } @@ -205,15 +205,15 @@ int32_t tDecodeSMonVgroupInfo(SCoder *decoder, SMonVgroupInfo *pInfo) { int32_t tEncodeSMonGrantInfo(SCoder *encoder, const SMonGrantInfo *pInfo) { if (tEncodeI32(encoder, pInfo->expire_time) < 0) return -1; - if (tEncodeI32(encoder, pInfo->timeseries_used) < 0) return -1; - if (tEncodeI32(encoder, pInfo->timeseries_total) < 0) return -1; + if (tEncodeI64(encoder, pInfo->timeseries_used) < 0) return -1; + if (tEncodeI64(encoder, pInfo->timeseries_total) < 0) return -1; return 0; } int32_t tDecodeSMonGrantInfo(SCoder *decoder, SMonGrantInfo *pInfo) { if (tDecodeI32(decoder, &pInfo->expire_time) < 0) return -1; - if (tDecodeI32(decoder, &pInfo->timeseries_used) < 0) return -1; - if (tDecodeI32(decoder, &pInfo->timeseries_total) < 0) return -1; + if (tDecodeI64(decoder, &pInfo->timeseries_used) < 0) return -1; + if (tDecodeI64(decoder, &pInfo->timeseries_total) < 0) return -1; return 0; } diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 26de26ab67..4ffbc13fb3 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -369,53 +369,33 @@ int32_t taosGetCpuCores(float *numOfCores) { #endif } -int32_t taosGetCpuUsage(double *cpu_system, double *cpu_engine) { -#if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) +void taosGetCpuUsage(double *cpu_system, double *cpu_engine) { + static int64_t lastSysUsed = 0; + static int64_t lastSysTotal = 0; + static int64_t lastProcTotal = 0; + static int64_t curSysUsed = 0; + static int64_t curSysTotal = 0; + static int64_t curProcTotal = 0; + *cpu_system = 0; *cpu_engine = 0; - return 0; -#elif defined(_TD_DARWIN_64) - *cpu_system = 0; - *cpu_engine = 0; - return 0; -#else - static uint64_t lastSysUsed = 0; - static uint64_t lastSysTotal = 0; - static uint64_t lastProcTotal = 0; - SysCpuInfo sysCpu; - ProcCpuInfo procCpu; - if (taosGetSysCpuInfo(&sysCpu) != 0) { - return -1; + SysCpuInfo sysCpu = {0}; + ProcCpuInfo procCpu = {0}; + if (taosGetSysCpuInfo(&sysCpu) == 0 && taosGetProcCpuInfo(&procCpu) == 0) { + curSysUsed = sysCpu.user + sysCpu.nice + sysCpu.system; + curSysTotal = curSysUsed + sysCpu.idle; + curProcTotal = procCpu.utime + procCpu.stime + procCpu.cutime + procCpu.cstime; + + if (curSysTotal > lastSysTotal && curSysUsed >= lastSysUsed && curProcTotal >= lastProcTotal) { + *cpu_engine = (curSysUsed - lastSysUsed) / (double)(curSysTotal - lastSysTotal) * 100; + *cpu_system = (curProcTotal - lastProcTotal) / (double)(curSysTotal - lastSysTotal) * 100; + } + + lastSysUsed = curSysUsed; + lastSysTotal = curSysTotal; + lastProcTotal = curProcTotal; } - if (taosGetProcCpuInfo(&procCpu) != 0) { - return -1; - } - - uint64_t curSysUsed = sysCpu.user + sysCpu.nice + sysCpu.system; - uint64_t curSysTotal = curSysUsed + sysCpu.idle; - uint64_t curProcTotal = procCpu.utime + procCpu.stime + procCpu.cutime + procCpu.cstime; - - if (lastSysUsed == 0 || lastSysTotal == 0 || lastProcTotal == 0) { - lastSysUsed = curSysUsed > 1 ? curSysUsed : 1; - lastSysTotal = curSysTotal > 1 ? curSysTotal : 1; - lastProcTotal = curProcTotal > 1 ? curProcTotal : 1; - return -1; - } - - if (curSysTotal == lastSysTotal) { - return -1; - } - - *cpu_engine = (curSysUsed - lastSysUsed) / (double)(curSysTotal - lastSysTotal) * 100; - *cpu_system = (curProcTotal - lastProcTotal) / (double)(curSysTotal - lastSysTotal) * 100; - - lastSysUsed = curSysUsed; - lastSysTotal = curSysTotal; - lastProcTotal = curProcTotal; - - return 0; -#endif } int32_t taosGetTotalMemory(int64_t *totalKB) { @@ -618,7 +598,6 @@ void taosGetProcIODelta(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, i static int64_t last_wchars = 0; static int64_t last_read_bytes = 0; static int64_t last_write_bytes = 0; - static int64_t cur_rchars = 0; static int64_t cur_wchars = 0; static int64_t cur_read_bytes = 0; @@ -632,6 +611,11 @@ void taosGetProcIODelta(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, i last_wchars = cur_wchars; last_read_bytes = cur_read_bytes; last_write_bytes = cur_write_bytes; + } else { + *rchars = 0; + *wchars = 0; + *read_bytes = 0; + *write_bytes = 0; } } @@ -693,7 +677,6 @@ int32_t taosGetCardInfo(int64_t *receive_bytes, int64_t *transmit_bytes) { void taosGetCardInfoDelta(int64_t *receive_bytes, int64_t *transmit_bytes) { static int64_t last_receive_bytes = 0; static int64_t last_transmit_bytes = 0; - static int64_t cur_receive_bytes = 0; static int64_t cur_transmit_bytes = 0; if (taosGetCardInfo(&cur_receive_bytes, &cur_transmit_bytes) == 0) { @@ -701,6 +684,9 @@ void taosGetCardInfoDelta(int64_t *receive_bytes, int64_t *transmit_bytes) { *transmit_bytes = cur_transmit_bytes - last_transmit_bytes; last_receive_bytes = cur_receive_bytes; last_transmit_bytes = cur_transmit_bytes; + } else { + *receive_bytes = 0; + *transmit_bytes = 0; } } diff --git a/tests/script/tmp/monitor.sim b/tests/script/tmp/monitor.sim new file mode 100644 index 0000000000..ba98bec2d0 --- /dev/null +++ b/tests/script/tmp/monitor.sim @@ -0,0 +1,35 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c monitorfqdn -v localhost +system sh/cfg.sh -n dnode1 -c monitorport -v 80 +system sh/cfg.sh -n dnode1 -c monitorInterval -v 1 +system sh/cfg.sh -n dnode1 -c monitorComp -v 1 +#system sh/cfg.sh -n dnode1 -c supportVnodes -v 128 + +system sh/exec.sh -n dnode1 -s start +sql connect + +print =============== show dnodes +sleep 2000 +sql create database db vgroups 2; +sleep 2000 + +print =============== create drop qnode 1 +sql create qnode on dnode 1 +sql create snode on dnode 1 +sql create bnode on dnode 1 + +return +print =============== restart +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start + + +return +system sh/deploy.sh -n dnode2 -i 2 +system sh/exec.sh -n dnode2 -s start +system sh/exec.sh -n dnode2 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s start + +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s stop -x SIGINT