diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h
index 9d97c9004f..9c0302a15f 100644
--- a/include/libs/monitor/clientMonitor.h
+++ b/include/libs/monitor/clientMonitor.h
@@ -24,6 +24,14 @@ extern "C" {
 #include "thash.h"
 #include "query.h"
 
+typedef enum SQL_RESULT_CODE {  // outcome reported through the "result" label of the client SQL counters
+  SQL_RESULT_SUCCESS = 0,
+  SQL_RESULT_FAILED = 1,
+  SQL_RESULT_CANCEL = 2,  // not produced yet: the kill branch in selectLog/SlowQueryLog is commented out
+} SQL_RESULT_CODE;
+
+const char* resultStr(SQL_RESULT_CODE code);  // label text for the three codes above
+
 typedef struct {
   char clusterKey[512];
   SEpSet epSet;
@@ -38,8 +46,7 @@ void clusterMonitorClose(const char* clusterKey);
 taos_counter_t* createClusterCounter(const char* clusterKey, const char* name, const char* help, size_t label_key_count,
                                      const char** label_keys);
 int taosClusterCounterInc(const char* clusterKey, const char* counterName, const char** label_values);
-
-void cluster_monitor_stop();
+void cluster_monitor_stop();
 
 #ifdef __cplusplus
 }
diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h
index bae0627055..257ee35aaf 100644
--- a/source/client/inc/clientInt.h
+++ b/source/client/inc/clientInt.h
@@ -432,11 +432,10 @@ int32_t clientParseSqlImpl(void* param, const char* dbName, const char* sql, boo
 #endif
 
 void clusterSlowQueryMonitorInit(const char* clusterKey);
-void clusterSlowQueryLog(const char* clusterKey, int32_t cost);
-void SlowQueryLog(int64_t rid, int32_t cost);
+void SlowQueryLog(int64_t rid, bool killed, int32_t code, int32_t cost);  // called from deregisterRequest
 
 void clusterSelectMonitorInit(const char* clusterKey);
-void clusterSelectLog(const char* clusterKey);
+void selectLog(int64_t rid, bool killed, int32_t code);  // called from deregisterRequest
 
 #ifdef __cplusplus
 }
diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c
index d5f444a0e3..bd83f15e7e 100644
--- a/source/client/src/clientEnv.c
+++ b/source/client/src/clientEnv.c
@@ -105,6 +105,7 @@ static void deregisterRequest(SRequestObj *pRequest) {
                pRequest->metric.planCostUs, pRequest->metric.execCostUs);
 
       atomic_add_fetch_64((int64_t *)&pActivity->queryElapsedTime, duration);
+      selectLog(pTscObj->id, pRequest->killed, pRequest->code);  // count this query in the select counter
       reqType = SLOW_LOG_TYPE_QUERY;
     }
   }
@@ -115,7 +116,7 @@ static void deregisterRequest(SRequestObj *pRequest) {
       taosPrintSlowLog("PID:%d, Conn:%u, QID:0x%" PRIx64 ", Start:%" PRId64 ", Duration:%" PRId64 "us, SQL:%s",
                        taosGetPId(), pTscObj->connId, pRequest->requestId, pRequest->metric.start, duration,
                        pRequest->sqlstr);
-      SlowQueryLog(pTscObj->id, duration);
+      SlowQueryLog(pTscObj->id, pRequest->killed, pRequest->code, duration);  // same duration as logged above ("us")
     }
   }
 
diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c
index 405df7377f..a165ef6e88 100644
--- a/source/client/src/clientImpl.c
+++ b/source/client/src/clientImpl.c
@@ -160,6 +160,7 @@ STscObj* taos_connect_internal(const char* ip, const char* user, const char* pas
 
     pInst = &p;
     clusterSlowQueryMonitorInit(p->instKey);
+    clusterSelectMonitorInit(p->instKey);  // register the select counter for this instance key
   } else {
     ASSERTS((*pInst) && (*pInst)->pAppHbMgr, "*pInst:%p, pAppHgMgr:%p", *pInst, (*pInst) ? (*pInst)->pAppHbMgr : NULL);
     // reset to 0 in case of conn with duplicated user key but its user has ever been dropped.
diff --git a/source/client/src/selectMonitor.c b/source/client/src/clientSqlMonitor.c
similarity index 60%
rename from source/client/src/selectMonitor.c
rename to source/client/src/clientSqlMonitor.c
index c1cab23188..79a862275f 100644
--- a/source/client/src/selectMonitor.c
+++ b/source/client/src/clientSqlMonitor.c
@@ -16,10 +16,12 @@
 #include "clientMonitor.h"
 #include "clientLog.h"
 
-const char* selectMonitorName = "slow_query";
-const char* selectMonitorHelp = "slow query log when cost > 3s";
-const int selectMonitorLabelCount = 1;
-const char* selectMonitorLabels[] = {"default"};
+const char* selectMonitorName = "select sql";
+const char* selectMonitorHelp = "count for select sql";
+const int selectMonitorLabelCount = 4;
+const char* selectMonitorLabels[] = {"cluster_id", "sql_type", "username", "result"};
+
+static const char* defaultClusterID = "";  // the cluster_id label is currently reported empty
 
 void clusterSelectMonitorInit(const char* clusterKey) {
   SAppInstInfo* pAppInstInfo = getAppInstInfo(clusterKey);
@@ -28,18 +30,35 @@ void clusterSelectMonitorInit(const char* clusterKey) {
   createClusterCounter(clusterKey, selectMonitorName, selectMonitorHelp, selectMonitorLabelCount, selectMonitorLabels);
 }
 
-void clusterSelectLog(const char* clusterKey) {
-  const char* selectMonitorLabelValues[] = {"default"};
+// Increments the select counter of clusterKey. Label values are given in the same order as
+// selectMonitorLabels: cluster_id, sql_type, username, result.
+void clusterSelectLog(const char* clusterKey, const char* user, SQL_RESULT_CODE result) {
+  const char* selectMonitorLabelValues[] = {defaultClusterID, "select", user, resultStr(result)};
   taosClusterCounterInc(clusterKey, selectMonitorName, selectMonitorLabelValues);
 }
 
-void selectLog(int64_t rid) {
+// Counts one select for the connection rid. Any code other than TSDB_CODE_SUCCESS is reported as
+// SQL_RESULT_FAILED; killed is not evaluated yet (see the to-do below).
+void selectLog(int64_t rid, bool killed, int32_t code) {
+  SQL_RESULT_CODE result = SQL_RESULT_SUCCESS;
+  if (TSDB_CODE_SUCCESS != code) {
+    result = SQL_RESULT_FAILED;
+  }
+  // to do Distinguish active Kill events
+  // else if (killed) {
+  //   result = SQL_RESULT_CANCEL;
+  // }
+
   STscObj* pTscObj = acquireTscObj(rid);
   if (pTscObj != NULL) {
-    if(pTscObj->pAppInfo == NULL) {
+    if (pTscObj->pAppInfo == NULL) {
       tscLog("selectLog, not found pAppInfo");
-    }
-    return clusterSelectLog(pTscObj->pAppInfo->instKey);
+    } else {
+      // pAppInfo is only dereferenced after the NULL check above
+      clusterSelectLog(pTscObj->pAppInfo->instKey, pTscObj->user, result);
+    }
+    // acquireTscObj() must be paired with releaseTscObj(), otherwise the connection ref is never dropped
+    releaseTscObj(rid);
   } else {
     tscLog("selectLog, not found rid");
   }
diff --git a/source/client/src/slowQueryMonitor.c b/source/client/src/slowQueryMonitor.c
index 420b66a954..8f6253007f 100644
--- a/source/client/src/slowQueryMonitor.c
+++ b/source/client/src/slowQueryMonitor.c
@@ -18,28 +18,23 @@
 #include "tglobal.h"
 
 const char* slowQueryName = "slow_query";
-const char* slowQueryHelp = "slow query log when cost > 3s";
-const int slowQueryLabelCount = 1;
-const char* slowQueryLabels[] = {"cost"};
+const char* slowQueryHelp = "slow query log when cost over than config duration";
+const int slowQueryLabelCount = 4;
+const char* slowQueryLabels[] = {"cluster_id", "username", "result", "duration"};
+static const char* defaultClusterID = "";  // the cluster_id label is currently reported empty
 
 const int64_t msInSeconds = 1000;
 const int64_t msInMinutes = 60 * 1000;
 
 static const char* getSlowQueryLableCostDesc(int64_t cost) {
-  if (cost >= 30 * msInMinutes) {
-    return " > 30 min";
-  } else if (cost >= 10 * msInMinutes) {
-    return " > 10 min";
-  } else if (cost >= 5 * msInMinutes) {
-    return " > 5 min";
-  } else if (cost >= 1 * msInMinutes) {
-    return " > 1 min";
-  } else if (cost >= 30 * msInSeconds) {
-    return " > 30 seconds";
+  if (cost >= 10000 * msInSeconds) {
+    return " > 10000 seconds";
+  } else if (cost >= 1000 * msInSeconds) {
+    return " > 1000 seconds";
+  } else if (cost >= 100 * msInSeconds) {
+    return " > 100 seconds";
   } else if (cost >= 10 * msInSeconds) {
     return " > 10 seconds";
-  } else if (cost >= 5 * msInSeconds) {
-    return " > 5 seconds";
   } else if (cost >= 3 * msInSeconds) {
     return " > 3 seconds";
   }
@@ -54,19 +49,38 @@ void clusterSlowQueryMonitorInit(const char* clusterKey) {
   createClusterCounter(clusterKey, slowQueryName, slowQueryHelp, slowQueryLabelCount, slowQueryLabels);
 }
 
-void clusterSlowQueryLog(const char* clusterKey, int32_t cost) {
-  const char* slowQueryLabelValues[] = {getSlowQueryLableCostDesc(cost)};
+// Increments the slow-query counter of clusterKey. cost is bucketed by getSlowQueryLableCostDesc, whose
+// thresholds are in milliseconds; label order matches slowQueryLabels.
+void clusterSlowQueryLog(const char* clusterKey, const char* user, SQL_RESULT_CODE result, int32_t cost) {
+  const char* slowQueryLabelValues[] = {defaultClusterID, user, resultStr(result), getSlowQueryLableCostDesc(cost)};
   taosClusterCounterInc(clusterKey, slowQueryName, slowQueryLabelValues);
 }
 
-void SlowQueryLog(int64_t rid, int32_t cost) {
+// Counts one slow query for the connection rid; a no-op unless enableSlowQueryMonitor is set.
+// cost is the request duration in microseconds as passed by deregisterRequest.
+// NOTE(review): cost is int32_t, so durations above ~2147 s do not fit; widening needs a header change.
+void SlowQueryLog(int64_t rid, bool killed, int32_t code, int32_t cost) {
   if (!enableSlowQueryMonitor) return;
+  SQL_RESULT_CODE result = SQL_RESULT_SUCCESS;
+  if (TSDB_CODE_SUCCESS != code) {
+    result = SQL_RESULT_FAILED;
+  }
+  // to do Distinguish active Kill events
+  // else if (killed) {
+  //   result = SQL_RESULT_CANCEL;
+  // }
+
   STscObj* pTscObj = acquireTscObj(rid);
   if (pTscObj != NULL) {
     if(pTscObj->pAppInfo == NULL) {
       tscLog("SlowQueryLog, not found pAppInfo");
-    }
-    return clusterSlowQueryLog(pTscObj->pAppInfo->instKey, cost);
+    } else {
+      // The duration buckets are defined in milliseconds (msInSeconds), so convert from microseconds
+      // before bucketing.
+      clusterSlowQueryLog(pTscObj->pAppInfo->instKey, pTscObj->user, result, cost / 1000);
+    }
+    // acquireTscObj() must be paired with releaseTscObj(), otherwise the connection ref is never dropped
+    releaseTscObj(rid);
   } else {
     tscLog("SlowQueryLog, not found rid");
   }
diff --git a/source/libs/monitor/src/clientMonitor.c b/source/libs/monitor/src/clientMonitor.c
index db5fde0d8d..e3b49da858 100644
--- a/source/libs/monitor/src/clientMonitor.c
+++ b/source/libs/monitor/src/clientMonitor.c
@@ -3,21 +3,26 @@
 #include "tmisce.h"
 #include "ttime.h"
 #include "ttimer.h"
+#include "tglobal.h"
 
 SRWLatch monitorLock;
 void* tmrClientMonitor;
 tmr_h tmrStartHandle;
 SHashObj* clusterMonitorInfoTable;
 
-static const int interval = 1000; // ms
-static const int sendBathchSize = 10;
+static int interval = 30 * 1000;
+static int sendBathchSize = 1;
 
 int32_t sendReport(ClientMonitor* pMonitor, char* pCont);
 void generateClusterReport(ClientMonitor* pMonitor, bool send) {
   char ts[50];
   sprintf(ts, "%" PRId64, taosGetTimestamp(TSDB_TIME_PRECISION_MILLI));
   char* pCont = (char*)taos_collector_registry_bridge_new(pMonitor->registry, ts, "%" PRId64, NULL);
-  if (send && strlen(pCont) != TSDB_CODE_SUCCESS) {
+  if(NULL == pCont) {
+    uError("generateClusterReport failed, get null content.");
+    return;
+  }
+  if (send && strlen(pCont) != 0) {
     if (sendReport(pMonitor, pCont) == 0) {
       taos_collector_registry_clear_batch(pMonitor->registry);
     }
@@ -25,7 +30,7 @@ void generateClusterReport(ClientMonitor* pMonitor, bool send) {
 }
 
 void reportSendProcess(void* param, void* tmrId) {
-  taosTmrReset(reportSendProcess, interval, NULL, tmrClientMonitor, &tmrStartHandle);
+  taosTmrReset(reportSendProcess, tsMonitorInterval * 1000, NULL, tmrClientMonitor, &tmrStartHandle);
   taosRLockLatch(&monitorLock);
 
   static int index = 0;
@@ -49,7 +54,19 @@ void monitorClientInitOnce() {
         (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK);
 
     tmrClientMonitor = taosTmrInit(0, 0, 0, "MONITOR");
+    // Derive the timer period first so the timer is armed with a validated value: a configured
+    // tsMonitorInterval below 1 falls back to the 30 * 1000 ms default.
+    if (tsMonitorInterval < 1) {
+      interval = 30 * 1000;
+    } else {
+      interval = tsMonitorInterval * 1000;
+    }
+    // NOTE(review): sendBathchSize is still 1 here, so this always yields 11; dividing by
+    // tsMonitorInterval may have been intended - confirm before changing.
+    if (tsMonitorInterval < 10) {
+      sendBathchSize = (10 / sendBathchSize) + 1;
+    }
     tmrStartHandle = taosTmrStart(reportSendProcess, interval, NULL, tmrClientMonitor);
     taosInitRWLatch(&monitorLock);
   }
 }
@@ -185,3 +202,13 @@ void clusterMonitorClose(const char* clusterKey) {
   }
   taosWUnLockLatch(&monitorLock);
 }
+
+// Returns the "result" label text for code. Values outside the table (e.g. an uninitialised or
+// future enum value) map to "Unknown" instead of reading past result_state.
+const char* resultStr(SQL_RESULT_CODE code) {
+  static const char* result_state[] = {"Success", "Failed", "Cancel"};
+  if ((size_t)code >= sizeof(result_state) / sizeof(result_state[0])) {
+    return "Unknown";
+  }
+  return result_state[code];
+}