Merge pull request #30322 from taosdata/fix/mergegpt

fix(gpt): fix error in anomalywindow count
2025-03-22 16:58:55 +08:00 · 2025-03-22 16:58:55 +08:00 · 2e42a4b046
parent 05fd1cd12d fdb395f628
commit 2e42a4b046
14 changed files with 41 additions and 33 deletions
--- a/source/libs/executor/src/anomalywindowoperator.c
+++ b/source/libs/executor/src/anomalywindowoperator.c
@ -327,7 +327,7 @@ static int32_t anomalyParseJson(SJson* pJson, SArray* pWindows, const char* pId)
      qError("%s failed to exec forecast, msg:%s", pId, pMsg);
    }
-    return TSDB_CODE_ANA_INTERNAL_ERROR;
+    return TSDB_CODE_ANA_ANODE_RETURN_ERROR;
  } else if (rows == 0) {
    return TSDB_CODE_SUCCESS;
  }
@ -593,7 +593,7 @@ static int32_t anomalyAggregateBlocks(SOperatorInfo* pOperator) {
    for (int32_t r = 0; r < pBlock->info.rows; ++r) {
      TSKEY key = tsList[r];
-      bool  keyInWin = (key >= pSupp->curWin.skey && key < pSupp->curWin.ekey);
+      bool  keyInWin = (key >= pSupp->curWin.skey && key <= pSupp->curWin.ekey);
      bool  lastRow = (r == pBlock->info.rows - 1);
      if (keyInWin) {
--- a/source/libs/executor/src/forecastoperator.c
+++ b/source/libs/executor/src/forecastoperator.c
@ -235,7 +235,7 @@ static int32_t forecastAnalysis(SForecastSupp* pSupp, SSDataBlock* pBlock, const
    }
    tjsonDelete(pJson);
-    return TSDB_CODE_ANA_INTERNAL_ERROR;
+    return TSDB_CODE_ANA_ANODE_RETURN_ERROR;
  }
  if (code < 0) {
--- a/source/util/src/terror.c
+++ b/source/util/src/terror.c
@ -377,7 +377,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_ANA_BUF_INVALID_TYPE,        "Analysis invalid buffe
 TAOS_DEFINE_ERROR(TSDB_CODE_ANA_ANODE_RETURN_ERROR,      "Analysis failed since anode return error")
 TAOS_DEFINE_ERROR(TSDB_CODE_ANA_ANODE_TOO_MANY_ROWS,     "Analysis failed since too many input rows for anode")
 TAOS_DEFINE_ERROR(TSDB_CODE_ANA_WN_DATA,                 "white-noise data not processed")
-TAOS_DEFINE_ERROR(TSDB_CODE_ANA_INTERNAL_ERROR,          "tdgpt internal error, not processed")
+TAOS_DEFINE_ERROR(TSDB_CODE_ANA_INTERNAL_ERROR,          "Analysis internal error, not processed")
 // mnode-sma
 TAOS_DEFINE_ERROR(TSDB_CODE_MND_SMA_ALREADY_EXIST,        "SMA already exists")
--- a/tests/script/tsim/analytics/basic0.sim
+++ b/tests/script/tsim/analytics/basic0.sim
@ -23,8 +23,8 @@ endi
 print =============== show info
 sql show anodes full
-if $rows != 8 then
+if $rows != 10 then
-  print expect 8 , actual $rows
+  print expect 10 , actual $rows
  return -1
 endi
--- a/tools/tdgpt/cfg/taosanode.ini
+++ b/tools/tdgpt/cfg/taosanode.ini
@ -78,4 +78,4 @@ model-dir = /usr/local/taos/taosanode/model/
 log-level = DEBUG
 # draw the query results
-draw-result = 1
+draw-result = 0
--- a/tools/tdgpt/taosanalytics/algo/anomaly.py
+++ b/tools/tdgpt/taosanalytics/algo/anomaly.py
@ -5,6 +5,7 @@
 from matplotlib import pyplot as plt
 from taosanalytics.conf import app_logger, conf
 from taosanalytics.servicemgmt import loader
 from taosanalytics.util import convert_results_to_windows
 def do_ad_check(input_list, ts_list, algo_name, params):
@ -22,17 +23,19 @@ def do_ad_check(input_list, ts_list, algo_name, params):
    res = s.execute()
-    n_error = abs(sum(filter(lambda x: x == -1, res)))
+    n_error = abs(sum(filter(lambda x: x != s.valid_code, res)))
    app_logger.log_inst.debug("There are %d in input, and %d anomaly points found: %s",
                              len(input_list),
                              n_error,
                              res)
-    draw_ad_results(input_list, res, algo_name)
+    # draw_ad_results(input_list, res, algo_name, s.valid_code)
-    return res
+
    ano_window = convert_results_to_windows(res, ts_list, s.valid_code)
    return res, ano_window
-def draw_ad_results(input_list, res, fig_name):
+def draw_ad_results(input_list, res, fig_name, valid_code):
    """ draw the detected anomaly points """
    # not in debug, do not visualize the anomaly detection result
@ -41,9 +44,8 @@ def draw_ad_results(input_list, res, fig_name):
    plt.clf()
    for index, val in enumerate(res):
-        if val != -1:
+        if val != valid_code:
-            continue
+            plt.scatter(index, input_list[index], marker='o', color='r', alpha=0.5, s=100, zorder=3)
        plt.scatter(index, input_list[index], marker='o', color='r', alpha=0.5, s=100, zorder=3)
    plt.plot(input_list, label='sample')
    plt.savefig(fig_name)
--- a/tools/tdgpt/taosanalytics/algo/fc/gpt.py
+++ b/tools/tdgpt/taosanalytics/algo/fc/gpt.py
@ -10,7 +10,7 @@ from taosanalytics.service import AbstractForecastService
 class _GPTService(AbstractForecastService):
-    name = 'td_gpt_fc'
+    name = 'TDtsfm_1'
    desc = "internal gpt forecast model based on transformer"
    def __init__(self):
@ -23,7 +23,6 @@ class _GPTService(AbstractForecastService):
        self.std = None
        self.threshold = None
        self.time_interval = None
        self.dir = 'internal-gpt'
    def execute(self):
--- a/tools/tdgpt/taosanalytics/algo/forecast.py
+++ b/tools/tdgpt/taosanalytics/algo/forecast.py
@ -34,7 +34,7 @@ def do_forecast(input_list, ts_list, algo_name, params):
    check_fc_results(res)
    fc = res["res"]
-    draw_fc_results(input_list, len(fc) > 2, fc, len(fc[0]), algo_name)
+    # draw_fc_results(input_list, len(fc) > 2, fc, len(fc[0]), algo_name)
    return res
--- a/tools/tdgpt/taosanalytics/app.py
+++ b/tools/tdgpt/taosanalytics/app.py
@ -22,11 +22,12 @@ app_logger.set_handler(conf.get_log_path())
 app_logger.set_log_level(conf.get_log_level())
 loader.load_all_service()
 _ANODE_VER = 'TDgpt - TDengine© Time-Series Data Analytics Platform (ver 3.3.6.0)'
@app.route("/")
 def start():
    """ default rsp """
-    return "TDengine© Time Series Data Analytics Platform (ver 1.0.1)"
+    return _ANODE_VER
@app.route("/status")
@ -90,9 +91,7 @@ def handle_ad_request():
    # 4. do anomaly detection
    try:
-        res_list = do_ad_check(payload[data_index], payload[ts_index], algo, params)
+        res_list, ano_window = do_ad_check(payload[data_index], payload[ts_index], algo, params)
        ano_window = convert_results_to_windows(res_list, payload[ts_index])
        result = {"algo": algo, "option": options, "res": ano_window, "rows": len(ano_window)}
        app_logger.log_inst.debug("anomaly-detection result: %s", str(result))
--- a/tools/tdgpt/taosanalytics/service.py
+++ b/tools/tdgpt/taosanalytics/service.py
@ -51,6 +51,7 @@ class AbstractAnomalyDetectionService(AbstractAnalyticsService, ABC):
     inherent from this class"""
    def __init__(self):
        self.valid_code = 1
        super().__init__()
        self.type = "anomaly-detection"
@ -58,6 +59,12 @@ class AbstractAnomalyDetectionService(AbstractAnalyticsService, ABC):
        """ check if the input list is empty or None """
        return (self.list is None) or (len(self.list) == 0)
    def set_params(self, params: dict) -> None:
        super().set_params(params)
        if "valid_code" in params:
            self.valid_code = int(params["valid_code"])
 class AbstractForecastService(AbstractAnalyticsService, ABC):
    """abstract forecast service, all forecast algorithms class should be inherent from
--- a/tools/tdgpt/taosanalytics/test/anomaly_test.py
+++ b/tools/tdgpt/taosanalytics/test/anomaly_test.py
@ -44,7 +44,7 @@ class AnomalyDetectionTest(unittest.TestCase):
        s.set_params({"k": 2})
        r = s.execute()
-        draw_ad_results(AnomalyDetectionTest.input_list, r, "ksigma")
+        draw_ad_results(AnomalyDetectionTest.input_list, r, "ksigma", s.valid_code)
        self.assertEqual(r[-1], -1)
        self.assertEqual(len(r), len(AnomalyDetectionTest.input_list))
@ -64,7 +64,7 @@ class AnomalyDetectionTest(unittest.TestCase):
            self.assertEqual(1, 0, e)
        r = s.execute()
-        draw_ad_results(AnomalyDetectionTest.input_list, r, "iqr")
+        draw_ad_results(AnomalyDetectionTest.input_list, r, "iqr", s.valid_code)
        self.assertEqual(r[-1], -1)
        self.assertEqual(len(r), len(AnomalyDetectionTest.input_list))
@ -82,7 +82,7 @@ class AnomalyDetectionTest(unittest.TestCase):
        s.set_params({"alpha": 0.95})
        r = s.execute()
-        draw_ad_results(AnomalyDetectionTest.input_list, r, "grubbs")
+        draw_ad_results(AnomalyDetectionTest.input_list, r, "grubbs", s.valid_code)
        self.assertEqual(r[-1], -1)
        self.assertEqual(len(r), len(AnomalyDetectionTest.input_list))
@ -100,7 +100,7 @@ class AnomalyDetectionTest(unittest.TestCase):
        s.set_input_list(AnomalyDetectionTest.input_list, None)
        r = s.execute()
-        draw_ad_results(AnomalyDetectionTest.input_list, r, "shesd")
+        draw_ad_results(AnomalyDetectionTest.input_list, r, "shesd", s.valid_code)
        self.assertEqual(r[-1], -1)
@ -116,7 +116,7 @@ class AnomalyDetectionTest(unittest.TestCase):
        s.set_input_list(AnomalyDetectionTest.input_list, None)
        r = s.execute()
-        draw_ad_results(AnomalyDetectionTest.input_list, r, "lof")
+        draw_ad_results(AnomalyDetectionTest.input_list, r, "lof", s.valid_code)
        self.assertEqual(r[-1], -1)
        self.assertEqual(r[-2], -1)
--- a/tools/tdgpt/taosanalytics/test/restful_api_test.py
+++ b/tools/tdgpt/taosanalytics/test/restful_api_test.py
@ -23,7 +23,8 @@ class RestfulTest(TestCase):
        """ test asscess default main page """
        response = self.client.get('/')
        self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.content_length, len("TDengine© Time Series Data Analytics Platform (ver 1.0.1)") + 1)
+        self.assertEqual(response.content_length,
                         len("TDgpt - TDengine© Time-Series Data Analytics Platform (ver 3.3.6.0)") + 1)
    def test_load_status(self):
        """ test load the server status """
--- a/tools/tdgpt/taosanalytics/test/unit_test.py
+++ b/tools/tdgpt/taosanalytics/test/unit_test.py
@ -17,7 +17,7 @@ class UtilTest(unittest.TestCase):
    def test_generate_anomaly_window(self):
        # Test case 1: Normal input
        wins = convert_results_to_windows([1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, -1],
-                                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
+                                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 1)
        print(f"The result window is:{wins}")
        # Assert the number of windows
@ -30,15 +30,15 @@ class UtilTest(unittest.TestCase):
        self.assertListEqual(wins[1], [12, 12])
        # Test case 2: Anomaly input list is empty
-        wins = convert_results_to_windows([], [1, 2])
+        wins = convert_results_to_windows([], [1, 2], 1)
        self.assertListEqual(wins, [])
        # Test case 3: Anomaly input list is None
-        wins = convert_results_to_windows([], None)
+        wins = convert_results_to_windows([], None, 1)
        self.assertListEqual(wins, [])
        # Test case 4: Timestamp list is None
-        wins = convert_results_to_windows(None, [])
+        wins = convert_results_to_windows(None, [], 1)
        self.assertListEqual(wins, [])
    def test_validate_input_data(self):
--- a/tools/tdgpt/taosanalytics/util.py
+++ b/tools/tdgpt/taosanalytics/util.py
@ -36,7 +36,7 @@ def validate_pay_load(json_obj):
        raise ValueError('invalid schema info, data column is missing')
-def convert_results_to_windows(result, ts_list):
+def convert_results_to_windows(result, ts_list, valid_code):
    """generate the window according to anomaly detection result"""
    skey, ekey = -1, -1
    wins = []
@ -45,7 +45,7 @@ def convert_results_to_windows(result, ts_list):
        return wins
    for index, val in enumerate(result):
-        if val == -1:
+        if val != valid_code:
            ekey = ts_list[index]
            if skey == -1:
                skey = ts_list[index]