diff --git a/docs/zh/06-advanced/06-TDgpt/04-forecast/04-lstm.md b/docs/zh/06-advanced/06-TDgpt/04-forecast/04-lstm.md
new file mode 100644
index 0000000000..dd00a18885
--- /dev/null
+++ b/docs/zh/06-advanced/06-TDgpt/04-forecast/04-lstm.md
@@ -0,0 +1,31 @@
+---
+title: "LSTM"
+sidebar_label: "LSTM"
+---
+
+本节说明 LSTM 模型的使用方法。
+
+## 功能概述
+
+LSTM 模型即长短期记忆网络(Long Short-Term Memory),是一种特殊的循环神经网络,适用于处理时间序列数据、自然语言处理等任务。它通过独特的门控机制有效捕捉长期依赖关系,
+解决传统 RNN 的梯度消失问题,从而对序列数据进行准确预测,不过它不直接提供计算的置信区间范围结果。
+
+
+完整的调用 SQL 语句如下:
+```SQL
+SELECT _frowts, FORECAST(i32, "algo=lstm,alpha=95,period=10,start_p=1,max_p=5,start_q=1,max_q=5") from foo
+```
+
+```json5
+{
+"rows": fc_rows, // 返回结果的行数
+"period": period, // 返回结果的周期性,同输入
+"alpha": alpha, // 返回结果的置信区间,同输入
+"algo": "lstm", // 返回结果使用的算法
+"mse": mse, // 拟合输入时间序列时生成模型的最小均方误差 (MSE)
+"res": res // 列模式的结果
+}
+```
+
+### 参考文献
+- [1] Hochreiter S, Schmidhuber J. Long Short-Term Memory[J]. Neural Computation, 1997, 9(8): 1735-1780.
\ No newline at end of file
diff --git a/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md b/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md
index b752d446eb..80a5cbe972 100644
--- a/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md
+++ b/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md
@@ -3,7 +3,9 @@ title: "机器学习算法"
sidebar_label: "机器学习算法"
---
-Autoencoder[1]: TDgpt 内置使用自编码器(Autoencoder)的异常检测算法,对周期性的时间序列数据具有较好的检测结果。使用该模型需要针对输入时序数据进行预训练,同时将训练完成的模型保存在到服务目录 `ad_autoencoder` 中,然后在 SQL 语句中指定调用该算法模型即可使用。
+Autoencoder[1]: TDgpt 内置使用自编码器(Autoencoder)的异常检测算法,
+对周期性的时间序列数据具有较好的检测结果。使用该模型需要针对输入时序数据进行预训练,
+同时将训练完成的模型保存到服务目录 `ad_autoencoder` 中,然后在 SQL 语句中指定调用该算法模型即可使用。
```SQL
--- 在 options 中增加 model 的名称,ad_autoencoder_foo, 针对 foo 数据集(表)训练的采用自编码器的异常检测模型进行异常检测
diff --git a/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.info b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.info
new file mode 100644
index 0000000000..0703c99255
Binary files /dev/null and b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.info differ
diff --git a/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.keras b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.keras
new file mode 100644
index 0000000000..43d90cb986
Binary files /dev/null and b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.keras differ
diff --git a/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py b/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py
index e58db3f54b..89813656fc 100644
--- a/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py
+++ b/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py
@@ -4,6 +4,7 @@
import os.path
import joblib
+import keras
import numpy as np
import pandas as pd
@@ -13,8 +14,8 @@ from taosanalytics.util import create_sequences
class _AutoEncoderDetectionService(AbstractAnomalyDetectionService):
- name = 'ad_encoder'
- desc = "anomaly detection based on auto encoder"
+ name = 'sample_ad_model'
+ desc = "sample anomaly detection model based on auto encoder"
def __init__(self):
super().__init__()
@@ -25,7 +26,7 @@ class _AutoEncoderDetectionService(AbstractAnomalyDetectionService):
self.threshold = None
self.time_interval = None
self.model = None
- self.dir = 'ad_autoencoder'
+ self.dir = 'sample-ad-autoencoder'
self.root_path = conf.get_model_directory()
@@ -61,11 +62,6 @@ class _AutoEncoderDetectionService(AbstractAnomalyDetectionService):
# Detect all the samples which are anomalies.
anomalies = mae > self.threshold
- # syslogger.log_inst(
- # "Number of anomaly samples: %f, Indices of anomaly samples:{}".
- # format(np.sum(anomalies), np.where(anomalies))
- # )
-
# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies
ad_indices = []
for data_idx in range(self.time_interval - 1,
@@ -82,13 +78,13 @@ class _AutoEncoderDetectionService(AbstractAnomalyDetectionService):
name = params['model']
- module_file_path = f'{self.root_path}/{name}.dat'
+ module_file_path = f'{self.root_path}/{name}.keras'
module_info_path = f'{self.root_path}/{name}.info'
app_logger.log_inst.info("try to load module:%s", module_file_path)
if os.path.exists(module_file_path):
- self.model = joblib.load(module_file_path)
+ self.model = keras.models.load_model(module_file_path)
else:
app_logger.log_inst.error("failed to load autoencoder model file: %s", module_file_path)
raise FileNotFoundError(f"{module_file_path} not found")
diff --git a/tools/tdgpt/taosanalytics/algo/fc/arima.py b/tools/tdgpt/taosanalytics/algo/fc/arima.py
index 9e087a5e9e..fa587c3604 100644
--- a/tools/tdgpt/taosanalytics/algo/fc/arima.py
+++ b/tools/tdgpt/taosanalytics/algo/fc/arima.py
@@ -68,24 +68,6 @@ class _ArimaService(AbstractForecastService):
fc = model.predict(n_periods=fc_rows, return_conf_int=self.return_conf,
alpha=self.conf)
- # plt.plot(source_data, label='training')
- # plt.plot(xrange, actual_data, label='actual')
-
- # fc_list = fc.tolist()
- # fc_without_diff = restore_from_diff(self.list, fc_list, 2)
- # print(fc_without_diff)
-
- # plt.plot(xrange, fc_without_diff, label='fc')
-
- # residuals = pd.DataFrame(model.arima_res_.resid)
- # wn = is_white_noise(residuals)
- # print("residual is white noise:", wn)
-
- # fig, ax = plt.subplots(1, 2)
- # residuals.plot(title="Residuals", ax=ax[0])
- # residuals.plot(kind='kde', title='Density', ax=ax[1])
- # plt.show()
-
res1 = [fc[0].tolist(), fc[1][:, 0].tolist(),
fc[1][:, 1].tolist()] if self.return_conf else [fc.tolist()]
diff --git a/tools/tdgpt/taosanalytics/algo/fc/lstm.py b/tools/tdgpt/taosanalytics/algo/fc/lstm.py
new file mode 100644
index 0000000000..5edae7fc9f
--- /dev/null
+++ b/tools/tdgpt/taosanalytics/algo/fc/lstm.py
@@ -0,0 +1,96 @@
+# encoding:utf-8
+# pylint: disable=c0103
+""" LSTM based forecast algorithm for time series data"""
+import os.path
+
+import keras
+
+from taosanalytics.algo.forecast import insert_ts_list
+from taosanalytics.conf import app_logger, conf
+from taosanalytics.service import AbstractForecastService
+
+
+class _LSTMService(AbstractForecastService):
+    """Sample forecast service backed by a pre-trained Keras LSTM model.
+
+    The model file is loaded by set_params() from the 'sample-fc-lstm'
+    sub-directory of the configured model directory; execute() then runs
+    the prediction on the input list.
+    """
+    name = 'sample_forecast_model'
+    desc = "sample forecast model based on LSTM"
+
+    def __init__(self):
+        super().__init__()
+
+        # NOTE(review): the five attributes below are never read in this class
+        self.table_name = None
+        self.mean = None
+        self.std = None
+        self.threshold = None
+        self.time_interval = None
+
+        self.model = None  # set by set_params() once the .keras file is loaded
+        self.dir = 'sample-fc-lstm'
+
+        self.root_path = conf.get_model_directory() + f'/{self.dir}/'
+
+        if not os.path.exists(self.root_path):
+            app_logger.log_inst.error(
+                "%s forecast algorithm failed to locate default module directory:"
+                "%s, not active", self.__class__.__name__, self.root_path)
+        else:
+            app_logger.log_inst.info("%s forecast algorithm root path is: %s",
+                                     self.__class__.__name__, self.root_path)
+
+    def execute(self):
+        """Run the loaded model on the input list and return the forecast.
+
+        Returns [] for empty input, otherwise a dict with "mse" (always None)
+        and "res" (the list of result columns).
+        Raises FileNotFoundError when no model has been loaded.
+        """
+        if self.input_is_empty():
+            return []
+
+        if self.model is None:
+            raise FileNotFoundError("not load LSTM model yet, or load model failed")
+
+        res = self.model.predict(self.list)
+
+        # no confidence interval is computed: when one is requested, the
+        # forecast itself is repeated as the lower and upper bound columns
+        res1 = [res.tolist() for _ in range(3 if self.return_conf else 1)]
+
+        # NOTE(review): assumes insert_ts_list() prepends the timestamp column
+        # to the given list in place -- confirm against taosanalytics.algo.forecast
+        insert_ts_list(res1, self.start_ts, self.time_step, self.fc_rows)
+
+        return {
+            "mse": None,
+            "res": res1
+        }
+
+    def set_params(self, params):
+        """Load the Keras model named by params["model"].
+
+        Raises ValueError when "model" is missing from params, and
+        FileNotFoundError when the model file does not exist.
+        """
+        if "model" not in params:
+            raise ValueError("model needs to be specified")
+
+        name = params['model']
+        module_file_path = os.path.join(self.root_path, f'{name}.keras')
+
+        app_logger.log_inst.info("try to load module:%s", module_file_path)
+
+        if not os.path.exists(module_file_path):
+            app_logger.log_inst.error("failed to load LSTM model file: %s", module_file_path)
+            raise FileNotFoundError(f"{module_file_path} not found")
+
+        self.model = keras.models.load_model(module_file_path)
+
+    def get_params(self):
+        """Return the model directory pattern of this service."""
+        return {"dir": self.dir + '/*'}
diff --git a/tools/tdgpt/taosanalytics/test/anomaly_test.py b/tools/tdgpt/taosanalytics/test/anomaly_test.py
index f44a7f0d52..bc173cd25b 100644
--- a/tools/tdgpt/taosanalytics/test/anomaly_test.py
+++ b/tools/tdgpt/taosanalytics/test/anomaly_test.py
@@ -141,15 +141,14 @@ class AnomalyDetectionTest(unittest.TestCase):
def test_autoencoder_ad(self):
"""for local test only, disabled it in github action"""
- pass
-
+ pass
# data = self.__load_remote_data_for_ad()
#
- # s = loader.get_service("ad_encoder")
+ # s = loader.get_service("sample_ad_model")
# s.set_input_list(data)
#
# try:
- # s.set_params({"model": "ad_encoder_"})
+ # s.set_params({"model": "sample-ad-autoencoder"})
# except ValueError as e:
# app_logger.log_inst.error(f"failed to set the param for auto_encoder algorithm, reason:{e}")
# return
@@ -157,9 +156,9 @@ class AnomalyDetectionTest(unittest.TestCase):
# r = s.execute()
#
# num_of_error = -(sum(filter(lambda x: x == -1, r)))
- # self.assertEqual(num_of_error, 109)
- #
# draw_ad_results(data, r, "autoencoder")
+ #
+ # self.assertEqual(num_of_error, 109)
def test_get_all_services(self):
"""Test get all services"""
diff --git a/tools/tdgpt/taosanalytics/test/unit_test.py b/tools/tdgpt/taosanalytics/test/unit_test.py
index f6ecdf0d5b..aef689a8b6 100644
--- a/tools/tdgpt/taosanalytics/test/unit_test.py
+++ b/tools/tdgpt/taosanalytics/test/unit_test.py
@@ -99,7 +99,7 @@ class ServiceTest(unittest.TestCase):
if item["type"] == "anomaly-detection":
self.assertEqual(len(item["algo"]), 6)
else:
- self.assertEqual(len(item["algo"]), 2)
+ self.assertEqual(len(item["algo"]), 3)
if __name__ == '__main__':