diff --git a/docs/zh/06-advanced/06-TDgpt/04-forecast/04-lstm.md b/docs/zh/06-advanced/06-TDgpt/04-forecast/04-lstm.md new file mode 100644 index 0000000000..dd00a18885 --- /dev/null +++ b/docs/zh/06-advanced/06-TDgpt/04-forecast/04-lstm.md @@ -0,0 +1,31 @@ +--- +title: "LSTM" +sidebar_label: "LSTM" +--- + +本节说明 LSTM 模型的使用方法。 + +## 功能概述 + +LSTM 模型即长短期记忆网络(Long Short-Term Memory),是一种特殊的循环神经网络,适用于处理时间序列数据、自然语言处理等任务,通过其独特的门控机制,能够有效捕捉长期依赖关系, +解决传统 RNN 的梯度消失问题,从而对序列数据进行准确预测,不过它不直接提供计算的置信区间范围结果。 + + +完整的调用 SQL 语句如下: +```SQL +SELECT _frowts, FORECAST(i32, "algo=lstm,alpha=95,period=10,start_p=1,max_p=5,start_q=1,max_q=5") from foo +``` + +```json5 +{ +"rows": fc_rows, // 返回结果的行数 +"period": period, // 返回结果的周期性,同输入 +"alpha": alpha, // 返回结果的置信区间,同输入 +"algo": "lstm", // 返回结果使用的算法 +"mse": mse, // 拟合输入时间序列时生成模型的最小均方误差(MSE) +"res": res // 列模式的结果 +} +``` + +### 参考文献 +- [1] Hochreiter S. Long Short-term Memory[J]. Neural Computation MIT-Press, 1997. \ No newline at end of file diff --git a/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md b/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md index b752d446eb..80a5cbe972 100644 --- a/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md +++ b/docs/zh/06-advanced/06-TDgpt/05-anomaly-detection/04-machine-learning.md @@ -3,7 +3,9 @@ title: "机器学习算法" sidebar_label: "机器学习算法" --- -Autoencoder[1]: TDgpt 内置使用自编码器(Autoencoder)的异常检测算法,对周期性的时间序列数据具有较好的检测结果。使用该模型需要针对输入时序数据进行预训练,同时将训练完成的模型保存在到服务目录 `ad_autoencoder` 中,然后在 SQL 语句中指定调用该算法模型即可使用。 +Autoencoder[1]: TDgpt 内置使用自编码器(Autoencoder)的异常检测算法, +对周期性的时间序列数据具有较好的检测结果。使用该模型需要针对输入时序数据进行预训练, +同时将训练完成的模型保存到服务目录 `ad_autoencoder` 中,然后在 SQL 语句中指定调用该算法模型即可使用。 ```SQL --- 在 options 中增加 model 的名称,ad_autoencoder_foo, 针对 foo 数据集(表)训练的采用自编码器的异常检测模型进行异常检测 diff --git a/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.info b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.info new file mode 100644 index 0000000000..0703c99255 
Binary files /dev/null and b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.info differ diff --git a/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.keras b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.keras new file mode 100644 index 0000000000..43d90cb986 Binary files /dev/null and b/tools/tdgpt/model/sample-ad-autoencoder/sample-ad-autoencoder.keras differ diff --git a/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py b/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py index e58db3f54b..89813656fc 100644 --- a/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py +++ b/tools/tdgpt/taosanalytics/algo/ad/autoencoder.py @@ -4,6 +4,7 @@ import os.path import joblib +import keras import numpy as np import pandas as pd @@ -13,8 +14,8 @@ from taosanalytics.util import create_sequences class _AutoEncoderDetectionService(AbstractAnomalyDetectionService): - name = 'ad_encoder' - desc = "anomaly detection based on auto encoder" + name = 'sample_ad_model' + desc = "sample anomaly detection model based on auto encoder" def __init__(self): super().__init__() @@ -25,7 +26,7 @@ class _AutoEncoderDetectionService(AbstractAnomalyDetectionService): self.threshold = None self.time_interval = None self.model = None - self.dir = 'ad_autoencoder' + self.dir = 'sample-ad-autoencoder' self.root_path = conf.get_model_directory() @@ -61,11 +62,6 @@ class _AutoEncoderDetectionService(AbstractAnomalyDetectionService): # Detect all the samples which are anomalies. anomalies = mae > self.threshold - # syslogger.log_inst( - # "Number of anomaly samples: %f, Indices of anomaly samples:{}". 
- # format(np.sum(anomalies), np.where(anomalies)) - # ) - # data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies ad_indices = [] for data_idx in range(self.time_interval - 1, @@ -82,13 +78,13 @@ class _AutoEncoderDetectionService(AbstractAnomalyDetectionService): name = params['model'] - module_file_path = f'{self.root_path}/{name}.dat' + module_file_path = f'{self.root_path}/{name}.keras' module_info_path = f'{self.root_path}/{name}.info' app_logger.log_inst.info("try to load module:%s", module_file_path) if os.path.exists(module_file_path): - self.model = joblib.load(module_file_path) + self.model = keras.models.load_model(module_file_path) else: app_logger.log_inst.error("failed to load autoencoder model file: %s", module_file_path) raise FileNotFoundError(f"{module_file_path} not found") diff --git a/tools/tdgpt/taosanalytics/algo/fc/arima.py b/tools/tdgpt/taosanalytics/algo/fc/arima.py index 9e087a5e9e..fa587c3604 100644 --- a/tools/tdgpt/taosanalytics/algo/fc/arima.py +++ b/tools/tdgpt/taosanalytics/algo/fc/arima.py @@ -68,24 +68,6 @@ class _ArimaService(AbstractForecastService): fc = model.predict(n_periods=fc_rows, return_conf_int=self.return_conf, alpha=self.conf) - # plt.plot(source_data, label='training') - # plt.plot(xrange, actual_data, label='actual') - - # fc_list = fc.tolist() - # fc_without_diff = restore_from_diff(self.list, fc_list, 2) - # print(fc_without_diff) - - # plt.plot(xrange, fc_without_diff, label='fc') - - # residuals = pd.DataFrame(model.arima_res_.resid) - # wn = is_white_noise(residuals) - # print("residual is white noise:", wn) - - # fig, ax = plt.subplots(1, 2) - # residuals.plot(title="Residuals", ax=ax[0]) - # residuals.plot(kind='kde', title='Density', ax=ax[1]) - # plt.show() - res1 = [fc[0].tolist(), fc[1][:, 0].tolist(), fc[1][:, 1].tolist()] if self.return_conf else [fc.tolist()] diff --git a/tools/tdgpt/taosanalytics/algo/fc/lstm.py b/tools/tdgpt/taosanalytics/algo/fc/lstm.py new file mode 100644 
# encoding:utf-8
# pylint: disable=c0103
"""Sample forecast service backed by a pre-trained Keras LSTM model."""
import os.path

import keras

from taosanalytics.algo.forecast import insert_ts_list
from taosanalytics.conf import app_logger, conf
from taosanalytics.service import AbstractForecastService


class _LSTMService(AbstractForecastService):
    """Forecast service that predicts with a Keras model loaded from disk.

    The model file is looked up as ``<model directory>/sample-fc-lstm/<name>.keras``
    where ``<name>`` comes from the ``model`` entry passed to ``set_params``.
    """

    name = 'sample_forecast_model'
    desc = "sample forecast model based on LSTM"

    def __init__(self):
        super().__init__()

        # These attributes are never read inside this class; they are kept so
        # that any external code inspecting the instance still finds them.
        self.table_name = None
        self.mean = None
        self.std = None
        self.threshold = None
        self.time_interval = None

        # Set by set_params(); execute() refuses to run while it is None.
        self.model = None
        self.dir = 'sample-fc-lstm'

        self.root_path = conf.get_model_directory()
        self.root_path = self.root_path + f'/{self.dir}/'

        if not os.path.exists(self.root_path):
            app_logger.log_inst.error(
                "%s forecast algorithm failed to locate default module directory:"
                "%s, not active", self.__class__.__name__, self.root_path)
        else:
            app_logger.log_inst.info("%s forecast algorithm root path is: %s",
                                     self.__class__.__name__, self.root_path)

    def execute(self):
        """Run the loaded model over the input list and return the forecast.

        Returns:
            ``[]`` when the input is empty, otherwise a dict with ``"mse"``
            (always ``None``) and ``"res"`` (a list of columns).

        Raises:
            FileNotFoundError: if no model has been loaded via ``set_params``.
        """
        if self.input_is_empty():
            return []

        if self.model is None:
            raise FileNotFoundError("not load LSTM model yet, or load model failed")

        res = self.model.predict(self.list)

        # NOTE(review): insert_ts_list is defined outside this file; it is
        # called on the raw prediction exactly as before. Confirm whether it
        # should instead receive the column list built below.
        insert_ts_list(res, self.start_ts, self.time_step, self.fc_rows)

        # No confidence interval is computed here: when one is requested the
        # forecast itself is repeated for the two extra columns.
        # "res" must be a plain list of columns; the previous trailing
        # ", None" silently turned it into a (list, None) tuple.
        if self.return_conf:
            res1 = [res.tolist(), res.tolist(), res.tolist()]
        else:
            res1 = [res.tolist()]

        return {
            "mse": None,
            "res": res1
        }

    def set_params(self, params):
        """Load the Keras model named by ``params['model']``.

        Raises:
            ValueError: if ``params`` has no ``"model"`` entry.
            FileNotFoundError: if the ``.keras`` file does not exist.
        """
        if "model" not in params:
            raise ValueError("model needs to be specified")

        name = params['model']

        # root_path already ends with a separator; join avoids the doubled
        # slash that plain string formatting produced.
        module_file_path = os.path.join(self.root_path, f'{name}.keras')

        app_logger.log_inst.info("try to load module:%s", module_file_path)

        if os.path.exists(module_file_path):
            self.model = keras.models.load_model(module_file_path)
        else:
            app_logger.log_inst.error("failed to load LSTM model file: %s", module_file_path)
            raise FileNotFoundError(f"{module_file_path} not found")

    def get_params(self):
        """Return the glob pattern of the directory holding this service's models."""
        return {"dir": self.dir + '/*'}