diff --git a/APP_Framework/Applications/knowing_app/mnist/model.h b/APP_Framework/Applications/knowing_app/mnist/model.h index ad7b2c594..98f1b1b1e 100644 --- a/APP_Framework/Applications/knowing_app/mnist/model.h +++ b/APP_Framework/Applications/knowing_app/mnist/model.h @@ -1,4 +1,4 @@ -unsigned char mnist_model[] = { +const unsigned char mnist_model[] = { 0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x14, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x1c, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, diff --git a/APP_Framework/Framework/knowing/Kconfig b/APP_Framework/Framework/knowing/Kconfig index a13d6e2fe..13807b9ae 100644 --- a/APP_Framework/Framework/knowing/Kconfig +++ b/APP_Framework/Framework/knowing/Kconfig @@ -1,13 +1,13 @@ -menuconfig SUPPORT_KNOWING_FRAMEWORK - bool "support knowing framework" - default n - select TRANSFORM_LAYER_ATTRIUBUTE - -if SUPPORT_KNOWING_FRAMEWORK - source "$APP_DIR/Framework/knowing/tensorflow-lite/Kconfig" - source "$APP_DIR/Framework/knowing/filter/Kconfig" - source "$APP_DIR/Framework/knowing/ota/Kconfig" - source "$APP_DIR/Framework/knowing/image_processing/Kconfig" - source "$APP_DIR/Framework/knowing/cmsis_5/Kconfig" - source "$APP_DIR/Framework/knowing/kpu/Kconfig" -endif +menuconfig SUPPORT_KNOWING_FRAMEWORK + bool "support knowing framework" + default n + select TRANSFORM_LAYER_ATTRIUBUTE + +if SUPPORT_KNOWING_FRAMEWORK + source "$APP_DIR/Framework/knowing/tensorflow-lite/Kconfig" + source "$APP_DIR/Framework/knowing/filter/Kconfig" + source "$APP_DIR/Framework/knowing/ota/Kconfig" + source "$APP_DIR/Framework/knowing/image_processing/Kconfig" + source "$APP_DIR/Framework/knowing/cmsis_5/Kconfig" + source "$APP_DIR/Framework/knowing/kpu/Kconfig" +endif diff --git a/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c 
b/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c index d0420c239..38fabc951 100644 --- a/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c +++ b/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c @@ -27,7 +27,8 @@ * Target Processor: Cortex-M * * -------------------------------------------------------------------- */ - +#include +#include "../../../Core/Include/cmsis_gcc.h" #include "arm_nnsupportfunctions.h" /** diff --git a/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c index 9ee217935..72dc50a23 100644 --- a/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c +++ b/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c @@ -27,7 +27,8 @@ * Target Processor: Cortex-M * * -------------------------------------------------------------------- */ - +#include +#include "../../../Core/Include/cmsis_gcc.h" #include "arm_nnsupportfunctions.h" /** diff --git a/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c b/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c index 1e799ac39..994ba78a9 100644 --- a/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c +++ b/APP_Framework/Framework/knowing/cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c @@ -28,7 +28,8 @@ * Target Processor: Cortex-M * * -------------------------------------------------------------------- */ - +#include +#include "../../../Core/Include/cmsis_gcc.h" #include "arm_nnsupportfunctions.h" /** diff --git 
a/APP_Framework/Framework/knowing/kpu/k210_yolov2_detect_procedure/k210_yolov2_detect.c b/APP_Framework/Framework/knowing/kpu/k210_yolov2_detect_procedure/k210_yolov2_detect.c index b2d121509..5724e08bc 100644 --- a/APP_Framework/Framework/knowing/kpu/k210_yolov2_detect_procedure/k210_yolov2_detect.c +++ b/APP_Framework/Framework/knowing/kpu/k210_yolov2_detect_procedure/k210_yolov2_detect.c @@ -46,8 +46,8 @@ void k210_detect(char *json_file_path) printf("open ov2640 fail !!"); return; } - _ioctl_set_dvp_reso set_dvp_reso = {detect_params.sensor_output_size[1], detect_params.sensor_output_size[0]}; - ioctl(g_fd, IOCTRL_CAMERA_SET_DVP_RESO, &set_dvp_reso); + _ioctl_set_reso set_dvp_reso = {detect_params.sensor_output_size[1], detect_params.sensor_output_size[0]}; + ioctl(g_fd, IOCTRL_CAMERA_OUT_SIZE_RESO, &set_dvp_reso); showbuffer = (unsigned char *)rt_malloc_align(detect_params.sensor_output_size[0] * detect_params.sensor_output_size[1] * 2, 64); if (NULL == showbuffer) { diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/SConscript b/APP_Framework/Framework/knowing/tensorflow-lite/SConscript index dd1f8e3b9..5325b343d 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/SConscript +++ b/APP_Framework/Framework/knowing/tensorflow-lite/SConscript @@ -102,75 +102,76 @@ tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/svdf.cc ''') cmsis = Split(''' -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c 
-tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c 
-tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c 
-tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c 
-tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c 
-tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c -tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c +../cmsis_5/NN/Source/ActivationFunctions/arm_nn_activations_q15.c +../cmsis_5/NN/Source/ActivationFunctions/arm_nn_activations_q7.c +../cmsis_5/NN/Source/ActivationFunctions/arm_relu6_s8.c +../cmsis_5/NN/Source/ActivationFunctions/arm_relu_q15.c +../cmsis_5/NN/Source/ActivationFunctions/arm_relu_q7.c +../cmsis_5/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c 
+../cmsis_5/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c +../cmsis_5/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c +../cmsis_5/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c +../cmsis_5/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c +../cmsis_5/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_s8.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c 
+../cmsis_5/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c +../cmsis_5/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c +../cmsis_5/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_add_q7.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nntables.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c +../cmsis_5/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c +../cmsis_5/NN/Source/PoolingFunctions/arm_avgpool_s8.c 
+../cmsis_5/NN/Source/PoolingFunctions/arm_max_pool_s8.c +../cmsis_5/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c +../cmsis_5/NN/Source/ReshapeFunctions/arm_reshape_s8.c +../cmsis_5/NN/Source/SVDFunctions/arm_svdf_s8.c +../cmsis_5/NN/Source/SoftmaxFunctions/arm_softmax_q15.c +../cmsis_5/NN/Source/SoftmaxFunctions/arm_softmax_q7.c +../cmsis_5/NN/Source/SoftmaxFunctions/arm_softmax_s8.c +../cmsis_5/NN/Source/SoftmaxFunctions/arm_softmax_u8.c +../cmsis_5/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c ''') CPPPATH = [ @@ -179,10 +180,10 @@ CPPPATH = [ os.path.join(cwd, 'tensorflow-lite-for-mcu/source/third_party/gemmlowp'), os.path.join(cwd, 'tensorflow-lite-for-mcu/source/third_party/flatbuffers/include'), os.path.join(cwd, 'tensorflow-lite-for-mcu/source/third_party/ruy'), - os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis'), - os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include'), - os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include'), - os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include'), + os.path.join(cwd, '../cmsis_5/'), + os.path.join(cwd, '../cmsis_5/DSP/Include'), + os.path.join(cwd, '../cmsis_5/NN/Include'), + os.path.join(cwd, '../cmsis_5/Core/Include'), ] # embedded C++ std don't have some math functions, use global math functions instead diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/kernels/internal/compatibility.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/kernels/internal/compatibility.h index 61becad30..835203a85 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/kernels/internal/compatibility.h +++ 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/kernels/internal/compatibility.h @@ -76,6 +76,7 @@ limitations under the License. #define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT #endif +/* #ifndef TF_LITE_STATIC_MEMORY // TODO(b/162019032): Consider removing these type-aliases. using int8 = std::int8_t; @@ -85,6 +86,7 @@ using uint16 = std::uint16_t; using int32 = std::int32_t; using uint32 = std::uint32_t; #endif // !defined(TF_LITE_STATIC_MEMORY) +*/ // TFLITE_DEPRECATED() // diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/add.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/add.cc index 2816e1182..8a6525e87 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/add.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/add.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/add.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc index b13149d6d..172150d9a 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/conv.h" -#include "CMSIS/NN/Include/arm_nn_types.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nn_types.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc index 7715dbe46..3dffb0b81 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc +++ 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc index 11a0f0bdc..3516dafef 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc index 20686500a..eaf649ac2 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/mul.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" #include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc index e1ac2b595..910b5eec8 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc @@ -14,7 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/lite/kernels/internal/reference/pooling.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "flatbuffers/base.h" // from @flatbuffers #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc index 13a90c607..b1d7580d2 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/softmax.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/svdf.cc b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/svdf.cc index f4ee0c73c..58134683a 100644 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/svdf.cc +++ b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/svdf.cc @@ -16,8 +16,8 @@ limitations under the License. #include #include -#include "CMSIS/NN/Include/arm_nn_types.h" -#include "CMSIS/NN/Include/arm_nnfunctions.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nn_types.h" +#include "../../../../../../../../cmsis_5/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include/cmsis_compiler.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include/cmsis_compiler.h deleted file mode 100644 index adbf296f1..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include/cmsis_compiler.h +++ /dev/null @@ -1,283 +0,0 @@ 
-/**************************************************************************//** - * @file cmsis_compiler.h - * @brief CMSIS compiler generic header file - * @version V5.1.0 - * @date 09. October 2018 - ******************************************************************************/ -/* - * Copyright (c) 2009-2018 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CMSIS_COMPILER_H -#define __CMSIS_COMPILER_H - -#include - -/* - * Arm Compiler 4/5 - */ -#if defined ( __CC_ARM ) - #include "cmsis_armcc.h" - - -/* - * Arm Compiler 6.6 LTM (armclang) - */ -#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) && (__ARMCC_VERSION < 6100100) - #include "cmsis_armclang_ltm.h" - - /* - * Arm Compiler above 6.10.1 (armclang) - */ -#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) - #include "cmsis_armclang.h" - - -/* - * GNU Compiler - */ -#elif defined ( __GNUC__ ) - #include "cmsis_gcc.h" - - -/* - * IAR Compiler - */ -#elif defined ( __ICCARM__ ) - #include - - -/* - * TI Arm Compiler - */ -#elif defined ( __TI_ARM__ ) - #include - - #ifndef __ASM - #define __ASM __asm - #endif - #ifndef __INLINE - #define __INLINE inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __STATIC_INLINE - #endif - #ifndef __NO_RETURN - #define __NO_RETURN __attribute__((noreturn)) - 
#endif - #ifndef __USED - #define __USED __attribute__((used)) - #endif - #ifndef __WEAK - #define __WEAK __attribute__((weak)) - #endif - #ifndef __PACKED - #define __PACKED __attribute__((packed)) - #endif - #ifndef __PACKED_STRUCT - #define __PACKED_STRUCT struct __attribute__((packed)) - #endif - #ifndef __PACKED_UNION - #define __PACKED_UNION union __attribute__((packed)) - #endif - #ifndef __UNALIGNED_UINT32 /* deprecated */ - struct __attribute__((packed)) T_UINT32 { uint32_t v; }; - #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) - #endif - #ifndef __UNALIGNED_UINT16_WRITE - __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; - #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void*)(addr))->v) = (val)) - #endif - #ifndef __UNALIGNED_UINT16_READ - __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; - #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) - #endif - #ifndef __UNALIGNED_UINT32_WRITE - __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; - #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) - #endif - #ifndef __UNALIGNED_UINT32_READ - __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; - #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) - #endif - #ifndef __ALIGNED - #define __ALIGNED(x) __attribute__((aligned(x))) - #endif - #ifndef __RESTRICT - #define __RESTRICT __restrict - #endif - #ifndef __COMPILER_BARRIER - #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. - #define __COMPILER_BARRIER() (void)0 - #endif - - -/* - * TASKING Compiler - */ -#elif defined ( __TASKING__ ) - /* - * The CMSIS functions have been implemented as intrinsics in the compiler. - * Please use "carm -?i" to get an up to date list of all intrinsics, - * Including the CMSIS ones. 
- */ - - #ifndef __ASM - #define __ASM __asm - #endif - #ifndef __INLINE - #define __INLINE inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __STATIC_INLINE - #endif - #ifndef __NO_RETURN - #define __NO_RETURN __attribute__((noreturn)) - #endif - #ifndef __USED - #define __USED __attribute__((used)) - #endif - #ifndef __WEAK - #define __WEAK __attribute__((weak)) - #endif - #ifndef __PACKED - #define __PACKED __packed__ - #endif - #ifndef __PACKED_STRUCT - #define __PACKED_STRUCT struct __packed__ - #endif - #ifndef __PACKED_UNION - #define __PACKED_UNION union __packed__ - #endif - #ifndef __UNALIGNED_UINT32 /* deprecated */ - struct __packed__ T_UINT32 { uint32_t v; }; - #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) - #endif - #ifndef __UNALIGNED_UINT16_WRITE - __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; - #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) - #endif - #ifndef __UNALIGNED_UINT16_READ - __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; - #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) - #endif - #ifndef __UNALIGNED_UINT32_WRITE - __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; - #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) - #endif - #ifndef __UNALIGNED_UINT32_READ - __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; - #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) - #endif - #ifndef __ALIGNED - #define __ALIGNED(x) __align(x) - #endif - #ifndef __RESTRICT - #warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored. - #define __RESTRICT - #endif - #ifndef __COMPILER_BARRIER - #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. 
- #define __COMPILER_BARRIER() (void)0 - #endif - - -/* - * COSMIC Compiler - */ -#elif defined ( __CSMC__ ) - #include - - #ifndef __ASM - #define __ASM _asm - #endif - #ifndef __INLINE - #define __INLINE inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __STATIC_INLINE - #endif - #ifndef __NO_RETURN - // NO RETURN is automatically detected hence no warning here - #define __NO_RETURN - #endif - #ifndef __USED - #warning No compiler specific solution for __USED. __USED is ignored. - #define __USED - #endif - #ifndef __WEAK - #define __WEAK __weak - #endif - #ifndef __PACKED - #define __PACKED @packed - #endif - #ifndef __PACKED_STRUCT - #define __PACKED_STRUCT @packed struct - #endif - #ifndef __PACKED_UNION - #define __PACKED_UNION @packed union - #endif - #ifndef __UNALIGNED_UINT32 /* deprecated */ - @packed struct T_UINT32 { uint32_t v; }; - #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) - #endif - #ifndef __UNALIGNED_UINT16_WRITE - __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; - #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) - #endif - #ifndef __UNALIGNED_UINT16_READ - __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; - #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) - #endif - #ifndef __UNALIGNED_UINT32_WRITE - __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; - #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) - #endif - #ifndef __UNALIGNED_UINT32_READ - __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; - #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) - #endif - #ifndef __ALIGNED - #warning No compiler specific solution for __ALIGNED. __ALIGNED is ignored. 
- #define __ALIGNED(x) - #endif - #ifndef __RESTRICT - #warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored. - #define __RESTRICT - #endif - #ifndef __COMPILER_BARRIER - #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. - #define __COMPILER_BARRIER() (void)0 - #endif - - -#else - #error Unknown compiler. -#endif - - -#endif /* __CMSIS_COMPILER_H */ - diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_common_tables.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_common_tables.h deleted file mode 100644 index 40b351b4d..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_common_tables.h +++ /dev/null @@ -1,529 +0,0 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_common_tables.h - * Description: Extern declaration for common tables - * - * $Date: 27. January 2017 - * $Revision: V.1.5.1 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ARM_COMMON_TABLES_H -#define _ARM_COMMON_TABLES_H - -#include "arm_math_types.h" -#include "dsp/fast_math_functions.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - /* Double Precision Float CFFT twiddles */ - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024) - extern const uint16_t armBitRevTable[1024]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_16) - extern const uint64_t twiddleCoefF64_16[32]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_32) - extern const uint64_t twiddleCoefF64_32[64]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_64) - extern const uint64_t twiddleCoefF64_64[128]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_128) - extern const uint64_t twiddleCoefF64_128[256]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_256) - extern const uint64_t twiddleCoefF64_256[512]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_512) - extern const uint64_t twiddleCoefF64_512[1024]; - #endif /* 
!defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_1024) - extern const uint64_t twiddleCoefF64_1024[2048]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_2048) - extern const uint64_t twiddleCoefF64_2048[4096]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_4096) - extern const uint64_t twiddleCoefF64_4096[8192]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16) - extern const float32_t twiddleCoef_16[32]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_32) - extern const float32_t twiddleCoef_32[64]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64) - extern const float32_t twiddleCoef_64[128]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_128) - extern const float32_t twiddleCoef_128[256]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256) - extern const float32_t twiddleCoef_256[512]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if 
!defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_512) - extern const float32_t twiddleCoef_512[1024]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024) - extern const float32_t twiddleCoef_1024[2048]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048) - extern const float32_t twiddleCoef_2048[4096]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) - extern const float32_t twiddleCoef_4096[8192]; - #define twiddleCoef twiddleCoef_4096 - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - /* Q31 */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16) - extern const q31_t twiddleCoef_16_q31[24]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32) - extern const q31_t twiddleCoef_32_q31[48]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_64) - extern const q31_t twiddleCoef_64_q31[96]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128) - extern const q31_t twiddleCoef_128_q31[192]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || 
defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_256) - extern const q31_t twiddleCoef_256_q31[384]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512) - extern const q31_t twiddleCoef_512_q31[768]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) - extern const q31_t twiddleCoef_1024_q31[1536]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) - extern const q31_t twiddleCoef_2048_q31[3072]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) - extern const q31_t twiddleCoef_4096_q31[6144]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_16) - extern const q15_t twiddleCoef_16_q15[24]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32) - extern const q15_t twiddleCoef_32_q15[48]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_64) - extern const q15_t twiddleCoef_64_q15[96]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128) - extern const q15_t 
twiddleCoef_128_q15[192]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_256) - extern const q15_t twiddleCoef_256_q15[384]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512) - extern const q15_t twiddleCoef_512_q15[768]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) - extern const q15_t twiddleCoef_1024_q15[1536]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) - extern const q15_t twiddleCoef_2048_q15[3072]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) - extern const q15_t twiddleCoef_4096_q15[6144]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - /* Double Precision Float RFFT twiddles */ - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32) - extern const uint64_t twiddleCoefF64_rfft_32[32]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64) - extern const uint64_t twiddleCoefF64_rfft_64[64]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128) - extern const uint64_t 
twiddleCoefF64_rfft_128[128]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256) - extern const uint64_t twiddleCoefF64_rfft_256[256]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512) - extern const uint64_t twiddleCoefF64_rfft_512[512]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024) - extern const uint64_t twiddleCoefF64_rfft_1024[1024]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048) - extern const uint64_t twiddleCoefF64_rfft_2048[2048]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096) - extern const uint64_t twiddleCoefF64_rfft_4096[4096]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32) - extern const float32_t twiddleCoef_rfft_32[32]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64) - extern const float32_t twiddleCoef_rfft_64[64]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128) - extern const 
float32_t twiddleCoef_rfft_128[128]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256) - extern const float32_t twiddleCoef_rfft_256[256]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512) - extern const float32_t twiddleCoef_rfft_512[512]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024) - extern const float32_t twiddleCoef_rfft_1024[1024]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048) - extern const float32_t twiddleCoef_rfft_2048[2048]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096) - extern const float32_t twiddleCoef_rfft_4096[4096]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - - /* Double precision floating-point bit reversal tables */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_16) - #define ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH ((uint16_t)12) - extern const uint16_t armBitRevIndexTableF64_16[ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_32) - #define ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH ((uint16_t)24) - extern const uint16_t 
armBitRevIndexTableF64_32[ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_64) - #define ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH ((uint16_t)56) - extern const uint16_t armBitRevIndexTableF64_64[ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_128) - #define ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH ((uint16_t)112) - extern const uint16_t armBitRevIndexTableF64_128[ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_256) - #define ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH ((uint16_t)240) - extern const uint16_t armBitRevIndexTableF64_256[ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_512) - #define ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH ((uint16_t)480) - extern const uint16_t armBitRevIndexTableF64_512[ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_1024) - #define ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH ((uint16_t)992) - extern const uint16_t armBitRevIndexTableF64_1024[ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || 
defined(ARM_TABLE_BITREVIDX_FLT64_2048) - #define ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH ((uint16_t)1984) - extern const uint16_t armBitRevIndexTableF64_2048[ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_4096) - #define ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH ((uint16_t)4032) - extern const uint16_t armBitRevIndexTableF64_4096[ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - /* floating-point bit reversal tables */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_16) - #define ARMBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20) - extern const uint16_t armBitRevIndexTable16[ARMBITREVINDEXTABLE_16_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_32) - #define ARMBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48) - extern const uint16_t armBitRevIndexTable32[ARMBITREVINDEXTABLE_32_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_64) - #define ARMBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56) - extern const uint16_t armBitRevIndexTable64[ARMBITREVINDEXTABLE_64_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_128) - #define ARMBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208) - extern const uint16_t armBitRevIndexTable128[ARMBITREVINDEXTABLE_128_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || 
defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_256) - #define ARMBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440) - extern const uint16_t armBitRevIndexTable256[ARMBITREVINDEXTABLE_256_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_512) - #define ARMBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448) - extern const uint16_t armBitRevIndexTable512[ARMBITREVINDEXTABLE_512_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_1024) - #define ARMBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800) - extern const uint16_t armBitRevIndexTable1024[ARMBITREVINDEXTABLE_1024_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_2048) - #define ARMBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808) - extern const uint16_t armBitRevIndexTable2048[ARMBITREVINDEXTABLE_2048_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_4096) - #define ARMBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032) - extern const uint16_t armBitRevIndexTable4096[ARMBITREVINDEXTABLE_4096_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - - /* fixed-point bit reversal tables */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) - #define ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12) - extern const uint16_t 
armBitRevIndexTable_fixed_16[ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32) - #define ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24) - extern const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) - #define ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56) - extern const uint16_t armBitRevIndexTable_fixed_64[ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128) - #define ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112) - extern const uint16_t armBitRevIndexTable_fixed_128[ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) - #define ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240) - extern const uint16_t armBitRevIndexTable_fixed_256[ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512) - #define ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480) - extern const uint16_t armBitRevIndexTable_fixed_512[ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || 
defined(ARM_TABLE_BITREVIDX_FXT_1024) - #define ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992) - extern const uint16_t armBitRevIndexTable_fixed_1024[ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048) - #define ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984) - extern const uint16_t armBitRevIndexTable_fixed_2048[ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096) - #define ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032) - extern const uint16_t armBitRevIndexTable_fixed_4096[ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_F32) - extern const float32_t realCoefA[8192]; - extern const float32_t realCoefB[8192]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q31) - extern const q31_t realCoefAQ31[8192]; - extern const q31_t realCoefBQ31[8192]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q15) - extern const q15_t realCoefAQ15[8192]; - extern const q15_t realCoefBQ15[8192]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128) - extern const float32_t Weights_128[256]; - extern const float32_t cos_factors_128[128]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512) - extern const float32_t Weights_512[1024]; - extern const float32_t 
cos_factors_512[512]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048) - extern const float32_t Weights_2048[4096]; - extern const float32_t cos_factors_2048[2048]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192) - extern const float32_t Weights_8192[16384]; - extern const float32_t cos_factors_8192[8192]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128) - extern const q15_t WeightsQ15_128[256]; - extern const q15_t cos_factorsQ15_128[128]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512) - extern const q15_t WeightsQ15_512[1024]; - extern const q15_t cos_factorsQ15_512[512]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048) - extern const q15_t WeightsQ15_2048[4096]; - extern const q15_t cos_factorsQ15_2048[2048]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192) - extern const q15_t WeightsQ15_8192[16384]; - extern const q15_t cos_factorsQ15_8192[8192]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128) - extern const q31_t WeightsQ31_128[256]; - extern const q31_t cos_factorsQ31_128[128]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512) - extern const q31_t WeightsQ31_512[1024]; - extern const q31_t cos_factorsQ31_512[512]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048) - extern const q31_t WeightsQ31_2048[4096]; - extern const q31_t cos_factorsQ31_2048[2048]; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192) - 
extern const q31_t WeightsQ31_8192[16384]; - extern const q31_t cos_factorsQ31_8192[8192]; - #endif - -#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_TABLES) */ - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES) - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q15) - extern const q15_t armRecipTableQ15[64]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q31) - extern const q31_t armRecipTableQ31[64]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - - /* Tables for Fast Math Sine and Cosine */ - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32) - extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31) - extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15) - extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - - #if defined(ARM_MATH_MVEI) - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE) - extern const q31_t sqrtTable_Q31[256]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - #endif - - #if defined(ARM_MATH_MVEI) - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE) - extern const q15_t sqrtTable_Q15[256]; - #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - #endif - 
-#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_TABLES) */ - -#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) - extern const float32_t exp_tab[8]; - extern const float32_t __logf_lut_f32[8]; -#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) -extern const unsigned char hwLUT[256]; -#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */ - -#ifdef __cplusplus -} -#endif - -#endif /* ARM_COMMON_TABLES_H */ - diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_helium_utils.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_helium_utils.h deleted file mode 100644 index 915ad7caa..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_helium_utils.h +++ /dev/null @@ -1,748 +0,0 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_helium_utils.h - * Description: Utility functions for Helium development - * - * $Date: 09. September 2019 - * $Revision: V.1.5.1 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ARM_UTILS_HELIUM_H_ -#define _ARM_UTILS_HELIUM_H_ - - -#ifdef __cplusplus -extern "C" -{ -#endif -/*************************************** - -Definitions available for MVEF and MVEI - -***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) - -#define INACTIVELANE 0 /* inactive lane content */ - - -#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) */ - -/*************************************** - -Definitions available for MVEF only - -***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) - -__STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in) -{ - float32_t acc; - - acc = vgetq_lane(in, 0) + vgetq_lane(in, 1) + - vgetq_lane(in, 2) + vgetq_lane(in, 3); - - return acc; -} - -__STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in) -{ - float16x8_t tmpVec; - _Float16 acc; - - tmpVec = (float16x8_t) vrev32q_s16((int16x8_t) in); - in = vaddq_f16(tmpVec, in); - tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in); - in = vaddq_f16(tmpVec, in); - acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4); - - return acc; -} - - -/* newton initial guess */ -#define INVSQRT_MAGIC_F32 0x5f3759df -#define INV_NEWTON_INIT_F32 0x7EF127EA - - -#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\ -{ \ - float32x4_t tmp; \ - \ - /* tmp = xhalf * x * x */ \ - tmp = vmulq(xStart, xStart); \ - tmp = vmulq(tmp, xHalf); \ - /* (1.5f - xhalf * x * x) */ \ - tmp = 
vsubq(vdupq_n_f32(1.5f), tmp); \ - /* x = x*(1.5f-xhalf*x*x); */ \ - invSqrt = vmulq(tmp, xStart); \ -} -#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */ - - -/*************************************** - -Definitions available for f16 datatype with HW acceleration only - -***************************************/ -#if defined (ARM_MATH_MVE_FLOAT16) -__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16( - float16x8_t vecIn) -{ - float16x8_t vecTmp, vecOut; - uint32_t tmp; - - vecTmp = (float16x8_t) vrev64q_s32((int32x4_t) vecIn); - // TO TRACK : using canonical addition leads to unefficient code generation for f16 - // vecTmp = vecTmp + vecAccCpx0; - /* - * Compute - * re0+re1 | im0+im1 | re0+re1 | im0+im1 - * re2+re3 | im2+im3 | re2+re3 | im2+im3 - */ - vecTmp = vaddq(vecTmp, vecIn); - vecOut = vecTmp; - /* - * shift left, random tmp insertion in bottom - */ - vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut) , &tmp, 32)); - /* - * Compute: - * DONTCARE | DONTCARE | re0+re1+re0+re1 |im0+im1+im0+im1 - * re0+re1+re2+re3 | im0+im1+im2+im3 | re2+re3+re2+re3 |im2+im3+im2+im3 - */ - vecOut = vaddq(vecOut, vecTmp); - /* - * Cmplx sum is in 4rd & 5th f16 elt - * return full vector - */ - return vecOut; -} - - -#define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im) \ -{ \ - float16x8_t vecOut = __mve_cmplx_sum_intra_vec_f16(vec); \ - Re = vgetq_lane(vecOut, 4); \ - Im = vgetq_lane(vecOut, 5); \ -} - -__STATIC_FORCEINLINE void mve_cmplx_sum_intra_vec_f16( - float16x8_t vecIn, - float16_t *pOut) -{ - float16x8_t vecOut = __mve_cmplx_sum_intra_vec_f16(vecIn); - /* - * Cmplx sum is in 4rd & 5th f16 elt - * use 32-bit extraction - */ - *(float32_t *) pOut = ((float32x4_t) vecOut)[2]; -} - - -#define INVSQRT_MAGIC_F16 0x59ba /* ( 0x1ba = 0x3759df >> 13) */ - -/* canonical version of INVSQRT_NEWTON_MVE_F16 leads to bad performance */ -#define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart) \ -{ \ - float16x8_t tmp; \ - \ - /* tmp = xhalf * x * 
x */ \ - tmp = vmulq(xStart, xStart); \ - tmp = vmulq(tmp, xHalf); \ - /* (1.5f - xhalf * x * x) */ \ - tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp); \ - /* x = x*(1.5f-xhalf*x*x); */ \ - invSqrt = vmulq(tmp, xStart); \ -} - -#endif - -/*************************************** - -Definitions available for MVEI and MVEF only - -***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) -/* Following functions are used to transpose matrix in f32 and q31 cases */ -__STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve( - uint32_t * pDataSrc, - uint32_t * pDataDest) -{ - static const uint32x4_t vecOffs = { 0, 2, 1, 3 }; - /* - * - * | 0 1 | => | 0 2 | - * | 2 3 | | 1 3 | - * - */ - uint32x4_t vecIn = vldrwq_u32((uint32_t const *)pDataSrc); - vstrwq_scatter_shifted_offset_u32(pDataDest, vecOffs, vecIn); - - return (ARM_MATH_SUCCESS); -} - -__STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve( - uint32_t * pDataSrc, - uint32_t * pDataDest) -{ - const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; - const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; - /* - * - * | 0 1 2 | | 0 3 6 | 4 x 32 flattened version | 0 3 6 1 | - * | 3 4 5 | => | 1 4 7 | => | 4 7 2 5 | - * | 6 7 8 | | 2 5 8 | (row major) | 8 . . . 
| - * - */ - uint32x4_t vecIn1 = vldrwq_u32((uint32_t const *) pDataSrc); - uint32x4_t vecIn2 = vldrwq_u32((uint32_t const *) &pDataSrc[4]); - - vstrwq_scatter_shifted_offset_u32(pDataDest, vecOffs1, vecIn1); - vstrwq_scatter_shifted_offset_u32(pDataDest, vecOffs2, vecIn2); - - pDataDest[8] = pDataSrc[8]; - - return (ARM_MATH_SUCCESS); -} - -__STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t * pDataSrc, uint32_t * pDataDest) -{ - /* - * 4x4 Matrix transposition - * is 4 x de-interleave operation - * - * 0 1 2 3 0 4 8 12 - * 4 5 6 7 1 5 9 13 - * 8 9 10 11 2 6 10 14 - * 12 13 14 15 3 7 11 15 - */ - - uint32x4x4_t vecIn; - - vecIn = vld4q((uint32_t const *) pDataSrc); - vstrwq(pDataDest, vecIn.val[0]); - pDataDest += 4; - vstrwq(pDataDest, vecIn.val[1]); - pDataDest += 4; - vstrwq(pDataDest, vecIn.val[2]); - pDataDest += 4; - vstrwq(pDataDest, vecIn.val[3]); - - return (ARM_MATH_SUCCESS); -} - - -__STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve( - uint16_t srcRows, - uint16_t srcCols, - uint32_t * pDataSrc, - uint32_t * pDataDest) -{ - uint32x4_t vecOffs; - uint32_t i; - uint32_t blkCnt; - uint32_t const *pDataC; - uint32_t *pDataDestR; - uint32x4_t vecIn; - - vecOffs = vidupq_u32((uint32_t)0, 1); - vecOffs = vecOffs * srcCols; - - i = srcCols; - do - { - pDataC = (uint32_t const *) pDataSrc; - pDataDestR = pDataDest; - - blkCnt = srcRows >> 2; - while (blkCnt > 0U) - { - vecIn = vldrwq_gather_shifted_offset_u32(pDataC, vecOffs); - vstrwq(pDataDestR, vecIn); - pDataDestR += 4; - pDataC = pDataC + srcCols * 4; - /* - * Decrement the blockSize loop counter - */ - blkCnt--; - } - - /* - * tail - */ - blkCnt = srcRows & 3; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp32q(blkCnt); - vecIn = vldrwq_gather_shifted_offset_u32(pDataC, vecOffs); - vstrwq_p(pDataDestR, vecIn, p0); - } - - pDataSrc += 1; - pDataDest += srcRows; - } - while (--i); - - return (ARM_MATH_SUCCESS); -} - -__STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit( - uint16_t srcRows, 
- uint16_t srcCols, - uint32_t *pDataSrc, - uint16_t dstRows, - uint16_t dstCols, - uint32_t *pDataDest) -{ - uint32_t i; - uint32_t const *pDataC; - uint32_t *pDataRow; - uint32_t *pDataDestR, *pDataDestRow; - uint32x4_t vecOffsRef, vecOffsCur; - uint32_t blkCnt; - uint32x4_t vecIn; - -#ifdef ARM_MATH_MATRIX_CHECK - /* - * Check for matrix mismatch condition - */ - if ((srcRows != dstCols) || (srcCols != dstRows)) - { - /* - * Set status as ARM_MATH_SIZE_MISMATCH - */ - return = ARM_MATH_SIZE_MISMATCH; - } -#else - (void)dstRows; - (void)dstCols; -#endif - - /* 2x2, 3x3 and 4x4 specialization to be added */ - - vecOffsRef[0] = 0; - vecOffsRef[1] = 1; - vecOffsRef[2] = srcCols << 1; - vecOffsRef[3] = (srcCols << 1) + 1; - - pDataRow = pDataSrc; - pDataDestRow = pDataDest; - i = srcCols; - do - { - pDataC = (uint32_t const *) pDataRow; - pDataDestR = pDataDestRow; - vecOffsCur = vecOffsRef; - - blkCnt = (srcRows * CMPLX_DIM) >> 2; - while (blkCnt > 0U) - { - vecIn = vldrwq_gather_shifted_offset(pDataC, vecOffsCur); - vstrwq(pDataDestR, vecIn); - pDataDestR += 4; - vecOffsCur = vaddq(vecOffsCur, (srcCols << 2)); - /* - * Decrement the blockSize loop counter - */ - blkCnt--; - } - /* - * tail - * (will be merged thru tail predication) - */ - blkCnt = (srcRows * CMPLX_DIM) & 3; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp32q(blkCnt); - vecIn = vldrwq_gather_shifted_offset(pDataC, vecOffsCur); - vstrwq_p(pDataDestR, vecIn, p0); - } - - pDataRow += CMPLX_DIM; - pDataDestRow += (srcRows * CMPLX_DIM); - } - while (--i); - - return (ARM_MATH_SUCCESS); -} - -__STATIC_INLINE arm_status arm_mat_trans_16bit_2x2(uint16_t * pDataSrc, uint16_t * pDataDest) -{ - pDataDest[0] = pDataSrc[0]; - pDataDest[3] = pDataSrc[3]; - pDataDest[2] = pDataSrc[1]; - pDataDest[1] = pDataSrc[2]; - - return (ARM_MATH_SUCCESS); -} - -__STATIC_INLINE arm_status arm_mat_trans_16bit_3x3_mve(uint16_t * pDataSrc, uint16_t * pDataDest) -{ - static const uint16_t stridesTr33[8] = { 0, 3, 6, 1, 4, 7, 2, 5 
}; - uint16x8_t vecOffs1; - uint16x8_t vecIn1; - /* - * - * | 0 1 2 | | 0 3 6 | 8 x 16 flattened version | 0 3 6 1 4 7 2 5 | - * | 3 4 5 | => | 1 4 7 | => | 8 . . . . . . . | - * | 6 7 8 | | 2 5 8 | (row major) - * - */ - vecOffs1 = vldrhq_u16((uint16_t const *) stridesTr33); - vecIn1 = vldrhq_u16((uint16_t const *) pDataSrc); - - vstrhq_scatter_shifted_offset_u16(pDataDest, vecOffs1, vecIn1); - - pDataDest[8] = pDataSrc[8]; - - return (ARM_MATH_SUCCESS); -} - - -__STATIC_INLINE arm_status arm_mat_trans_16bit_4x4_mve(uint16_t * pDataSrc, uint16_t * pDataDest) -{ - static const uint16_t stridesTr44_1[8] = { 0, 4, 8, 12, 1, 5, 9, 13 }; - static const uint16_t stridesTr44_2[8] = { 2, 6, 10, 14, 3, 7, 11, 15 }; - uint16x8_t vecOffs1, vecOffs2; - uint16x8_t vecIn1, vecIn2; - uint16_t const * pDataSrcVec = (uint16_t const *) pDataSrc; - - /* - * 4x4 Matrix transposition - * - * | 0 1 2 3 | | 0 4 8 12 | 8 x 16 flattened version - * | 4 5 6 7 | => | 1 5 9 13 | => [0 4 8 12 1 5 9 13] - * | 8 9 10 11 | | 2 6 10 14 | [2 6 10 14 3 7 11 15] - * | 12 13 14 15 | | 3 7 11 15 | - */ - - vecOffs1 = vldrhq_u16((uint16_t const *) stridesTr44_1); - vecOffs2 = vldrhq_u16((uint16_t const *) stridesTr44_2); - vecIn1 = vldrhq_u16(pDataSrcVec); - pDataSrcVec += 8; - vecIn2 = vldrhq_u16(pDataSrcVec); - - vstrhq_scatter_shifted_offset_u16(pDataDest, vecOffs1, vecIn1); - vstrhq_scatter_shifted_offset_u16(pDataDest, vecOffs2, vecIn2); - - - return (ARM_MATH_SUCCESS); -} - - - -__STATIC_INLINE arm_status arm_mat_trans_16bit_generic( - uint16_t srcRows, - uint16_t srcCols, - uint16_t * pDataSrc, - uint16_t * pDataDest) -{ - uint16x8_t vecOffs; - uint32_t i; - uint32_t blkCnt; - uint16_t const *pDataC; - uint16_t *pDataDestR; - uint16x8_t vecIn; - - vecOffs = vidupq_u16((uint32_t)0, 1); - vecOffs = vecOffs * srcCols; - - i = srcCols; - while(i > 0U) - { - pDataC = (uint16_t const *) pDataSrc; - pDataDestR = pDataDest; - - blkCnt = srcRows >> 3; - while (blkCnt > 0U) - { - vecIn = 
vldrhq_gather_shifted_offset_u16(pDataC, vecOffs); - vstrhq_u16(pDataDestR, vecIn); - pDataDestR += 8; - pDataC = pDataC + srcCols * 8; - /* - * Decrement the blockSize loop counter - */ - blkCnt--; - } - - /* - * tail - */ - blkCnt = srcRows & 7; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp16q(blkCnt); - vecIn = vldrhq_gather_shifted_offset_u16(pDataC, vecOffs); - vstrhq_p_u16(pDataDestR, vecIn, p0); - } - pDataSrc += 1; - pDataDest += srcRows; - i--; - } - - return (ARM_MATH_SUCCESS); -} - - -__STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit( - uint16_t srcRows, - uint16_t srcCols, - uint16_t *pDataSrc, - uint16_t dstRows, - uint16_t dstCols, - uint16_t *pDataDest) -{ - static const uint16_t loadCmplxCol[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; - int i; - uint16x8_t vecOffsRef, vecOffsCur; - uint16_t const *pDataC; - uint16_t *pDataRow; - uint16_t *pDataDestR, *pDataDestRow; - uint32_t blkCnt; - uint16x8_t vecIn; - -#ifdef ARM_MATH_MATRIX_CHECK - /* - * Check for matrix mismatch condition - */ - if ((srcRows != dstCols) || (srcCols != dstRows)) - { - /* - * Set status as ARM_MATH_SIZE_MISMATCH - */ - return = ARM_MATH_SIZE_MISMATCH; - } -#else - (void)dstRows; - (void)dstCols; -#endif - - /* - * 2x2, 3x3 and 4x4 specialization to be added - */ - - - /* - * build [0, 1, 2xcol, 2xcol+1, 4xcol, 4xcol+1, 6xcol, 6xcol+1] - */ - vecOffsRef = vldrhq_u16((uint16_t const *) loadCmplxCol); - vecOffsRef = vmulq(vecOffsRef, (uint16_t) (srcCols * CMPLX_DIM)) - + viwdupq_u16((uint32_t)0, (uint16_t) 2, 1); - - pDataRow = pDataSrc; - pDataDestRow = pDataDest; - i = srcCols; - do - { - pDataC = (uint16_t const *) pDataRow; - pDataDestR = pDataDestRow; - vecOffsCur = vecOffsRef; - - blkCnt = (srcRows * CMPLX_DIM) >> 3; - while (blkCnt > 0U) - { - vecIn = vldrhq_gather_shifted_offset(pDataC, vecOffsCur); - vstrhq(pDataDestR, vecIn); - pDataDestR+= 8; // VEC_LANES_U16 - vecOffsCur = vaddq(vecOffsCur, (srcCols << 3)); - /* - * Decrement the blockSize loop counter - */ - blkCnt--; - } - 
/* - * tail - * (will be merged thru tail predication) - */ - blkCnt = (srcRows * CMPLX_DIM) & 0x7; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp16q(blkCnt); - vecIn = vldrhq_gather_shifted_offset(pDataC, vecOffsCur); - vstrhq_p(pDataDestR, vecIn, p0); - } - - pDataRow += CMPLX_DIM; - pDataDestRow += (srcRows * CMPLX_DIM); - } - while (--i); - - return (ARM_MATH_SUCCESS); -} -#endif /* MVEF and MVEI */ - -/*************************************** - -Definitions available for MVEI only - -***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) - -#include "arm_common_tables.h" - -#define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) -#define MVE_ASRL_SAT32(acc, shift) ((sqrshrl(acc, -(32-shift)) >> 32) & 0xffffffff) - - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE) -__STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn) -{ - q63x2_t vecTmpLL; - q31x4_t vecTmp0, vecTmp1; - q31_t scale; - q63_t tmp64; - q31x4_t vecNrm, vecDst, vecIdx, vecSignBits; - - - vecSignBits = vclsq(vecIn); - vecSignBits = vbicq(vecSignBits, 1); - /* - * in = in << no_of_sign_bits; - */ - vecNrm = vshlq(vecIn, vecSignBits); - /* - * index = in >> 24; - */ - vecIdx = vecNrm >> 24; - vecIdx = vecIdx << 1; - - vecTmp0 = vldrwq_gather_shifted_offset_s32(sqrtTable_Q31, (uint32x4_t)vecIdx); - - vecIdx = vecIdx + 1; - - vecTmp1 = vldrwq_gather_shifted_offset_s32(sqrtTable_Q31, (uint32x4_t)vecIdx); - - vecTmp1 = vqrdmulhq(vecTmp1, vecNrm); - vecTmp0 = vecTmp0 - vecTmp1; - vecTmp1 = vqrdmulhq(vecTmp0, vecTmp0); - vecTmp1 = vqrdmulhq(vecNrm, vecTmp1); - vecTmp1 = vdupq_n_s32(0x18000000) - vecTmp1; - vecTmp0 = vqrdmulhq(vecTmp0, vecTmp1); - vecTmpLL = vmullbq_int(vecNrm, vecTmp0); - - /* - * scale elements 0, 2 - */ - scale = 26 + (vecSignBits[0] >> 1); - tmp64 = asrl(vecTmpLL[0], scale); - vecDst[0] = (q31_t) tmp64; - - scale = 26 + (vecSignBits[2] >> 1); - tmp64 = 
asrl(vecTmpLL[1], scale); - vecDst[2] = (q31_t) tmp64; - - vecTmpLL = vmulltq_int(vecNrm, vecTmp0); - - /* - * scale elements 1, 3 - */ - scale = 26 + (vecSignBits[1] >> 1); - tmp64 = asrl(vecTmpLL[0], scale); - vecDst[1] = (q31_t) tmp64; - - scale = 26 + (vecSignBits[3] >> 1); - tmp64 = asrl(vecTmpLL[1], scale); - vecDst[3] = (q31_t) tmp64; - /* - * set negative values to 0 - */ - vecDst = vdupq_m(vecDst, 0, vcmpltq_n_s32(vecIn, 0)); - - return vecDst; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE) -__STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn) -{ - q31x4_t vecTmpLev, vecTmpLodd, vecSignL; - q15x8_t vecTmp0, vecTmp1; - q15x8_t vecNrm, vecDst, vecIdx, vecSignBits; - - vecDst = vuninitializedq_s16(); - - vecSignBits = vclsq(vecIn); - vecSignBits = vbicq(vecSignBits, 1); - /* - * in = in << no_of_sign_bits; - */ - vecNrm = vshlq(vecIn, vecSignBits); - - vecIdx = vecNrm >> 8; - vecIdx = vecIdx << 1; - - vecTmp0 = vldrhq_gather_shifted_offset_s16(sqrtTable_Q15, (uint16x8_t)vecIdx); - - vecIdx = vecIdx + 1; - - vecTmp1 = vldrhq_gather_shifted_offset_s16(sqrtTable_Q15, (uint16x8_t)vecIdx); - - vecTmp1 = vqrdmulhq(vecTmp1, vecNrm); - vecTmp0 = vecTmp0 - vecTmp1; - vecTmp1 = vqrdmulhq(vecTmp0, vecTmp0); - vecTmp1 = vqrdmulhq(vecNrm, vecTmp1); - vecTmp1 = vdupq_n_s16(0x1800) - vecTmp1; - vecTmp0 = vqrdmulhq(vecTmp0, vecTmp1); - - vecSignBits = vecSignBits >> 1; - - vecTmpLev = vmullbq_int(vecNrm, vecTmp0); - vecTmpLodd = vmulltq_int(vecNrm, vecTmp0); - - vecTmp0 = vecSignBits + 10; - /* - * negate sign to apply register based vshl - */ - vecTmp0 = -vecTmp0; - - /* - * shift even elements - */ - vecSignL = vmovlbq(vecTmp0); - vecTmpLev = vshlq(vecTmpLev, vecSignL); - /* - * shift odd elements - */ - vecSignL = vmovltq(vecTmp0); - vecTmpLodd = vshlq(vecTmpLodd, vecSignL); - /* - * merge and narrow odd and even parts - */ - vecDst = vmovnbq_s32(vecDst, vecTmpLev); - vecDst = vmovntq_s32(vecDst, 
vecTmpLodd); - /* - * set negative values to 0 - */ - vecDst = vdupq_m(vecDst, 0, vcmpltq_n_s16(vecIn, 0)); - - return vecDst; -} -#endif - -#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math.h deleted file mode 100644 index 404ee91c5..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math.h +++ /dev/null @@ -1,246 +0,0 @@ -/****************************************************************************** - * @file arm_math.h - * @brief Public header file for CMSIS DSP Library - * @version V1.7.0 - * @date 18. March 2019 - ******************************************************************************/ -/* - * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - \mainpage CMSIS DSP Software Library - * - * \section intro Introduction - * - * This user manual describes the CMSIS DSP software library, - * a suite of common signal processing functions for use on Cortex-M and Cortex-A processor - * based devices. - * - * The library is divided into a number of functions each covering a specific category: - * - Basic math functions - * - Fast math functions - * - Complex math functions - * - Filtering functions - * - Matrix functions - * - Transform functions - * - Motor control functions - * - Statistical functions - * - Support functions - * - Interpolation functions - * - Support Vector Machine functions (SVM) - * - Bayes classifier functions - * - Distance functions - * - * The library has generally separate functions for operating on 8-bit integers, 16-bit integers, - * 32-bit integer and 32-bit floating-point values. - * - * \section using Using the Library - * - * The library installer contains prebuilt versions of the libraries in the Lib folder. 
- * - * Here is the list of pre-built libraries : - * - arm_cortexM7lfdp_math.lib (Cortex-M7, Little endian, Double Precision Floating Point Unit) - * - arm_cortexM7bfdp_math.lib (Cortex-M7, Big endian, Double Precision Floating Point Unit) - * - arm_cortexM7lfsp_math.lib (Cortex-M7, Little endian, Single Precision Floating Point Unit) - * - arm_cortexM7bfsp_math.lib (Cortex-M7, Big endian and Single Precision Floating Point Unit on) - * - arm_cortexM7l_math.lib (Cortex-M7, Little endian) - * - arm_cortexM7b_math.lib (Cortex-M7, Big endian) - * - arm_cortexM4lf_math.lib (Cortex-M4, Little endian, Floating Point Unit) - * - arm_cortexM4bf_math.lib (Cortex-M4, Big endian, Floating Point Unit) - * - arm_cortexM4l_math.lib (Cortex-M4, Little endian) - * - arm_cortexM4b_math.lib (Cortex-M4, Big endian) - * - arm_cortexM3l_math.lib (Cortex-M3, Little endian) - * - arm_cortexM3b_math.lib (Cortex-M3, Big endian) - * - arm_cortexM0l_math.lib (Cortex-M0 / Cortex-M0+, Little endian) - * - arm_cortexM0b_math.lib (Cortex-M0 / Cortex-M0+, Big endian) - * - arm_ARMv8MBLl_math.lib (Armv8-M Baseline, Little endian) - * - arm_ARMv8MMLl_math.lib (Armv8-M Mainline, Little endian) - * - arm_ARMv8MMLlfsp_math.lib (Armv8-M Mainline, Little endian, Single Precision Floating Point Unit) - * - arm_ARMv8MMLld_math.lib (Armv8-M Mainline, Little endian, DSP instructions) - * - arm_ARMv8MMLldfsp_math.lib (Armv8-M Mainline, Little endian, DSP instructions, Single Precision Floating Point Unit) - * - * The library functions are declared in the public file arm_math.h which is placed in the Include folder. - * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single - * public header file arm_math.h for Cortex-M cores with little endian and big endian. Same header file will be used for floating point unit(FPU) variants. 
- * - * - * \section example Examples - * - * The library ships with a number of examples which demonstrate how to use the library functions. - * - * \section toolchain Toolchain Support - * - * The library is now tested on Fast Models building with cmake. - * Core M0, M7, A5 are tested. - * - * - * - * \section building Building the Library - * - * The library installer contains a project file to rebuild libraries on MDK toolchain in the CMSIS\\DSP\\Projects\\ARM folder. - * - arm_cortexM_math.uvprojx - * - * - * The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional preprocessor macros detailed above. - * - * There is also a work in progress cmake build. The README file is giving more details. - * - * \section preprocessor Preprocessor Macros - * - * Each library project have different preprocessor macros. - * - * - ARM_MATH_BIG_ENDIAN: - * - * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets. - * - * - ARM_MATH_MATRIX_CHECK: - * - * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices - * - * - ARM_MATH_ROUNDING: - * - * Define macro ARM_MATH_ROUNDING for rounding on support functions - * - * - ARM_MATH_LOOPUNROLL: - * - * Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions - * - * - ARM_MATH_NEON: - * - * Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions. - * It is not enabled by default when Neon is available because performances are - * dependent on the compiler and target architecture. - * - * - ARM_MATH_NEON_EXPERIMENTAL: - * - * Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions of - * of some DSP functions. Experimental Neon versions currently do not have better - * performances than the scalar versions. 
- * - * - ARM_MATH_HELIUM: - * - * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_FLOAT16. - * - * - ARM_MATH_MVEF: - * - * Select Helium versions of the f32 algorithms. - * It implies ARM_MATH_FLOAT16 and ARM_MATH_MVEI. - * - * - ARM_MATH_MVEI: - * - * Select Helium versions of the int and fixed point algorithms. - * - * - ARM_MATH_MVE_FLOAT16: - * - * MVE Float16 implementations of some algorithms (Requires MVE extension). - * - * - DISABLEFLOAT16: - * - * Disable float16 algorithms when __fp16 is not supported for a - * specific compiler / core configuration - * - *
- * \section pack CMSIS-DSP in ARM::CMSIS Pack - * - * The following files relevant to CMSIS-DSP are present in the ARM::CMSIS Pack directories: - * |File/Folder |Content | - * |---------------------------------|------------------------------------------------------------------------| - * |\b CMSIS\\Documentation\\DSP | This documentation | - * |\b CMSIS\\DSP\\DSP_Lib_TestSuite | DSP_Lib deprecated test suite | - * |\b CMSIS\\DSP\\Examples | Example projects demonstrating the usage of the library functions | - * |\b CMSIS\\DSP\\Include | DSP_Lib include files for using and building the lib - * |\b CMSIS\\DSP\\PrivateInclude | DSP_Lib private include files for building the lib | - * |\b CMSIS\\DSP\\Lib | DSP_Lib binaries | - * |\b CMSIS\\DSP\\Projects | Projects to rebuild DSP_Lib binaries | - * |\b CMSIS\\DSP\\Source | DSP_Lib source files | - * - *
- * \section rev Revision History of CMSIS-DSP - * Please refer to \ref ChangeLog_pg. - */ - - - - - - - - - - - -/** - * @defgroup groupExamples Examples - */ - - - - - -#ifndef _ARM_MATH_H -#define _ARM_MATH_H - - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#include "dsp/basic_math_functions.h" -#include "dsp/interpolation_functions.h" -#include "dsp/bayes_functions.h" -#include "dsp/matrix_functions.h" -#include "dsp/complex_math_functions.h" -#include "dsp/statistics_functions.h" -#include "dsp/controller_functions.h" -#include "dsp/support_functions.h" -#include "dsp/distance_functions.h" -#include "dsp/svm_functions.h" -#include "dsp/fast_math_functions.h" -#include "dsp/transform_functions.h" -#include "dsp/filtering_functions.h" - - - -#ifdef __cplusplus -extern "C" -{ -#endif - - - - -//#define TABLE_SPACING_Q31 0x400000 -//#define TABLE_SPACING_Q15 0x80 - - - - - -#ifdef __cplusplus -} -#endif - - -#endif /* _ARM_MATH_H */ - -/** - * - * End of file. - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math_memory.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math_memory.h deleted file mode 100644 index c7158dc92..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math_memory.h +++ /dev/null @@ -1,240 +0,0 @@ -/****************************************************************************** - * @file arm_math_memory.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. 
All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ARM_MATH_MEMORY_H_ - -#define _ARM_MATH_MEMORY_H_ - -#include "arm_math_types.h" - - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - @brief definition to read/write two 16 bit values. - @deprecated - */ -#if defined ( __CC_ARM ) - #define __SIMD32_TYPE int32_t __packed -#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) - #define __SIMD32_TYPE int32_t -#elif defined ( __GNUC__ ) - #define __SIMD32_TYPE int32_t -#elif defined ( __ICCARM__ ) - #define __SIMD32_TYPE int32_t __packed -#elif defined ( __TI_ARM__ ) - #define __SIMD32_TYPE int32_t -#elif defined ( __CSMC__ ) - #define __SIMD32_TYPE int32_t -#elif defined ( __TASKING__ ) - #define __SIMD32_TYPE __un(aligned) int32_t -#elif defined(_MSC_VER ) - #define __SIMD32_TYPE int32_t -#else - #error Unknown compiler -#endif - -#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr)) -#define __SIMD32_CONST(addr) ( (__SIMD32_TYPE * ) (addr)) -#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE * ) (addr)) -#define __SIMD64(addr) (*( int64_t **) & (addr)) - - -/* SIMD replacement */ - - -/** - @brief Read 2 Q15 from Q15 pointer. 
- @param[in] pQ15 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q15x2 ( - q15_t * pQ15) -{ - q31_t val; - -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (&val, pQ15, 4); -#else - val = (pQ15[1] << 16) | (pQ15[0] & 0x0FFFF) ; -#endif - - return (val); -} - -/** - @brief Read 2 Q15 from Q15 pointer and increment pointer afterwards. - @param[in] pQ15 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q15x2_ia ( - q15_t ** pQ15) -{ - q31_t val; - -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (&val, *pQ15, 4); -#else - val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF); -#endif - - *pQ15 += 2; - return (val); -} - -/** - @brief Read 2 Q15 from Q15 pointer and decrement pointer afterwards. - @param[in] pQ15 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q15x2_da ( - q15_t ** pQ15) -{ - q31_t val; - -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (&val, *pQ15, 4); -#else - val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF); -#endif - - *pQ15 -= 2; - return (val); -} - -/** - @brief Write 2 Q15 to Q15 pointer and increment pointer afterwards. - @param[in] pQ15 points to input value - @param[in] value Q31 value - @return none - */ -__STATIC_FORCEINLINE void write_q15x2_ia ( - q15_t ** pQ15, - q31_t value) -{ - q31_t val = value; -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (*pQ15, &val, 4); -#else - (*pQ15)[0] = (val & 0x0FFFF); - (*pQ15)[1] = (val >> 16) & 0x0FFFF; -#endif - - *pQ15 += 2; -} - -/** - @brief Write 2 Q15 to Q15 pointer. - @param[in] pQ15 points to input value - @param[in] value Q31 value - @return none - */ -__STATIC_FORCEINLINE void write_q15x2 ( - q15_t * pQ15, - q31_t value) -{ - q31_t val = value; - -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (pQ15, &val, 4); -#else - pQ15[0] = val & 0x0FFFF; - pQ15[1] = val >> 16; -#endif -} - - -/** - @brief Read 4 Q7 from Q7 pointer and increment pointer afterwards. 
- @param[in] pQ7 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q7x4_ia ( - q7_t ** pQ7) -{ - q31_t val; - - -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (&val, *pQ7, 4); -#else - val =(((*pQ7)[3] & 0x0FF) << 24) | (((*pQ7)[2] & 0x0FF) << 16) | (((*pQ7)[1] & 0x0FF) << 8) | ((*pQ7)[0] & 0x0FF); -#endif - - *pQ7 += 4; - - return (val); -} - -/** - @brief Read 4 Q7 from Q7 pointer and decrement pointer afterwards. - @param[in] pQ7 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q7x4_da ( - q7_t ** pQ7) -{ - q31_t val; -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (&val, *pQ7, 4); -#else - val = ((((*pQ7)[3]) & 0x0FF) << 24) | ((((*pQ7)[2]) & 0x0FF) << 16) | ((((*pQ7)[1]) & 0x0FF) << 8) | ((*pQ7)[0] & 0x0FF); -#endif - *pQ7 -= 4; - - return (val); -} - -/** - @brief Write 4 Q7 to Q7 pointer and increment pointer afterwards. - @param[in] pQ7 points to input value - @param[in] value Q31 value - @return none - */ -__STATIC_FORCEINLINE void write_q7x4_ia ( - q7_t ** pQ7, - q31_t value) -{ - q31_t val = value; -#ifdef __ARM_FEATURE_UNALIGNED - memcpy (*pQ7, &val, 4); -#else - (*pQ7)[0] = val & 0x0FF; - (*pQ7)[1] = (val >> 8) & 0x0FF; - (*pQ7)[2] = (val >> 16) & 0x0FF; - (*pQ7)[3] = (val >> 24) & 0x0FF; - -#endif - *pQ7 += 4; -} - - -#ifdef __cplusplus -} -#endif - -#endif /*ifndef _ARM_MATH_MEMORY_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math_types.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math_types.h deleted file mode 100644 index 95a17e350..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/arm_math_types.h +++ /dev/null @@ -1,598 +0,0 @@ 
-/****************************************************************************** - * @file arm_math_types.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ARM_MATH_TYPES_H_ - -#define _ARM_MATH_TYPES_H_ - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Compiler specific diagnostic adjustment */ -#if defined ( __CC_ARM ) - -#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) - -#elif defined ( __GNUC__ ) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wsign-conversion" - #pragma GCC diagnostic ignored "-Wconversion" - #pragma GCC diagnostic ignored "-Wunused-parameter" - -#elif defined ( __ICCARM__ ) - -#elif defined ( __TI_ARM__ ) - -#elif defined ( __CSMC__ ) - -#elif defined ( __TASKING__ ) - -#elif defined ( _MSC_VER ) - -#else - #error Unknown compiler -#endif - - -/* Included for instrinsics definitions */ -#if defined (_MSC_VER ) -#include -#define __STATIC_FORCEINLINE static __forceinline -#define __STATIC_INLINE static __inline -#define __ALIGNED(x) __declspec(align(x)) - -#elif defined (__GNUC_PYTHON__) -#include -#define __ALIGNED(x) __attribute__((aligned(x))) -#define __STATIC_FORCEINLINE static 
__attribute__((inline)) -#define __STATIC_INLINE static __attribute__((inline)) -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wattributes" - -#else -#include "cmsis_compiler.h" -#endif - - - -#include -#include -#include -#include - -/* evaluate ARM DSP feature */ -#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) - #define ARM_MATH_DSP 1 -#endif - -#if defined(ARM_MATH_NEON) -#include -#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - #if !defined(ARM_MATH_NEON_FLOAT16) - #define ARM_MATH_NEON_FLOAT16 - #endif -#endif -#endif - -#if !defined(ARM_MATH_AUTOVECTORIZE) - -#if __ARM_FEATURE_MVE - #if !defined(ARM_MATH_MVEI) - #define ARM_MATH_MVEI - #endif -#endif - -#if (__ARM_FEATURE_MVE & 2) - #if !defined(ARM_MATH_MVEF) - #define ARM_MATH_MVEF - #endif - #if !defined(ARM_MATH_MVE_FLOAT16) - /* HW Float16 not yet well supported on gcc for M55 */ - #if !defined(__CMSIS_GCC_H) - #define ARM_MATH_MVE_FLOAT16 - #endif - #endif -#endif - -#endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/ - - -#if defined (ARM_MATH_HELIUM) - #if !defined(ARM_MATH_MVEF) - #define ARM_MATH_MVEF - #endif - - #if !defined(ARM_MATH_MVEI) - #define ARM_MATH_MVEI - #endif - - #if !defined(ARM_MATH_MVE_FLOAT16) - /* HW Float16 not yet well supported on gcc for M55 */ - #if !defined(__CMSIS_GCC_H) - #define ARM_MATH_MVE_FLOAT16 - #endif - #endif -#endif - - - -#if defined ( __CC_ARM ) - /* Enter low optimization region - place directly above function definition */ - #if defined( __ARM_ARCH_7EM__ ) - #define LOW_OPTIMIZATION_ENTER \ - _Pragma ("push") \ - _Pragma ("O1") - #else - #define LOW_OPTIMIZATION_ENTER - #endif - - /* Exit low optimization region - place directly after end of function definition */ - #if defined ( __ARM_ARCH_7EM__ ) - #define LOW_OPTIMIZATION_EXIT \ - _Pragma ("pop") - #else - #define LOW_OPTIMIZATION_EXIT - #endif - - /* Enter low optimization region - place directly above function definition */ - #define 
IAR_ONLY_LOW_OPTIMIZATION_ENTER - - /* Exit low optimization region - place directly after end of function definition */ - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) - #define LOW_OPTIMIZATION_ENTER - #define LOW_OPTIMIZATION_EXIT - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined ( __GNUC__ ) - #define LOW_OPTIMIZATION_ENTER \ - __attribute__(( optimize("-O1") )) - #define LOW_OPTIMIZATION_EXIT - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined ( __ICCARM__ ) - /* Enter low optimization region - place directly above function definition */ - #if defined ( __ARM_ARCH_7EM__ ) - #define LOW_OPTIMIZATION_ENTER \ - _Pragma ("optimize=low") - #else - #define LOW_OPTIMIZATION_ENTER - #endif - - /* Exit low optimization region - place directly after end of function definition */ - #define LOW_OPTIMIZATION_EXIT - - /* Enter low optimization region - place directly above function definition */ - #if defined ( __ARM_ARCH_7EM__ ) - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \ - _Pragma ("optimize=low") - #else - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #endif - - /* Exit low optimization region - place directly after end of function definition */ - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined ( __TI_ARM__ ) - #define LOW_OPTIMIZATION_ENTER - #define LOW_OPTIMIZATION_EXIT - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined ( __CSMC__ ) - #define LOW_OPTIMIZATION_ENTER - #define LOW_OPTIMIZATION_EXIT - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined ( __TASKING__ ) - #define LOW_OPTIMIZATION_ENTER - #define LOW_OPTIMIZATION_EXIT - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT - -#elif defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) - #define LOW_OPTIMIZATION_ENTER - #define 
LOW_OPTIMIZATION_EXIT - #define IAR_ONLY_LOW_OPTIMIZATION_ENTER - #define IAR_ONLY_LOW_OPTIMIZATION_EXIT -#endif - - - -/* Compiler specific diagnostic adjustment */ -#if defined ( __CC_ARM ) - -#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) - -#elif defined ( __GNUC__ ) -#pragma GCC diagnostic pop - -#elif defined ( __ICCARM__ ) - -#elif defined ( __TI_ARM__ ) - -#elif defined ( __CSMC__ ) - -#elif defined ( __TASKING__ ) - -#elif defined ( _MSC_VER ) - -#else - #error Unknown compiler -#endif - -#ifdef __cplusplus -} -#endif - -#if __ARM_FEATURE_MVE -#include -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - - /** - * @brief 8-bit fractional data type in 1.7 format. - */ - typedef int8_t q7_t; - - /** - * @brief 16-bit fractional data type in 1.15 format. - */ - typedef int16_t q15_t; - - /** - * @brief 32-bit fractional data type in 1.31 format. - */ - typedef int32_t q31_t; - - /** - * @brief 64-bit fractional data type in 1.63 format. - */ - typedef int64_t q63_t; - - /** - * @brief 32-bit floating-point type definition. - */ - typedef float float32_t; - - /** - * @brief 64-bit floating-point type definition. - */ - typedef double float64_t; - - /** - * @brief vector types - */ -#if defined(ARM_MATH_NEON) || defined (ARM_MATH_MVEI) - /** - * @brief 64-bit fractional 128-bit vector data type in 1.63 format - */ - typedef int64x2_t q63x2_t; - - /** - * @brief 32-bit fractional 128-bit vector data type in 1.31 format. - */ - typedef int32x4_t q31x4_t; - - /** - * @brief 16-bit fractional 128-bit vector data type with 16-bit alignement in 1.15 format. - */ - typedef __ALIGNED(2) int16x8_t q15x8_t; - - /** - * @brief 8-bit fractional 128-bit vector data type with 8-bit alignement in 1.7 format. - */ - typedef __ALIGNED(1) int8x16_t q7x16_t; - - /** - * @brief 32-bit fractional 128-bit vector pair data type in 1.31 format. 
- */ - typedef int32x4x2_t q31x4x2_t; - - /** - * @brief 32-bit fractional 128-bit vector quadruplet data type in 1.31 format. - */ - typedef int32x4x4_t q31x4x4_t; - - /** - * @brief 16-bit fractional 128-bit vector pair data type in 1.15 format. - */ - typedef int16x8x2_t q15x8x2_t; - - /** - * @brief 16-bit fractional 128-bit vector quadruplet data type in 1.15 format. - */ - typedef int16x8x4_t q15x8x4_t; - - /** - * @brief 8-bit fractional 128-bit vector pair data type in 1.7 format. - */ - typedef int8x16x2_t q7x16x2_t; - - /** - * @brief 8-bit fractional 128-bit vector quadruplet data type in 1.7 format. - */ - typedef int8x16x4_t q7x16x4_t; - - /** - * @brief 32-bit fractional data type in 9.23 format. - */ - typedef int32_t q23_t; - - /** - * @brief 32-bit fractional 128-bit vector data type in 9.23 format. - */ - typedef int32x4_t q23x4_t; - - /** - * @brief 64-bit status 128-bit vector data type. - */ - typedef int64x2_t status64x2_t; - - /** - * @brief 32-bit status 128-bit vector data type. - */ - typedef int32x4_t status32x4_t; - - /** - * @brief 16-bit status 128-bit vector data type. - */ - typedef int16x8_t status16x8_t; - - /** - * @brief 8-bit status 128-bit vector data type. - */ - typedef int8x16_t status8x16_t; - - -#endif - -#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/ - /** - * @brief 32-bit floating-point 128-bit vector type - */ - typedef float32x4_t f32x4_t; - - /** - * @brief 32-bit floating-point 128-bit vector pair data type - */ - typedef float32x4x2_t f32x4x2_t; - - /** - * @brief 32-bit floating-point 128-bit vector quadruplet data type - */ - typedef float32x4x4_t f32x4x4_t; - - /** - * @brief 32-bit ubiquitous 128-bit vector data type - */ - typedef union _any32x4_t - { - float32x4_t f; - int32x4_t i; - } any32x4_t; - -#endif - -#if defined(ARM_MATH_NEON) - /** - * @brief 32-bit fractional 64-bit vector data type in 1.31 format. 
- */ - typedef int32x2_t q31x2_t; - - /** - * @brief 16-bit fractional 64-bit vector data type in 1.15 format. - */ - typedef __ALIGNED(2) int16x4_t q15x4_t; - - /** - * @brief 8-bit fractional 64-bit vector data type in 1.7 format. - */ - typedef __ALIGNED(1) int8x8_t q7x8_t; - - /** - * @brief 32-bit float 64-bit vector data type. - */ - typedef float32x2_t f32x2_t; - - /** - * @brief 32-bit floating-point 128-bit vector triplet data type - */ - typedef float32x4x3_t f32x4x3_t; - - - /** - * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format - */ - typedef int32x4x3_t q31x4x3_t; - - /** - * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format - */ - typedef int16x8x3_t q15x8x3_t; - - /** - * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format - */ - typedef int8x16x3_t q7x16x3_t; - - /** - * @brief 32-bit floating-point 64-bit vector pair data type - */ - typedef float32x2x2_t f32x2x2_t; - - /** - * @brief 32-bit floating-point 64-bit vector triplet data type - */ - typedef float32x2x3_t f32x2x3_t; - - /** - * @brief 32-bit floating-point 64-bit vector quadruplet data type - */ - typedef float32x2x4_t f32x2x4_t; - - - /** - * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format - */ - typedef int32x2x2_t q31x2x2_t; - - /** - * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format - */ - typedef int32x2x3_t q31x2x3_t; - - /** - * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format - */ - typedef int32x4x3_t q31x2x4_t; - - /** - * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format - */ - typedef int16x4x2_t q15x4x2_t; - - /** - * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format - */ - typedef int16x4x2_t q15x4x3_t; - - /** - * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format - */ - typedef int16x4x3_t q15x4x4_t; - - /** - * @brief 8-bit fractional 64-bit vector pair data type in 1.7 
format - */ - typedef int8x8x2_t q7x8x2_t; - - /** - * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format - */ - typedef int8x8x3_t q7x8x3_t; - - /** - * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format - */ - typedef int8x8x4_t q7x8x4_t; - - /** - * @brief 32-bit ubiquitous 64-bit vector data type - */ - typedef union _any32x2_t - { - float32x2_t f; - int32x2_t i; - } any32x2_t; - - - /** - * @brief 32-bit status 64-bit vector data type. - */ - typedef int32x4_t status32x2_t; - - /** - * @brief 16-bit status 64-bit vector data type. - */ - typedef int16x8_t status16x4_t; - - /** - * @brief 8-bit status 64-bit vector data type. - */ - typedef int8x16_t status8x8_t; - -#endif - - - - - -#define F64_MAX ((float64_t)DBL_MAX) -#define F32_MAX ((float32_t)FLT_MAX) - - - -#define F64_MIN (-DBL_MAX) -#define F32_MIN (-FLT_MAX) - - - -#define F64_ABSMAX ((float64_t)DBL_MAX) -#define F32_ABSMAX ((float32_t)FLT_MAX) - - - -#define F64_ABSMIN ((float64_t)0.0) -#define F32_ABSMIN ((float32_t)0.0) - - -#define Q31_MAX ((q31_t)(0x7FFFFFFFL)) -#define Q15_MAX ((q15_t)(0x7FFF)) -#define Q7_MAX ((q7_t)(0x7F)) -#define Q31_MIN ((q31_t)(0x80000000L)) -#define Q15_MIN ((q15_t)(0x8000)) -#define Q7_MIN ((q7_t)(0x80)) - -#define Q31_ABSMAX ((q31_t)(0x7FFFFFFFL)) -#define Q15_ABSMAX ((q15_t)(0x7FFF)) -#define Q7_ABSMAX ((q7_t)(0x7F)) -#define Q31_ABSMIN ((q31_t)0) -#define Q15_ABSMIN ((q15_t)0) -#define Q7_ABSMIN ((q7_t)0) - - /* Dimension C vector space */ - #define CMPLX_DIM 2 - - /** - * @brief Error status returned by some functions in the library. 
- */ - - typedef enum - { - ARM_MATH_SUCCESS = 0, /**< No error */ - ARM_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */ - ARM_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */ - ARM_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation */ - ARM_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */ - ARM_MATH_SINGULAR = -5, /**< Input matrix is singular and cannot be inverted */ - ARM_MATH_TEST_FAILURE = -6 /**< Test Failed */ - } arm_status; - - -#ifdef __cplusplus -} -#endif - -#endif /*ifndef _ARM_MATH_TYPES_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/basic_math_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/basic_math_functions.h deleted file mode 100644 index b82a6dd81..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/basic_math_functions.h +++ /dev/null @@ -1,699 +0,0 @@ -/****************************************************************************** - * @file basic_math_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _BASIC_MATH_FUNCTIONS_H_ -#define _BASIC_MATH_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @defgroup groupMath Basic Math Functions - */ - - /** - * @brief Q7 vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_mult_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q15 vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_mult_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q31 vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_mult_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Floating-point vector multiplication. 
- * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_mult_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - - - /** - * @brief Floating-point vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_add_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - - - /** - * @brief Q7 vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_add_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q15 vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_add_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q31 vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_add_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Floating-point vector subtraction. 
- * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_sub_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - - - /** - * @brief Q7 vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_sub_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q15 vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_sub_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q31 vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ - void arm_sub_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Multiplies a floating-point vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scale scale factor to be applied - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_scale_f32( - const float32_t * pSrc, - float32_t scale, - float32_t * pDst, - uint32_t blockSize); - - - - /** - * @brief Multiplies a Q7 vector by a scalar. 
- * @param[in] pSrc points to the input vector - * @param[in] scaleFract fractional portion of the scale value - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_scale_q7( - const q7_t * pSrc, - q7_t scaleFract, - int8_t shift, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Multiplies a Q15 vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scaleFract fractional portion of the scale value - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_scale_q15( - const q15_t * pSrc, - q15_t scaleFract, - int8_t shift, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Multiplies a Q31 vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scaleFract fractional portion of the scale value - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_scale_q31( - const q31_t * pSrc, - q31_t scaleFract, - int8_t shift, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q7 vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ - void arm_abs_q7( - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Floating-point vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ - void arm_abs_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - - - /** - * @brief Q15 vector absolute value. 
- * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ - void arm_abs_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Q31 vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ - void arm_abs_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Dot product of floating-point vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ - void arm_dot_prod_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - uint32_t blockSize, - float32_t * result); - - - - /** - * @brief Dot product of Q7 vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ - void arm_dot_prod_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - uint32_t blockSize, - q31_t * result); - - - /** - * @brief Dot product of Q15 vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ - void arm_dot_prod_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - - /** - * @brief Dot product of Q31 vectors. 
- * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ - void arm_dot_prod_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - - /** - * @brief Shifts the elements of a Q7 vector a specified number of bits. - * @param[in] pSrc points to the input vector - * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_shift_q7( - const q7_t * pSrc, - int8_t shiftBits, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Shifts the elements of a Q15 vector a specified number of bits. - * @param[in] pSrc points to the input vector - * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_shift_q15( - const q15_t * pSrc, - int8_t shiftBits, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Shifts the elements of a Q31 vector a specified number of bits. - * @param[in] pSrc points to the input vector - * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_shift_q31( - const q31_t * pSrc, - int8_t shiftBits, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Adds a constant offset to a floating-point vector. 
- * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_offset_f32( - const float32_t * pSrc, - float32_t offset, - float32_t * pDst, - uint32_t blockSize); - - - - /** - * @brief Adds a constant offset to a Q7 vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_offset_q7( - const q7_t * pSrc, - q7_t offset, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Adds a constant offset to a Q15 vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_offset_q15( - const q15_t * pSrc, - q15_t offset, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Adds a constant offset to a Q31 vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_offset_q31( - const q31_t * pSrc, - q31_t offset, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Negates the elements of a floating-point vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_negate_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Negates the elements of a Q7 vector. 
- * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_negate_q7( - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Negates the elements of a Q15 vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_negate_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Negates the elements of a Q31 vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ - void arm_negate_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @brief Compute the logical bitwise AND of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_and_u16( - const uint16_t * pSrcA, - const uint16_t * pSrcB, - uint16_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise AND of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_and_u32( - const uint32_t * pSrcA, - const uint32_t * pSrcB, - uint32_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise AND of two fixed-point vectors. 
- * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_and_u8( - const uint8_t * pSrcA, - const uint8_t * pSrcB, - uint8_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise OR of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_or_u16( - const uint16_t * pSrcA, - const uint16_t * pSrcB, - uint16_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise OR of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_or_u32( - const uint32_t * pSrcA, - const uint32_t * pSrcB, - uint32_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise OR of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_or_u8( - const uint8_t * pSrcA, - const uint8_t * pSrcB, - uint8_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise NOT of a fixed-point vector. - * @param[in] pSrc points to input vector - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_not_u16( - const uint16_t * pSrc, - uint16_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise NOT of a fixed-point vector. 
- * @param[in] pSrc points to input vector - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_not_u32( - const uint32_t * pSrc, - uint32_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise NOT of a fixed-point vector. - * @param[in] pSrc points to input vector - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_not_u8( - const uint8_t * pSrc, - uint8_t * pDst, - uint32_t blockSize); - -/** - * @brief Compute the logical bitwise XOR of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_xor_u16( - const uint16_t * pSrcA, - const uint16_t * pSrcB, - uint16_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise XOR of two fixed-point vectors. - * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_xor_u32( - const uint32_t * pSrcA, - const uint32_t * pSrcB, - uint32_t * pDst, - uint32_t blockSize); - - /** - * @brief Compute the logical bitwise XOR of two fixed-point vectors. 
- * @param[in] pSrcA points to input vector A - * @param[in] pSrcB points to input vector B - * @param[out] pDst points to output vector - * @param[in] blockSize number of samples in each vector - * @return none - */ - void arm_xor_u8( - const uint8_t * pSrcA, - const uint8_t * pSrcB, - uint8_t * pDst, - uint32_t blockSize); - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _BASIC_MATH_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/bayes_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/bayes_functions.h deleted file mode 100644 index 050386c94..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/bayes_functions.h +++ /dev/null @@ -1,86 +0,0 @@ -/****************************************************************************** - * @file bayes_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _BAYES_FUNCTIONS_H_ -#define _BAYES_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#include "dsp/statistics_functions.h" - -/** - * @defgroup groupBayes Bayesian estimators - * - * Implement the naive gaussian Bayes estimator. - * The training must be done from scikit-learn. - * - * The parameters can be easily - * generated from the scikit-learn object. Some examples are given in - * DSP/Testing/PatternGeneration/Bayes.py - */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @brief Instance structure for Naive Gaussian Bayesian estimator. - */ -typedef struct -{ - uint32_t vectorDimension; /**< Dimension of vector space */ - uint32_t numberOfClasses; /**< Number of different classes */ - const float32_t *theta; /**< Mean values for the Gaussians */ - const float32_t *sigma; /**< Variances for the Gaussians */ - const float32_t *classPriors; /**< Class prior probabilities */ - float32_t epsilon; /**< Additive value to variances */ -} arm_gaussian_naive_bayes_instance_f32; - -/** - * @brief Naive Gaussian Bayesian Estimator - * - * @param[in] S points to a naive bayes instance structure - * @param[in] in points to the elements of the input vector. 
- * @param[in] pBuffer points to a buffer of length numberOfClasses - * @return The predicted class - * - */ - - -uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, - const float32_t * in, - float32_t *pBuffer); - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _BAYES_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/complex_math_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/complex_math_functions.h deleted file mode 100644 index 4a765324b..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/complex_math_functions.h +++ /dev/null @@ -1,294 +0,0 @@ -/****************************************************************************** - * @file complex_math_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _COMPLEX_MATH_FUNCTIONS_H_ -#define _COMPLEX_MATH_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" -#include "dsp/fast_math_functions.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @defgroup groupCmplxMath Complex Math Functions - * This set of functions operates on complex data vectors. - * The data in the complex arrays is stored in an interleaved fashion - * (real, imag, real, imag, ...). - * In the API functions, the number of samples in a complex array refers - * to the number of complex values; the array contains twice this number of - * real values. - */ - - /** - * @brief Floating-point complex conjugate. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ - void arm_cmplx_conj_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - /** - * @brief Q31 complex conjugate. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ - void arm_cmplx_conj_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q15 complex conjugate. 
- * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ - void arm_cmplx_conj_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - - /** - * @brief Floating-point complex magnitude squared - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ - void arm_cmplx_mag_squared_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q31 complex magnitude squared - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ - void arm_cmplx_mag_squared_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q15 complex magnitude squared - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ - void arm_cmplx_mag_squared_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - -/** - * @brief Floating-point complex magnitude - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ - void arm_cmplx_mag_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q31 complex magnitude - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ - void arm_cmplx_mag_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q15 complex magnitude - * @param[in] pSrc points to 
the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ - void arm_cmplx_mag_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q15 complex dot product - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] numSamples number of complex samples in each vector - * @param[out] realResult real part of the result returned here - * @param[out] imagResult imaginary part of the result returned here - */ - void arm_cmplx_dot_prod_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - uint32_t numSamples, - q31_t * realResult, - q31_t * imagResult); - - - /** - * @brief Q31 complex dot product - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] numSamples number of complex samples in each vector - * @param[out] realResult real part of the result returned here - * @param[out] imagResult imaginary part of the result returned here - */ - void arm_cmplx_dot_prod_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - uint32_t numSamples, - q63_t * realResult, - q63_t * imagResult); - - - /** - * @brief Floating-point complex dot product - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] numSamples number of complex samples in each vector - * @param[out] realResult real part of the result returned here - * @param[out] imagResult imaginary part of the result returned here - */ - void arm_cmplx_dot_prod_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - uint32_t numSamples, - float32_t * realResult, - float32_t * imagResult); - - - /** - * @brief Q15 complex-by-real multiplication - * @param[in] pSrcCmplx points to the complex input vector - * @param[in] pSrcReal points to the real input vector - * @param[out] pCmplxDst points to 
the complex output vector - * @param[in] numSamples number of samples in each vector - */ - void arm_cmplx_mult_real_q15( - const q15_t * pSrcCmplx, - const q15_t * pSrcReal, - q15_t * pCmplxDst, - uint32_t numSamples); - - - /** - * @brief Q31 complex-by-real multiplication - * @param[in] pSrcCmplx points to the complex input vector - * @param[in] pSrcReal points to the real input vector - * @param[out] pCmplxDst points to the complex output vector - * @param[in] numSamples number of samples in each vector - */ - void arm_cmplx_mult_real_q31( - const q31_t * pSrcCmplx, - const q31_t * pSrcReal, - q31_t * pCmplxDst, - uint32_t numSamples); - - - /** - * @brief Floating-point complex-by-real multiplication - * @param[in] pSrcCmplx points to the complex input vector - * @param[in] pSrcReal points to the real input vector - * @param[out] pCmplxDst points to the complex output vector - * @param[in] numSamples number of samples in each vector - */ - void arm_cmplx_mult_real_f32( - const float32_t * pSrcCmplx, - const float32_t * pSrcReal, - float32_t * pCmplxDst, - uint32_t numSamples); - - /** - * @brief Q15 complex-by-complex multiplication - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ - void arm_cmplx_mult_cmplx_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t numSamples); - - - /** - * @brief Q31 complex-by-complex multiplication - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ - void arm_cmplx_mult_cmplx_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t numSamples); - - - /** - * @brief Floating-point complex-by-complex multiplication - * @param[in] 
pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ - void arm_cmplx_mult_cmplx_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _COMPLEX_MATH_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/controller_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/controller_functions.h deleted file mode 100644 index 1de68b4d1..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/controller_functions.h +++ /dev/null @@ -1,790 +0,0 @@ -/****************************************************************************** - * @file controller_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _CONTROLLER_FUNCTIONS_H_ -#define _CONTROLLER_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - /** - * @brief Macros required for SINE and COSINE Controller functions - */ - -#define CONTROLLER_Q31_SHIFT (32 - 9) - /* 1.31(q31) Fixed value of 2/360 */ - /* -1 to +1 is divided into 360 values so total spacing is (2/360) */ -#define INPUT_SPACING 0xB60B61 - -/** - * @defgroup groupController Controller Functions - */ - - - /** - * @ingroup groupController - */ - - /** - * @addtogroup SinCos - * @{ - */ - -/** - * @brief Floating-point sin_cos function. - * @param[in] theta input value in degrees - * @param[out] pSinVal points to the processed sine output. - * @param[out] pCosVal points to the processed cos output. - */ - void arm_sin_cos_f32( - float32_t theta, - float32_t * pSinVal, - float32_t * pCosVal); - - - /** - * @brief Q31 sin_cos function. - * @param[in] theta scaled input value in degrees - * @param[out] pSinVal points to the processed sine output. - * @param[out] pCosVal points to the processed cosine output. - */ - void arm_sin_cos_q31( - q31_t theta, - q31_t * pSinVal, - q31_t * pCosVal); - - /** - * @} end of SinCos group - */ - - /** - * @ingroup groupController - */ - -/** - * @defgroup PID PID Motor Control - * - * A Proportional Integral Derivative (PID) controller is a generic feedback control - * loop mechanism widely used in industrial control systems. - * A PID controller is the most commonly used type of feedback controller. - * - * This set of functions implements (PID) controllers - * for Q15, Q31, and floating-point data types. The functions operate on a single sample - * of data and each call to the function returns a single processed value. 
- * S points to an instance of the PID control data structure. in - * is the input sample value. The functions return the output value. - * - * \par Algorithm: - *
-   *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
-   *    A0 = Kp + Ki + Kd
-   *    A1 = (-Kp ) - (2 * Kd )
-   *    A2 = Kd
-   * 
- * - * \par - * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant - * - * \par - * \image html PID.gif "Proportional Integral Derivative Controller" - * - * \par - * The PID controller calculates an "error" value as the difference between - * the measured output and the reference input. - * The controller attempts to minimize the error by adjusting the process control inputs. - * The proportional value determines the reaction to the current error, - * the integral value determines the reaction based on the sum of recent errors, - * and the derivative value determines the reaction based on the rate at which the error has been changing. - * - * \par Instance Structure - * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure. - * A separate instance structure must be defined for each PID Controller. - * There are separate instance structure declarations for each of the 3 supported data types. - * - * \par Reset Functions - * There is also an associated reset function for each data type which clears the state array. - * - * \par Initialization Functions - * There is also an associated initialization function for each data type. - * The initialization function performs the following operations: - * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains. - * - Zeros out the values in the state buffer. - * - * \par - * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function. - * - * \par Fixed-Point Behavior - * Care must be taken when using the fixed-point versions of the PID Controller functions. - * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - - - /** - * @brief Instance structure for the Q15 PID Control. 
- */ - typedef struct - { - q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ -#if !defined (ARM_MATH_DSP) - q15_t A1; /**< The derived gain A1 = -Kp - 2Kd */ - q15_t A2; /**< The derived gain A1 = Kd. */ -#else - q31_t A1; /**< The derived gain A1 = -Kp - 2Kd | Kd.*/ -#endif - q15_t state[3]; /**< The state array of length 3. */ - q15_t Kp; /**< The proportional gain. */ - q15_t Ki; /**< The integral gain. */ - q15_t Kd; /**< The derivative gain. */ - } arm_pid_instance_q15; - - /** - * @brief Instance structure for the Q31 PID Control. - */ - typedef struct - { - q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ - q31_t A2; /**< The derived gain, A2 = Kd . */ - q31_t state[3]; /**< The state array of length 3. */ - q31_t Kp; /**< The proportional gain. */ - q31_t Ki; /**< The integral gain. */ - q31_t Kd; /**< The derivative gain. */ - } arm_pid_instance_q31; - - /** - * @brief Instance structure for the floating-point PID Control. - */ - typedef struct - { - float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ - float32_t A2; /**< The derived gain, A2 = Kd . */ - float32_t state[3]; /**< The state array of length 3. */ - float32_t Kp; /**< The proportional gain. */ - float32_t Ki; /**< The integral gain. */ - float32_t Kd; /**< The derivative gain. */ - } arm_pid_instance_f32; - - - - /** - * @brief Initialization function for the floating-point PID Control. - * @param[in,out] S points to an instance of the PID structure. - * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. - */ - void arm_pid_init_f32( - arm_pid_instance_f32 * S, - int32_t resetStateFlag); - - - /** - * @brief Reset function for the floating-point PID Control. 
- * @param[in,out] S is an instance of the floating-point PID Control structure - */ - void arm_pid_reset_f32( - arm_pid_instance_f32 * S); - - - /** - * @brief Initialization function for the Q31 PID Control. - * @param[in,out] S points to an instance of the Q15 PID structure. - * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. - */ - void arm_pid_init_q31( - arm_pid_instance_q31 * S, - int32_t resetStateFlag); - - - /** - * @brief Reset function for the Q31 PID Control. - * @param[in,out] S points to an instance of the Q31 PID Control structure - */ - - void arm_pid_reset_q31( - arm_pid_instance_q31 * S); - - - /** - * @brief Initialization function for the Q15 PID Control. - * @param[in,out] S points to an instance of the Q15 PID structure. - * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. - */ - void arm_pid_init_q15( - arm_pid_instance_q15 * S, - int32_t resetStateFlag); - - - /** - * @brief Reset function for the Q15 PID Control. - * @param[in,out] S points to an instance of the q15 PID Control structure - */ - void arm_pid_reset_q15( - arm_pid_instance_q15 * S); - - - - /** - * @addtogroup PID - * @{ - */ - - /** - * @brief Process function for the floating-point PID Control. - * @param[in,out] S is an instance of the floating-point PID Control structure - * @param[in] in input sample to process - * @return processed output sample. - */ - __STATIC_FORCEINLINE float32_t arm_pid_f32( - arm_pid_instance_f32 * S, - float32_t in) - { - float32_t out; - - /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] */ - out = (S->A0 * in) + - (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]); - - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - - /* return to application */ - return (out); - - } - -/** - @brief Process function for the Q31 PID Control. 
- @param[in,out] S points to an instance of the Q31 PID Control structure - @param[in] in input sample to process - @return processed output sample. - - \par Scaling and Overflow Behavior - The function is implemented using an internal 64-bit accumulator. - The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. - Thus, if the accumulator result overflows it wraps around rather than clip. - In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions. - After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. - */ -__STATIC_FORCEINLINE q31_t arm_pid_q31( - arm_pid_instance_q31 * S, - q31_t in) - { - q63_t acc; - q31_t out; - - /* acc = A0 * x[n] */ - acc = (q63_t) S->A0 * in; - - /* acc += A1 * x[n-1] */ - acc += (q63_t) S->A1 * S->state[0]; - - /* acc += A2 * x[n-2] */ - acc += (q63_t) S->A2 * S->state[1]; - - /* convert output to 1.31 format to add y[n-1] */ - out = (q31_t) (acc >> 31U); - - /* out += y[n-1] */ - out += S->state[2]; - - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - - /* return to application */ - return (out); - } - - -/** - @brief Process function for the Q15 PID Control. - @param[in,out] S points to an instance of the Q15 PID Control structure - @param[in] in input sample to process - @return processed output sample. - - \par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result. - The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. - There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 
- After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. - Lastly, the accumulator is saturated to yield a result in 1.15 format. - */ -__STATIC_FORCEINLINE q15_t arm_pid_q15( - arm_pid_instance_q15 * S, - q15_t in) - { - q63_t acc; - q15_t out; - -#if defined (ARM_MATH_DSP) - /* Implementation of PID controller */ - - /* acc = A0 * x[n] */ - acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in); - - /* acc += A1 * x[n-1] + A2 * x[n-2] */ - acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc); -#else - /* acc = A0 * x[n] */ - acc = ((q31_t) S->A0) * in; - - /* acc += A1 * x[n-1] + A2 * x[n-2] */ - acc += (q31_t) S->A1 * S->state[0]; - acc += (q31_t) S->A2 * S->state[1]; -#endif - - /* acc += y[n-1] */ - acc += (q31_t) S->state[2] << 15; - - /* saturate the output */ - out = (q15_t) (__SSAT((q31_t)(acc >> 15), 16)); - - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - - /* return to application */ - return (out); - } - - /** - * @} end of PID group - */ - - /** - * @ingroup groupController - */ - - /** - * @defgroup park Vector Park Transform - * - * Forward Park transform converts the input two-coordinate vector to flux and torque components. - * The Park transform can be used to realize the transformation of the Ialpha and the Ibeta currents - * from the stationary to the moving reference frame and control the spatial relationship between - * the stator vector current and rotor flux vector. - * If we consider the d axis aligned with the rotor flux, the diagram below shows the - * current vector and the relationship from the two reference frames: - * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame" - * - * The function operates on a single sample of data and each call to the function returns the processed output. 
- * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html parkFormula.gif - * where Ialpha and Ibeta are the stator vector components, - * pId and pIq are rotor vector components and cosVal and sinVal are the - * cosine and sine values of theta (rotor flux position). - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Park transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - - /** - * @addtogroup park - * @{ - */ - - /** - * @brief Floating-point Park transform - * @param[in] Ialpha input two-phase vector coordinate alpha - * @param[in] Ibeta input two-phase vector coordinate beta - * @param[out] pId points to output rotor reference frame d - * @param[out] pIq points to output rotor reference frame q - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - * @return none - * - * The function implements the forward Park transform. 
- * - */ - __STATIC_FORCEINLINE void arm_park_f32( - float32_t Ialpha, - float32_t Ibeta, - float32_t * pId, - float32_t * pIq, - float32_t sinVal, - float32_t cosVal) - { - /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */ - *pId = Ialpha * cosVal + Ibeta * sinVal; - - /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */ - *pIq = -Ialpha * sinVal + Ibeta * cosVal; - } - - -/** - @brief Park transform for Q31 version - @param[in] Ialpha input two-phase vector coordinate alpha - @param[in] Ibeta input two-phase vector coordinate beta - @param[out] pId points to output rotor reference frame d - @param[out] pIq points to output rotor reference frame q - @param[in] sinVal sine value of rotation angle theta - @param[in] cosVal cosine value of rotation angle theta - @return none - - \par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the addition and subtraction, hence there is no risk of overflow. 
- */ -__STATIC_FORCEINLINE void arm_park_q31( - q31_t Ialpha, - q31_t Ibeta, - q31_t * pId, - q31_t * pIq, - q31_t sinVal, - q31_t cosVal) - { - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - q31_t product3, product4; /* Temporary variables used to store intermediate results */ - - /* Intermediate product is calculated by (Ialpha * cosVal) */ - product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31); - - /* Intermediate product is calculated by (Ibeta * sinVal) */ - product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31); - - - /* Intermediate product is calculated by (Ialpha * sinVal) */ - product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31); - - /* Intermediate product is calculated by (Ibeta * cosVal) */ - product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31); - - /* Calculate pId by adding the two intermediate products 1 and 2 */ - *pId = __QADD(product1, product2); - - /* Calculate pIq by subtracting the two intermediate products 3 from 4 */ - *pIq = __QSUB(product4, product3); - } - - /** - * @} end of park group - */ - - - /** - * @ingroup groupController - */ - - /** - * @defgroup inv_park Vector Inverse Park transform - * Inverse Park transform converts the input flux and torque components to two-coordinate vector. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html parkInvFormula.gif - * where pIalpha and pIbeta are the stator vector components, - * Id and Iq are rotor vector components and cosVal and sinVal are the - * cosine and sine values of theta (rotor flux position). - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Park transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. 
- * Refer to the function specific documentation below for usage guidelines. - */ - - /** - * @addtogroup inv_park - * @{ - */ - - /** - * @brief Floating-point Inverse Park transform - * @param[in] Id input coordinate of rotor reference frame d - * @param[in] Iq input coordinate of rotor reference frame q - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - * @return none - */ - __STATIC_FORCEINLINE void arm_inv_park_f32( - float32_t Id, - float32_t Iq, - float32_t * pIalpha, - float32_t * pIbeta, - float32_t sinVal, - float32_t cosVal) - { - /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */ - *pIalpha = Id * cosVal - Iq * sinVal; - - /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */ - *pIbeta = Id * sinVal + Iq * cosVal; - } - - -/** - @brief Inverse Park transform for Q31 version - @param[in] Id input coordinate of rotor reference frame d - @param[in] Iq input coordinate of rotor reference frame q - @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - @param[out] pIbeta points to output two-phase orthogonal vector axis beta - @param[in] sinVal sine value of rotation angle theta - @param[in] cosVal cosine value of rotation angle theta - @return none - - @par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the addition, hence there is no risk of overflow. 
- */ -__STATIC_FORCEINLINE void arm_inv_park_q31( - q31_t Id, - q31_t Iq, - q31_t * pIalpha, - q31_t * pIbeta, - q31_t sinVal, - q31_t cosVal) - { - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - q31_t product3, product4; /* Temporary variables used to store intermediate results */ - - /* Intermediate product is calculated by (Id * cosVal) */ - product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31); - - /* Intermediate product is calculated by (Iq * sinVal) */ - product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31); - - - /* Intermediate product is calculated by (Id * sinVal) */ - product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31); - - /* Intermediate product is calculated by (Iq * cosVal) */ - product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31); - - /* Calculate pIalpha by using the two intermediate products 1 and 2 */ - *pIalpha = __QSUB(product1, product2); - - /* Calculate pIbeta by using the two intermediate products 3 and 4 */ - *pIbeta = __QADD(product4, product3); - } - - /** - * @} end of Inverse park group - */ - -/** - * @ingroup groupController - */ - - /** - * @defgroup clarke Vector Clarke Transform - * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector. - * Generally the Clarke transform uses three-phase currents Ia, Ib and Ic to calculate currents - * in the two-phase orthogonal stator axis Ialpha and Ibeta. - * When Ialpha is superposed with Ia as shown in the figure below - * \image html clarke.gif Stator current space vector and its components in (a,b). - * and Ia + Ib + Ic = 0, in this condition Ialpha and Ibeta - * can be calculated using only Ia and Ib. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. 
- * \par Algorithm - * \image html clarkeFormula.gif - * where Ia and Ib are the instantaneous stator phases and - * pIalpha and pIbeta are the two coordinates of time invariant vector. - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Clarke transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - - /** - * @addtogroup clarke - * @{ - */ - - /** - * - * @brief Floating-point Clarke transform - * @param[in] Ia input three-phase coordinate a - * @param[in] Ib input three-phase coordinate b - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - * @return none - */ - __STATIC_FORCEINLINE void arm_clarke_f32( - float32_t Ia, - float32_t Ib, - float32_t * pIalpha, - float32_t * pIbeta) - { - /* Calculate pIalpha using the equation, pIalpha = Ia */ - *pIalpha = Ia; - - /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */ - *pIbeta = (0.57735026919f * Ia + 1.15470053838f * Ib); - } - - -/** - @brief Clarke transform for Q31 version - @param[in] Ia input three-phase coordinate a - @param[in] Ib input three-phase coordinate b - @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - @param[out] pIbeta points to output two-phase orthogonal vector axis beta - @return none - - \par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the addition, hence there is no risk of overflow. 
- */ -__STATIC_FORCEINLINE void arm_clarke_q31( - q31_t Ia, - q31_t Ib, - q31_t * pIalpha, - q31_t * pIbeta) - { - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - - /* Calculating pIalpha from Ia by equation pIalpha = Ia */ - *pIalpha = Ia; - - /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */ - product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30); - - /* Intermediate product is calculated by (2/sqrt(3) * Ib) */ - product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30); - - /* pIbeta is calculated by adding the intermediate products */ - *pIbeta = __QADD(product1, product2); - } - - /** - * @} end of clarke group - */ - - - /** - * @ingroup groupController - */ - - /** - * @defgroup inv_clarke Vector Inverse Clarke Transform - * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html clarkeInvFormula.gif - * where pIa and pIb are the instantaneous stator phases and - * Ialpha and Ibeta are the two coordinates of time invariant vector. - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Clarke transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. 
- */ - - /** - * @addtogroup inv_clarke - * @{ - */ - - /** - * @brief Floating-point Inverse Clarke transform - * @param[in] Ialpha input two-phase orthogonal vector axis alpha - * @param[in] Ibeta input two-phase orthogonal vector axis beta - * @param[out] pIa points to output three-phase coordinate a - * @param[out] pIb points to output three-phase coordinate b - * @return none - */ - __STATIC_FORCEINLINE void arm_inv_clarke_f32( - float32_t Ialpha, - float32_t Ibeta, - float32_t * pIa, - float32_t * pIb) - { - /* Calculating pIa from Ialpha by equation pIa = Ialpha */ - *pIa = Ialpha; - - /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */ - *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta; - } - - -/** - @brief Inverse Clarke transform for Q31 version - @param[in] Ialpha input two-phase orthogonal vector axis alpha - @param[in] Ibeta input two-phase orthogonal vector axis beta - @param[out] pIa points to output three-phase coordinate a - @param[out] pIb points to output three-phase coordinate b - @return none - - \par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the subtraction, hence there is no risk of overflow. 
- */ -__STATIC_FORCEINLINE void arm_inv_clarke_q31( - q31_t Ialpha, - q31_t Ibeta, - q31_t * pIa, - q31_t * pIb) - { - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - - /* Calculating pIa from Ialpha by equation pIa = Ialpha */ - *pIa = Ialpha; - - /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */ - product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31); - - /* Intermediate product is calculated by (1/sqrt(3) * pIb) */ - product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31); - - /* pIb is calculated by subtracting the products */ - *pIb = __QSUB(product2, product1); - } - - /** - * @} end of inv_clarke group - */ - - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _CONTROLLER_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/distance_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/distance_functions.h deleted file mode 100644 index d58c6c028..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/distance_functions.h +++ /dev/null @@ -1,296 +0,0 @@ -/****************************************************************************** - * @file distance_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _DISTANCE_FUNCTIONS_H_ -#define _DISTANCE_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#include "dsp/statistics_functions.h" -#include "dsp/basic_math_functions.h" -#include "dsp/fast_math_functions.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - -/** - * @defgroup groupDistance Distance functions - * - * Distance functions for use with clustering algorithms. - * There are distance functions for float vectors and boolean vectors. - * - */ - -/* 6.14 bug */ -#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001) - -__attribute__((weak)) float __powisf2(float a, int b); - -#endif - -/** - * @brief Euclidean distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ - -float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Bray-Curtis distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Canberra distance between two vectors - * - * This function may divide by zero when samples pA[i] and pB[i] are both zero. - * The result of the computation will be correct. So the division per zero may be - * ignored. 
- * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - - -/** - * @brief Chebyshev distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - - -/** - * @brief Cityblock (Manhattan) distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Correlation distance between two vectors - * - * The input vectors are modified in place ! - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t arm_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blockSize); - -/** - * @brief Cosine distance between two vectors - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ - -float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Jensen-Shannon distance between two vectors - * - * This function is assuming that elements of second vector are > 0 - * and 0 only when the corresponding element of first vector is 0. - * Otherwise the result of the computation does not make sense - * and for speed reasons, the cases returning NaN or Infinity are not - * managed. - * - * When the function is computing x log (x / y) with x 0 and y 0, - * it will compute the right value (0) but a division per zero will occur - * and shoudl be ignored in client code. 
- * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ - -float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB,uint32_t blockSize); - -/** - * @brief Minkowski distance between two vectors - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] n Norm order (>= 2) - * @param[in] blockSize vector length - * @return distance - * - */ - - - -float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize); - -/** - * @brief Dice distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] order Distance order - * @param[in] blockSize Number of samples - * @return distance - * - */ - - -float32_t arm_dice_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Hamming distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_hamming_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Jaccard distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_jaccard_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Kulsinski distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_kulsinski_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Roger 
Stanimoto distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Russell-Rao distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Sokal-Michener distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Sokal-Sneath distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_sokalsneath_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Yule distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t arm_yule_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _DISTANCE_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/fast_math_functions.h 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/fast_math_functions.h deleted file mode 100644 index 4de71597e..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/fast_math_functions.h +++ /dev/null @@ -1,287 +0,0 @@ -/****************************************************************************** - * @file fast_math_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _FAST_MATH_FUNCTIONS_H_ -#define _FAST_MATH_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - /** - * @brief Macros required for SINE and COSINE Fast math approximations - */ - -#define FAST_MATH_TABLE_SIZE 512 -#define FAST_MATH_Q31_SHIFT (32 - 10) -#define FAST_MATH_Q15_SHIFT (16 - 10) - -#ifndef PI - #define PI 3.14159265358979f -#endif - - -/** - * @defgroup groupFastMath Fast Math Functions - * This set of functions provides a fast approximation to sine, cosine, and square root. - * As compared to most of the other functions in the CMSIS math library, the fast math functions - * operate on individual values and not arrays. - * There are separate functions for Q15, Q31, and floating-point data. - * - */ - - /** - * @ingroup groupFastMath - */ - - -/** - @addtogroup sin - @{ - */ - -/** - * @brief Fast approximation to the trigonometric sine function for floating-point data. - * @param[in] x input value in radians. - * @return sin(x). - */ - float32_t arm_sin_f32( - float32_t x); - - - /** - * @brief Fast approximation to the trigonometric sine function for Q31 data. - * @param[in] x Scaled input value in radians. - * @return sin(x). - */ - q31_t arm_sin_q31( - q31_t x); - - - /** - * @brief Fast approximation to the trigonometric sine function for Q15 data. - * @param[in] x Scaled input value in radians. - * @return sin(x). - */ - q15_t arm_sin_q15( - q15_t x); - -/** - @} end of sin group - */ - -/** - @addtogroup cos - @{ - */ - - /** - * @brief Fast approximation to the trigonometric cosine function for floating-point data. - * @param[in] x input value in radians. - * @return cos(x). - */ - float32_t arm_cos_f32( - float32_t x); - - - /** - * @brief Fast approximation to the trigonometric cosine function for Q31 data. - * @param[in] x Scaled input value in radians. - * @return cos(x). 
- */ - q31_t arm_cos_q31( - q31_t x); - - - /** - * @brief Fast approximation to the trigonometric cosine function for Q15 data. - * @param[in] x Scaled input value in radians. - * @return cos(x). - */ - q15_t arm_cos_q15( - q15_t x); - -/** - @} end of cos group - */ - - -/** - @brief Floating-point vector of log values. - @param[in] pSrc points to the input vector - @param[out] pDst points to the output vector - @param[in] blockSize number of samples in each vector - @return none - */ - void arm_vlog_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - @brief Floating-point vector of exp values. - @param[in] pSrc points to the input vector - @param[out] pDst points to the output vector - @param[in] blockSize number of samples in each vector - @return none - */ - void arm_vexp_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - /** - * @defgroup SQRT Square Root - * - * Computes the square root of a number. - * There are separate functions for Q15, Q31, and floating-point data types. - * The square root function is computed using the Newton-Raphson algorithm. - * This is an iterative algorithm of the form: - *
-   *      x1 = x0 - f(x0)/f'(x0)
-   * 
- * where x1 is the current estimate, - * x0 is the previous estimate, and - * f'(x0) is the derivative of f() evaluated at x0. - * For the square root function, the algorithm reduces to: - *
-   *     x0 = in/2                         [initial guess]
-   *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
-   * 
- */ - - - /** - * @addtogroup SQRT - * @{ - */ - -/** - @brief Floating-point square root function. - @param[in] in input value - @param[out] pOut square root of input value - @return execution status - - \ref ARM_MATH_SUCCESS : input value is positive - - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 - */ -__STATIC_FORCEINLINE arm_status arm_sqrt_f32( - float32_t in, - float32_t * pOut) - { - if (in >= 0.0f) - { -#if defined ( __CC_ARM ) - #if defined __TARGET_FPU_VFP - *pOut = __sqrtf(in); - #else - *pOut = sqrtf(in); - #endif - -#elif defined ( __ICCARM__ ) - #if defined __ARMVFP__ - __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in)); - #else - *pOut = sqrtf(in); - #endif - -#else - *pOut = sqrtf(in); -#endif - - return (ARM_MATH_SUCCESS); - } - else - { - *pOut = 0.0f; - return (ARM_MATH_ARGUMENT_ERROR); - } - } - - -/** - @brief Q31 square root function. - @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF - @param[out] pOut points to square root of input value - @return execution status - - \ref ARM_MATH_SUCCESS : input value is positive - - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 - */ -arm_status arm_sqrt_q31( - q31_t in, - q31_t * pOut); - - -/** - @brief Q15 square root function. - @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF - @param[out] pOut points to square root of input value - @return execution status - - \ref ARM_MATH_SUCCESS : input value is positive - - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 - */ -arm_status arm_sqrt_q15( - q15_t in, - q15_t * pOut); - - /** - * @brief Vector Floating-point square root function. - * @param[in] pIn input vector. - * @param[out] pOut vector of square roots of input elements. - * @param[in] len length of input vector. 
- * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if - * in is negative value and returns zero output for negative values. - */ - void arm_vsqrt_f32( - float32_t * pIn, - float32_t * pOut, - uint16_t len); - - void arm_vsqrt_q31( - q31_t * pIn, - q31_t * pOut, - uint16_t len); - - void arm_vsqrt_q15( - q15_t * pIn, - q15_t * pOut, - uint16_t len); - - /** - * @} end of SQRT group - */ - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _FAST_MATH_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/filtering_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/filtering_functions.h deleted file mode 100644 index 0cbd7cf4c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/filtering_functions.h +++ /dev/null @@ -1,2439 +0,0 @@ -/****************************************************************************** - * @file filtering_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _FILTERING_FUNCTIONS_H_ -#define _FILTERING_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#include "dsp/support_functions.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define DELTA_Q31 ((q31_t)(0x100)) -#define DELTA_Q15 ((q15_t)0x5) - -/** - * @defgroup groupFilters Filtering Functions - */ - - /** - * @brief Instance structure for the Q7 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - } arm_fir_instance_q7; - - /** - * @brief Instance structure for the Q15 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - } arm_fir_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. 
*/ - } arm_fir_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - } arm_fir_instance_f32; - - /** - * @brief Processing function for the Q7 FIR filter. - * @param[in] S points to an instance of the Q7 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_fir_q7( - const arm_fir_instance_q7 * S, - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the Q7 FIR filter. - * @param[in,out] S points to an instance of the Q7 FIR structure. - * @param[in] numTaps Number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed. - * - * For the MVE version, the coefficient length must be a multiple of 16. - * You can pad with zeros if you have less coefficients. - */ - void arm_fir_init_q7( - arm_fir_instance_q7 * S, - uint16_t numTaps, - const q7_t * pCoeffs, - q7_t * pState, - uint32_t blockSize); - - /** - * @brief Processing function for the Q15 FIR filter. - * @param[in] S points to an instance of the Q15 FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_fir_q15( - const arm_fir_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - /** - * @brief Processing function for the fast Q15 FIR filter (fast version). 
- * @param[in] S points to an instance of the Q15 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_fir_fast_q15( - const arm_fir_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the Q15 FIR filter. - * @param[in,out] S points to an instance of the Q15 FIR filter structure. - * @param[in] numTaps Number of filter coefficients in the filter. Must be even and greater than or equal to 4. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed at a time. - * @return The function returns either - * ARM_MATH_SUCCESS if initialization was successful or - * ARM_MATH_ARGUMENT_ERROR if numTaps is not a supported value. - * - * For the MVE version, the coefficient length must be a multiple of 8. - * You can pad with zeros if you have less coefficients. - * - */ - arm_status arm_fir_init_q15( - arm_fir_instance_q15 * S, - uint16_t numTaps, - const q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - /** - * @brief Processing function for the Q31 FIR filter. - * @param[in] S points to an instance of the Q31 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_fir_q31( - const arm_fir_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - /** - * @brief Processing function for the fast Q31 FIR filter (fast version). - * @param[in] S points to an instance of the Q31 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. 
- */ - void arm_fir_fast_q31( - const arm_fir_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the Q31 FIR filter. - * @param[in,out] S points to an instance of the Q31 FIR structure. - * @param[in] numTaps Number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed at a time. - * - * For the MVE version, the coefficient length must be a multiple of 4. - * You can pad with zeros if you have less coefficients. - */ - void arm_fir_init_q31( - arm_fir_instance_q31 * S, - uint16_t numTaps, - const q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - /** - * @brief Processing function for the floating-point FIR filter. - * @param[in] S points to an instance of the floating-point FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_fir_f32( - const arm_fir_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the floating-point FIR filter. - * @param[in,out] S points to an instance of the floating-point FIR filter structure. - * @param[in] numTaps Number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed at a time. - */ - void arm_fir_init_f32( - arm_fir_instance_f32 * S, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - /** - * @brief Instance structure for the Q15 Biquad cascade filter. - */ - typedef struct - { - int8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. 
*/ - q15_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - const q15_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ - } arm_biquad_casd_df1_inst_q15; - - /** - * @brief Instance structure for the Q31 Biquad cascade filter. - */ - typedef struct - { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q31_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - const q31_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ - } arm_biquad_casd_df1_inst_q31; - - /** - * @brief Instance structure for the floating-point Biquad cascade filter. - */ - typedef struct - { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - const float32_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - } arm_biquad_casd_df1_inst_f32; - -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - /** - * @brief Instance structure for the modified Biquad coefs required by vectorized code. - */ - typedef struct - { - float32_t coeffs[8][4]; /**< Points to the array of modified coefficients. The array is of length 32. There is one per stage */ - } arm_biquad_mod_coef_f32; -#endif - - /** - * @brief Processing function for the Q15 Biquad cascade filter. - * @param[in] S points to an instance of the Q15 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. 
- */ - void arm_biquad_cascade_df1_q15( - const arm_biquad_casd_df1_inst_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the Q15 Biquad cascade filter. - * @param[in,out] S points to an instance of the Q15 Biquad cascade structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format - */ - void arm_biquad_cascade_df1_init_q15( - arm_biquad_casd_df1_inst_q15 * S, - uint8_t numStages, - const q15_t * pCoeffs, - q15_t * pState, - int8_t postShift); - - /** - * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q15 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_df1_fast_q15( - const arm_biquad_casd_df1_inst_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - /** - * @brief Processing function for the Q31 Biquad cascade filter - * @param[in] S points to an instance of the Q31 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_df1_q31( - const arm_biquad_casd_df1_inst_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - /** - * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q31 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. 
- * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_df1_fast_q31( - const arm_biquad_casd_df1_inst_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the Q31 Biquad cascade filter. - * @param[in,out] S points to an instance of the Q31 Biquad cascade structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format - */ - void arm_biquad_cascade_df1_init_q31( - arm_biquad_casd_df1_inst_q31 * S, - uint8_t numStages, - const q31_t * pCoeffs, - q31_t * pState, - int8_t postShift); - - /** - * @brief Processing function for the floating-point Biquad cascade filter. - * @param[in] S points to an instance of the floating-point Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_df1_f32( - const arm_biquad_casd_df1_inst_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the floating-point Biquad cascade filter. - * @param[in,out] S points to an instance of the floating-point Biquad cascade structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pCoeffsMod points to the modified filter coefficients (only MVE version). - * @param[in] pState points to the state buffer. 
- */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - void arm_biquad_cascade_df1_mve_init_f32( - arm_biquad_casd_df1_inst_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - arm_biquad_mod_coef_f32 * pCoeffsMod, - float32_t * pState); -#endif - - void arm_biquad_cascade_df1_init_f32( - arm_biquad_casd_df1_inst_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - -/** - * @brief Convolution of floating-point sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1. - */ - void arm_conv_f32( - const float32_t * pSrcA, - uint32_t srcALen, - const float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - - /** - * @brief Convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - */ - void arm_conv_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. 
- * @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1. - */ - void arm_conv_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - - /** - * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ - void arm_conv_fast_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - - /** - * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - */ - void arm_conv_fast_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - - /** - * @brief Convolution of Q31 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. 
- */ - void arm_conv_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - - /** - * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ - void arm_conv_fast_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - - /** - * @brief Convolution of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). - */ - void arm_conv_opt_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - - /** - * @brief Convolution of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ - void arm_conv_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - - /** - * @brief Partial convolution of floating-point sequences. 
- * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_f32( - const float32_t * pSrcA, - uint32_t srcALen, - const float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - /** - * @brief Partial convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - - /** - * @brief Partial convolution of Q15 sequences. 
- * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - /** - * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_fast_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - /** - * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. 
- * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_fast_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - - /** - * @brief Partial convolution of Q31 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - /** - * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. 
- * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_fast_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - /** - * @brief Partial convolution of Q7 sequences - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_opt_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Partial convolution of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. 
- * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ - arm_status arm_conv_partial_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - /** - * @brief Instance structure for the Q15 FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } arm_fir_decimate_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } arm_fir_decimate_instance_q31; - -/** - @brief Instance structure for floating-point FIR decimator. - */ -typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } arm_fir_decimate_instance_f32; - - -/** - @brief Processing function for floating-point FIR decimator. 
- @param[in] S points to an instance of the floating-point FIR decimator structure - @param[in] pSrc points to the block of input data - @param[out] pDst points to the block of output data - @param[in] blockSize number of samples to process - */ -void arm_fir_decimate_f32( - const arm_fir_decimate_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - @brief Initialization function for the floating-point FIR decimator. - @param[in,out] S points to an instance of the floating-point FIR decimator structure - @param[in] numTaps number of coefficients in the filter - @param[in] M decimation factor - @param[in] pCoeffs points to the filter coefficients - @param[in] pState points to the state buffer - @param[in] blockSize number of input samples to process per call - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_LENGTH_ERROR : blockSize is not a multiple of M - */ -arm_status arm_fir_decimate_init_f32( - arm_fir_decimate_instance_f32 * S, - uint16_t numTaps, - uint8_t M, - const float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q15 FIR decimator. - * @param[in] S points to an instance of the Q15 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_decimate_q15( - const arm_fir_decimate_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q15 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. 
- */ - void arm_fir_decimate_fast_q15( - const arm_fir_decimate_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q15 FIR decimator. - * @param[in,out] S points to an instance of the Q15 FIR decimator structure. - * @param[in] numTaps number of coefficients in the filter. - * @param[in] M decimation factor. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if - * blockSize is not a multiple of M. - */ - arm_status arm_fir_decimate_init_q15( - arm_fir_decimate_instance_q15 * S, - uint16_t numTaps, - uint8_t M, - const q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q31 FIR decimator. - * @param[in] S points to an instance of the Q31 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_decimate_q31( - const arm_fir_decimate_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - /** - * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q31 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_decimate_fast_q31( - const arm_fir_decimate_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q31 FIR decimator. 
- * @param[in,out] S points to an instance of the Q31 FIR decimator structure. - * @param[in] numTaps number of coefficients in the filter. - * @param[in] M decimation factor. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if - * blockSize is not a multiple of M. - */ - arm_status arm_fir_decimate_init_q31( - arm_fir_decimate_instance_q31 * S, - uint16_t numTaps, - uint8_t M, - const q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ - } arm_fir_interpolate_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ - } arm_fir_interpolate_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - float32_t *pState; /**< points to the state variable array. 
The array is of length phaseLength+numTaps-1. */ - } arm_fir_interpolate_instance_f32; - - - /** - * @brief Processing function for the Q15 FIR interpolator. - * @param[in] S points to an instance of the Q15 FIR interpolator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_interpolate_q15( - const arm_fir_interpolate_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q15 FIR interpolator. - * @param[in,out] S points to an instance of the Q15 FIR interpolator structure. - * @param[in] L upsample factor. - * @param[in] numTaps number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if - * the filter length numTaps is not a multiple of the interpolation factor L. - */ - arm_status arm_fir_interpolate_init_q15( - arm_fir_interpolate_instance_q15 * S, - uint8_t L, - uint16_t numTaps, - const q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q31 FIR interpolator. - * @param[in] S points to an instance of the Q15 FIR interpolator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_interpolate_q31( - const arm_fir_interpolate_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q31 FIR interpolator. 
- * @param[in,out] S points to an instance of the Q31 FIR interpolator structure. - * @param[in] L upsample factor. - * @param[in] numTaps number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if - * the filter length numTaps is not a multiple of the interpolation factor L. - */ - arm_status arm_fir_interpolate_init_q31( - arm_fir_interpolate_instance_q31 * S, - uint8_t L, - uint16_t numTaps, - const q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - - /** - * @brief Processing function for the floating-point FIR interpolator. - * @param[in] S points to an instance of the floating-point FIR interpolator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_interpolate_f32( - const arm_fir_interpolate_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the floating-point FIR interpolator. - * @param[in,out] S points to an instance of the floating-point FIR interpolator structure. - * @param[in] L upsample factor. - * @param[in] numTaps number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if - * the filter length numTaps is not a multiple of the interpolation factor L. 
- */ - arm_status arm_fir_interpolate_init_f32( - arm_fir_interpolate_instance_f32 * S, - uint8_t L, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the high precision Q31 Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q63_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - const q31_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */ - } arm_biquad_cas_df1_32x64_ins_q31; - - - /** - * @param[in] S points to an instance of the high precision Q31 Biquad cascade filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cas_df1_32x64_q31( - const arm_biquad_cas_df1_32x64_ins_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @param[in,out] S points to an instance of the high precision Q31 Biquad cascade filter structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] postShift shift to be applied to the output. Varies according to the coefficients format - */ - void arm_biquad_cas_df1_32x64_init_q31( - arm_biquad_cas_df1_32x64_ins_q31 * S, - uint8_t numStages, - const q31_t * pCoeffs, - q63_t * pState, - uint8_t postShift); - - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. 
*/ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - const float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } arm_biquad_cascade_df2T_instance_f32; - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - const float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } arm_biquad_cascade_stereo_df2T_instance_f32; - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float64_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - const float64_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } arm_biquad_cascade_df2T_instance_f64; - - - /** - * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in] S points to an instance of the filter data structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_df2T_f32( - const arm_biquad_cascade_df2T_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels - * @param[in] S points to an instance of the filter data structure. 
- * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_stereo_df2T_f32( - const arm_biquad_cascade_stereo_df2T_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in] S points to an instance of the filter data structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_biquad_cascade_df2T_f64( - const arm_biquad_cascade_df2T_instance_f64 * S, - const float64_t * pSrc, - float64_t * pDst, - uint32_t blockSize); - - -#if defined(ARM_MATH_NEON) -void arm_biquad_cascade_df2T_compute_coefs_f32( - arm_biquad_cascade_df2T_instance_f32 * S, - uint8_t numStages, - float32_t * pCoeffs); -#endif - /** - * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in,out] S points to an instance of the filter data structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - */ - void arm_biquad_cascade_df2T_init_f32( - arm_biquad_cascade_df2T_instance_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - - /** - * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in,out] S points to an instance of the filter data structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. 
- */ - void arm_biquad_cascade_stereo_df2T_init_f32( - arm_biquad_cascade_stereo_df2T_instance_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - - /** - * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in,out] S points to an instance of the filter data structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - */ - void arm_biquad_cascade_df2T_init_f64( - arm_biquad_cascade_df2T_instance_f64 * S, - uint8_t numStages, - const float64_t * pCoeffs, - float64_t * pState); - - - /** - * @brief Instance structure for the Q15 FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } arm_fir_lattice_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } arm_fir_lattice_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } arm_fir_lattice_instance_f32; - - - /** - * @brief Initialization function for the Q15 FIR lattice filter. 
- * @param[in] S points to an instance of the Q15 FIR lattice structure. - * @param[in] numStages number of filter stages. - * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. - * @param[in] pState points to the state buffer. The array is of length numStages. - */ - void arm_fir_lattice_init_q15( - arm_fir_lattice_instance_q15 * S, - uint16_t numStages, - const q15_t * pCoeffs, - q15_t * pState); - - - /** - * @brief Processing function for the Q15 FIR lattice filter. - * @param[in] S points to an instance of the Q15 FIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_fir_lattice_q15( - const arm_fir_lattice_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q31 FIR lattice filter. - * @param[in] S points to an instance of the Q31 FIR lattice structure. - * @param[in] numStages number of filter stages. - * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. - * @param[in] pState points to the state buffer. The array is of length numStages. - */ - void arm_fir_lattice_init_q31( - arm_fir_lattice_instance_q31 * S, - uint16_t numStages, - const q31_t * pCoeffs, - q31_t * pState); - - - /** - * @brief Processing function for the Q31 FIR lattice filter. - * @param[in] S points to an instance of the Q31 FIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_fir_lattice_q31( - const arm_fir_lattice_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the floating-point FIR lattice filter. 
- * @param[in] S points to an instance of the floating-point FIR lattice structure. - * @param[in] numStages number of filter stages. - * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. - * @param[in] pState points to the state buffer. The array is of length numStages. - */ - void arm_fir_lattice_init_f32( - arm_fir_lattice_instance_f32 * S, - uint16_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - - /** - * @brief Processing function for the floating-point FIR lattice filter. - * @param[in] S points to an instance of the floating-point FIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_fir_lattice_f32( - const arm_fir_lattice_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } arm_iir_lattice_instance_q15; - - /** - * @brief Instance structure for the Q31 IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. 
*/ - } arm_iir_lattice_instance_q31; - - /** - * @brief Instance structure for the floating-point IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } arm_iir_lattice_instance_f32; - - - /** - * @brief Processing function for the floating-point IIR lattice filter. - * @param[in] S points to an instance of the floating-point IIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_iir_lattice_f32( - const arm_iir_lattice_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the floating-point IIR lattice filter. - * @param[in] S points to an instance of the floating-point IIR lattice structure. - * @param[in] numStages number of stages in the filter. - * @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages. - * @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1. - * @param[in] pState points to the state buffer. The array is of length numStages+blockSize-1. - * @param[in] blockSize number of samples to process. - */ - void arm_iir_lattice_init_f32( - arm_iir_lattice_instance_f32 * S, - uint16_t numStages, - float32_t * pkCoeffs, - float32_t * pvCoeffs, - float32_t * pState, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q31 IIR lattice filter. - * @param[in] S points to an instance of the Q31 IIR lattice structure. 
- * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_iir_lattice_q31( - const arm_iir_lattice_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q31 IIR lattice filter. - * @param[in] S points to an instance of the Q31 IIR lattice structure. - * @param[in] numStages number of stages in the filter. - * @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages. - * @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1. - * @param[in] pState points to the state buffer. The array is of length numStages+blockSize. - * @param[in] blockSize number of samples to process. - */ - void arm_iir_lattice_init_q31( - arm_iir_lattice_instance_q31 * S, - uint16_t numStages, - q31_t * pkCoeffs, - q31_t * pvCoeffs, - q31_t * pState, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q15 IIR lattice filter. - * @param[in] S points to an instance of the Q15 IIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ - void arm_iir_lattice_q15( - const arm_iir_lattice_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q15 IIR lattice filter. - * @param[in] S points to an instance of the fixed-point Q15 IIR lattice structure. - * @param[in] numStages number of stages in the filter. - * @param[in] pkCoeffs points to reflection coefficient buffer. The array is of length numStages. - * @param[in] pvCoeffs points to ladder coefficient buffer. The array is of length numStages+1. - * @param[in] pState points to state buffer. The array is of length numStages+blockSize. 
- * @param[in] blockSize number of samples to process per call. - */ - void arm_iir_lattice_init_q15( - arm_iir_lattice_instance_q15 * S, - uint16_t numStages, - q15_t * pkCoeffs, - q15_t * pvCoeffs, - q15_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the floating-point LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that controls filter coefficient updates. */ - } arm_lms_instance_f32; - - - /** - * @brief Processing function for floating-point LMS filter. - * @param[in] S points to an instance of the floating-point LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_f32( - const arm_lms_instance_f32 * S, - const float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - - /** - * @brief Initialization function for floating-point LMS filter. - * @param[in] S points to an instance of the floating-point LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to the coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_init_f32( - arm_lms_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 LMS filter. 
- */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ - } arm_lms_instance_q15; - - - /** - * @brief Initialization function for the Q15 LMS filter. - * @param[in] S points to an instance of the Q15 LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to the coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ - void arm_lms_init_q15( - arm_lms_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint32_t postShift); - - - /** - * @brief Processing function for Q15 LMS filter. - * @param[in] S points to an instance of the Q15 LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_q15( - const arm_lms_instance_q15 * S, - const q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q31 LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. 
The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ - } arm_lms_instance_q31; - - - /** - * @brief Processing function for Q31 LMS filter. - * @param[in] S points to an instance of the Q15 LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_q31( - const arm_lms_instance_q31 * S, - const q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - - /** - * @brief Initialization function for Q31 LMS filter. - * @param[in] S points to an instance of the Q31 LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ - void arm_lms_init_q31( - arm_lms_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint32_t postShift); - - - /** - * @brief Instance structure for the floating-point normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that control filter coefficient updates. */ - float32_t energy; /**< saves previous frame energy. */ - float32_t x0; /**< saves previous input sample. 
*/ - } arm_lms_norm_instance_f32; - - - /** - * @brief Processing function for floating-point normalized LMS filter. - * @param[in] S points to an instance of the floating-point normalized LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_norm_f32( - arm_lms_norm_instance_f32 * S, - const float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - - /** - * @brief Initialization function for floating-point normalized LMS filter. - * @param[in] S points to an instance of the floating-point LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_norm_init_f32( - arm_lms_norm_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q31 normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - const q31_t *recipTable; /**< points to the reciprocal initial value table. */ - q31_t energy; /**< saves previous frame energy. */ - q31_t x0; /**< saves previous input sample. 
*/ - } arm_lms_norm_instance_q31; - - - /** - * @brief Processing function for Q31 normalized LMS filter. - * @param[in] S points to an instance of the Q31 normalized LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_norm_q31( - arm_lms_norm_instance_q31 * S, - const q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - - /** - * @brief Initialization function for Q31 normalized LMS filter. - * @param[in] S points to an instance of the Q31 normalized LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ - void arm_lms_norm_init_q31( - arm_lms_norm_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint8_t postShift); - - - /** - * @brief Instance structure for the Q15 normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< Number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - const q15_t *recipTable; /**< Points to the reciprocal initial value table. */ - q15_t energy; /**< saves previous frame energy. */ - q15_t x0; /**< saves previous input sample. 
*/ - } arm_lms_norm_instance_q15; - - - /** - * @brief Processing function for Q15 normalized LMS filter. - * @param[in] S points to an instance of the Q15 normalized LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ - void arm_lms_norm_q15( - arm_lms_norm_instance_q15 * S, - const q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - - /** - * @brief Initialization function for Q15 normalized LMS filter. - * @param[in] S points to an instance of the Q15 normalized LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ - void arm_lms_norm_init_q15( - arm_lms_norm_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint8_t postShift); - - - /** - * @brief Correlation of floating-point sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. 
- */ - void arm_correlate_f32( - const float32_t * pSrcA, - uint32_t srcALen, - const float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - -/** - @brief Correlation of Q15 sequences - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. -*/ -void arm_correlate_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - -/** - @brief Correlation of Q15 sequences. - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ - void arm_correlate_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - -/** - @brief Correlation of Q15 sequences (fast version). - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the location where the output result is written. Length 2 * max(srcALen, srcBLen) - 1. - @return none - */ -void arm_correlate_fast_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - -/** - @brief Correlation of Q15 sequences (fast version). - @param[in] pSrcA points to the first input sequence. - @param[in] srcALen length of the first input sequence. 
- @param[in] pSrcB points to the second input sequence. - @param[in] srcBLen length of the second input sequence. - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - */ -void arm_correlate_fast_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - - /** - * @brief Correlation of Q31 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ - void arm_correlate_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - -/** - @brief Correlation of Q31 sequences (fast version). - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ -void arm_correlate_fast_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - - /** - * @brief Correlation of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. 
- * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). - */ - void arm_correlate_opt_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - - /** - * @brief Correlation of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ - void arm_correlate_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - - /** - * @brief Instance structure for the floating-point sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } arm_fir_sparse_instance_f32; - - /** - * @brief Instance structure for the Q31 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. 
*/ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } arm_fir_sparse_instance_q31; - - /** - * @brief Instance structure for the Q15 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } arm_fir_sparse_instance_q15; - - /** - * @brief Instance structure for the Q7 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } arm_fir_sparse_instance_q7; - - - /** - * @brief Processing function for the floating-point sparse FIR filter. - * @param[in] S points to an instance of the floating-point sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. 
- * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_sparse_f32( - arm_fir_sparse_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - float32_t * pScratchIn, - uint32_t blockSize); - - - /** - * @brief Initialization function for the floating-point sparse FIR filter. - * @param[in,out] S points to an instance of the floating-point sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ - void arm_fir_sparse_init_f32( - arm_fir_sparse_instance_f32 * S, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q31 sparse FIR filter. - * @param[in] S points to an instance of the Q31 sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_sparse_q31( - arm_fir_sparse_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - q31_t * pScratchIn, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q31 sparse FIR filter. - * @param[in,out] S points to an instance of the Q31 sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. 
- * @param[in] blockSize number of samples that will be processed per block. - */ - void arm_fir_sparse_init_q31( - arm_fir_sparse_instance_q31 * S, - uint16_t numTaps, - const q31_t * pCoeffs, - q31_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q15 sparse FIR filter. - * @param[in] S points to an instance of the Q15 sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. - * @param[in] pScratchOut points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_sparse_q15( - arm_fir_sparse_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - q15_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q15 sparse FIR filter. - * @param[in,out] S points to an instance of the Q15 sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ - void arm_fir_sparse_init_q15( - arm_fir_sparse_instance_q15 * S, - uint16_t numTaps, - const q15_t * pCoeffs, - q15_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - - /** - * @brief Processing function for the Q7 sparse FIR filter. - * @param[in] S points to an instance of the Q7 sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. 
- * @param[in] pScratchOut points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ - void arm_fir_sparse_q7( - arm_fir_sparse_instance_q7 * S, - const q7_t * pSrc, - q7_t * pDst, - q7_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - - /** - * @brief Initialization function for the Q7 sparse FIR filter. - * @param[in,out] S points to an instance of the Q7 sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ - void arm_fir_sparse_init_q7( - arm_fir_sparse_instance_q7 * S, - uint16_t numTaps, - const q7_t * pCoeffs, - q7_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - - - - - - /** - * @brief floating-point Circular write function. - */ - __STATIC_FORCEINLINE void arm_circularWrite_f32( - int32_t * circBuffer, - int32_t L, - uint16_t * writeOffset, - int32_t bufferInc, - const int32_t * src, - int32_t srcInc, - uint32_t blockSize) - { - uint32_t i = 0U; - int32_t wOffset; - - /* Copy the value of Index pointer that points - * to the current location where the input samples to be copied */ - wOffset = *writeOffset; - - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) - { - /* copy the input sample to the circular buffer */ - circBuffer[wOffset] = *src; - - /* Update the input pointer */ - src += srcInc; - - /* Circularly update wOffset. 
Watch out for positive and negative value */ - wOffset += bufferInc; - if (wOffset >= L) - wOffset -= L; - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *writeOffset = (uint16_t)wOffset; - } - - - - /** - * @brief floating-point Circular Read function. - */ - __STATIC_FORCEINLINE void arm_circularRead_f32( - int32_t * circBuffer, - int32_t L, - int32_t * readOffset, - int32_t bufferInc, - int32_t * dst, - int32_t * dst_base, - int32_t dst_length, - int32_t dstInc, - uint32_t blockSize) - { - uint32_t i = 0U; - int32_t rOffset; - int32_t* dst_end; - - /* Copy the value of Index pointer that points - * to the current location from where the input samples to be read */ - rOffset = *readOffset; - dst_end = dst_base + dst_length; - - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) - { - /* copy the sample from the circular buffer to the destination buffer */ - *dst = circBuffer[rOffset]; - - /* Update the input pointer */ - dst += dstInc; - - if (dst == dst_end) - { - dst = dst_base; - } - - /* Circularly update rOffset. Watch out for positive and negative value */ - rOffset += bufferInc; - - if (rOffset >= L) - { - rOffset -= L; - } - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *readOffset = rOffset; - } - - - /** - * @brief Q15 Circular write function. - */ - __STATIC_FORCEINLINE void arm_circularWrite_q15( - q15_t * circBuffer, - int32_t L, - uint16_t * writeOffset, - int32_t bufferInc, - const q15_t * src, - int32_t srcInc, - uint32_t blockSize) - { - uint32_t i = 0U; - int32_t wOffset; - - /* Copy the value of Index pointer that points - * to the current location where the input samples to be copied */ - wOffset = *writeOffset; - - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) - { - /* copy the input sample to the circular buffer */ - circBuffer[wOffset] = *src; - - /* Update the input pointer */ - src += srcInc; - - /* Circularly update wOffset. 
Watch out for positive and negative value */ - wOffset += bufferInc; - if (wOffset >= L) - wOffset -= L; - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *writeOffset = (uint16_t)wOffset; - } - - - /** - * @brief Q15 Circular Read function. - */ - __STATIC_FORCEINLINE void arm_circularRead_q15( - q15_t * circBuffer, - int32_t L, - int32_t * readOffset, - int32_t bufferInc, - q15_t * dst, - q15_t * dst_base, - int32_t dst_length, - int32_t dstInc, - uint32_t blockSize) - { - uint32_t i = 0; - int32_t rOffset; - q15_t* dst_end; - - /* Copy the value of Index pointer that points - * to the current location from where the input samples to be read */ - rOffset = *readOffset; - - dst_end = dst_base + dst_length; - - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) - { - /* copy the sample from the circular buffer to the destination buffer */ - *dst = circBuffer[rOffset]; - - /* Update the input pointer */ - dst += dstInc; - - if (dst == dst_end) - { - dst = dst_base; - } - - /* Circularly update wOffset. Watch out for positive and negative value */ - rOffset += bufferInc; - - if (rOffset >= L) - { - rOffset -= L; - } - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *readOffset = rOffset; - } - - - /** - * @brief Q7 Circular write function. - */ - __STATIC_FORCEINLINE void arm_circularWrite_q7( - q7_t * circBuffer, - int32_t L, - uint16_t * writeOffset, - int32_t bufferInc, - const q7_t * src, - int32_t srcInc, - uint32_t blockSize) - { - uint32_t i = 0U; - int32_t wOffset; - - /* Copy the value of Index pointer that points - * to the current location where the input samples to be copied */ - wOffset = *writeOffset; - - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) - { - /* copy the input sample to the circular buffer */ - circBuffer[wOffset] = *src; - - /* Update the input pointer */ - src += srcInc; - - /* Circularly update wOffset. 
Watch out for positive and negative value */ - wOffset += bufferInc; - if (wOffset >= L) - wOffset -= L; - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *writeOffset = (uint16_t)wOffset; - } - - - /** - * @brief Q7 Circular Read function. - */ - __STATIC_FORCEINLINE void arm_circularRead_q7( - q7_t * circBuffer, - int32_t L, - int32_t * readOffset, - int32_t bufferInc, - q7_t * dst, - q7_t * dst_base, - int32_t dst_length, - int32_t dstInc, - uint32_t blockSize) - { - uint32_t i = 0; - int32_t rOffset; - q7_t* dst_end; - - /* Copy the value of Index pointer that points - * to the current location from where the input samples to be read */ - rOffset = *readOffset; - - dst_end = dst_base + dst_length; - - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) - { - /* copy the sample from the circular buffer to the destination buffer */ - *dst = circBuffer[rOffset]; - - /* Update the input pointer */ - dst += dstInc; - - if (dst == dst_end) - { - dst = dst_base; - } - - /* Circularly update rOffset. 
Watch out for positive and negative value */ - rOffset += bufferInc; - - if (rOffset >= L) - { - rOffset -= L; - } - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *readOffset = rOffset; - } - - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _FILTERING_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/interpolation_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/interpolation_functions.h deleted file mode 100644 index 81034cdf3..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/interpolation_functions.h +++ /dev/null @@ -1,318 +0,0 @@ -/****************************************************************************** - * @file interpolation_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _INTERPOLATION_FUNCTIONS_H_ -#define _INTERPOLATION_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - -/** - * @defgroup groupInterpolation Interpolation Functions - * These functions perform 1- and 2-dimensional interpolation of data. - * Linear interpolation is used for 1-dimensional data and - * bilinear interpolation is used for 2-dimensional data. - */ - - - /** - * @brief Instance structure for the floating-point Linear Interpolate function. - */ - typedef struct - { - uint32_t nValues; /**< nValues */ - float32_t x1; /**< x1 */ - float32_t xSpacing; /**< xSpacing */ - float32_t *pYData; /**< pointer to the table of Y values */ - } arm_linear_interp_instance_f32; - - /** - * @brief Instance structure for the floating-point bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - float32_t *pData; /**< points to the data table. */ - } arm_bilinear_interp_instance_f32; - - /** - * @brief Instance structure for the Q31 bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q31_t *pData; /**< points to the data table. */ - } arm_bilinear_interp_instance_q31; - - /** - * @brief Instance structure for the Q15 bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q15_t *pData; /**< points to the data table. */ - } arm_bilinear_interp_instance_q15; - - /** - * @brief Instance structure for the Q15 bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. 
*/ - uint16_t numCols; /**< number of columns in the data table. */ - q7_t *pData; /**< points to the data table. */ - } arm_bilinear_interp_instance_q7; - - - /** - * @brief Struct for specifying cubic spline type - */ - typedef enum - { - ARM_SPLINE_NATURAL = 0, /**< Natural spline */ - ARM_SPLINE_PARABOLIC_RUNOUT = 1 /**< Parabolic runout spline */ - } arm_spline_type; - - /** - * @brief Instance structure for the floating-point cubic spline interpolation. - */ - typedef struct - { - arm_spline_type type; /**< Type (boundary conditions) */ - const float32_t * x; /**< x values */ - const float32_t * y; /**< y values */ - uint32_t n_x; /**< Number of known data points */ - float32_t * coeffs; /**< Coefficients buffer (b,c, and d) */ - } arm_spline_instance_f32; - - - - - /** - * @ingroup groupInterpolation - */ - - /** - * @addtogroup SplineInterpolate - * @{ - */ - - - /** - * @brief Processing function for the floating-point cubic spline interpolation. - * @param[in] S points to an instance of the floating-point spline structure. - * @param[in] xq points to the x values ot the interpolated data points. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples of output data. - */ - void arm_spline_f32( - arm_spline_instance_f32 * S, - const float32_t * xq, - float32_t * pDst, - uint32_t blockSize); - - /** - * @brief Initialization function for the floating-point cubic spline interpolation. - * @param[in,out] S points to an instance of the floating-point spline structure. - * @param[in] type type of cubic spline interpolation (boundary conditions) - * @param[in] x points to the x values of the known data points. - * @param[in] y points to the y values of the known data points. - * @param[in] n number of known data points. 
- * @param[in] coeffs coefficients array for b, c, and d - * @param[in] tempBuffer buffer array for internal computations - */ - void arm_spline_init_f32( - arm_spline_instance_f32 * S, - arm_spline_type type, - const float32_t * x, - const float32_t * y, - uint32_t n, - float32_t * coeffs, - float32_t * tempBuffer); - - - /** - * @} end of SplineInterpolate group - */ - - - - /** - * @addtogroup LinearInterpolate - * @{ - */ - - /** - * @brief Process function for the floating-point Linear Interpolation Function. - * @param[in,out] S is an instance of the floating-point Linear Interpolation structure - * @param[in] x input sample to process - * @return y processed output sample. - * - */ - float32_t arm_linear_interp_f32( - arm_linear_interp_instance_f32 * S, - float32_t x); - - /** - * - * @brief Process function for the Q31 Linear Interpolation Function. - * @param[in] pYData pointer to Q31 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - * - */ - q31_t arm_linear_interp_q31( - q31_t * pYData, - q31_t x, - uint32_t nValues); - - /** - * - * @brief Process function for the Q15 Linear Interpolation Function. - * @param[in] pYData pointer to Q15 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - * - */ - q15_t arm_linear_interp_q15( - q15_t * pYData, - q31_t x, - uint32_t nValues); - - /** - * - * @brief Process function for the Q7 Linear Interpolation Function. 
- * @param[in] pYData pointer to Q7 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - */ -q7_t arm_linear_interp_q7( - q7_t * pYData, - q31_t x, - uint32_t nValues); - - /** - * @} end of LinearInterpolate group - */ - - - - - /** - * @ingroup groupInterpolation - */ - - - /** - * @addtogroup BilinearInterpolate - * @{ - */ - - /** - * @brief Floating-point bilinear interpolation. - * @param[in,out] S points to an instance of the interpolation structure. - * @param[in] X interpolation coordinate. - * @param[in] Y interpolation coordinate. - * @return out interpolated value. - */ - float32_t arm_bilinear_interp_f32( - const arm_bilinear_interp_instance_f32 * S, - float32_t X, - float32_t Y); - - /** - * @brief Q31 bilinear interpolation. - * @param[in,out] S points to an instance of the interpolation structure. - * @param[in] X interpolation coordinate in 12.20 format. - * @param[in] Y interpolation coordinate in 12.20 format. - * @return out interpolated value. - */ - q31_t arm_bilinear_interp_q31( - arm_bilinear_interp_instance_q31 * S, - q31_t X, - q31_t Y); - - - /** - * @brief Q15 bilinear interpolation. - * @param[in,out] S points to an instance of the interpolation structure. - * @param[in] X interpolation coordinate in 12.20 format. - * @param[in] Y interpolation coordinate in 12.20 format. - * @return out interpolated value. - */ - q15_t arm_bilinear_interp_q15( - arm_bilinear_interp_instance_q15 * S, - q31_t X, - q31_t Y); - - /** - * @brief Q7 bilinear interpolation. - * @param[in,out] S points to an instance of the interpolation structure. - * @param[in] X interpolation coordinate in 12.20 format. - * @param[in] Y interpolation coordinate in 12.20 format. 
- * @return out interpolated value. - */ - q7_t arm_bilinear_interp_q7( - arm_bilinear_interp_instance_q7 * S, - q31_t X, - q31_t Y); - /** - * @} end of BilinearInterpolate group - */ - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _INTERPOLATION_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/matrix_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/matrix_functions.h deleted file mode 100644 index e2330c62f..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/matrix_functions.h +++ /dev/null @@ -1,597 +0,0 @@ -/****************************************************************************** - * @file matrix_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _MATRIX_FUNCTIONS_H_ -#define _MATRIX_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @defgroup groupMatrix Matrix Functions - * - * This set of functions provides basic matrix math operations. - * The functions operate on matrix data structures. For example, - * the type - * definition for the floating-point matrix structure is shown - * below: - *
- *     typedef struct
- *     {
- *       uint16_t numRows;     // number of rows of the matrix.
- *       uint16_t numCols;     // number of columns of the matrix.
- *       float32_t *pData;     // points to the data of the matrix.
- *     } arm_matrix_instance_f32;
- * 
- * There are similar definitions for Q15 and Q31 data types. - * - * The structure specifies the size of the matrix and then points to - * an array of data. The array is of size numRows X numCols - * and the values are arranged in row order. That is, the - * matrix element (i, j) is stored at: - *
- *     pData[i*numCols + j]
- * 
- * - * \par Init Functions - * There is an associated initialization function for each type of matrix - * data structure. - * The initialization function sets the values of the internal structure fields. - * Refer to \ref arm_mat_init_f32(), \ref arm_mat_init_q31() and \ref arm_mat_init_q15() - * for floating-point, Q31 and Q15 types, respectively. - * - * \par - * Use of the initialization function is optional. However, if initialization function is used - * then the instance structure cannot be placed into a const data section. - * To place the instance structure in a const data - * section, manually initialize the data structure. For example: - *
- * arm_matrix_instance_f32 S = {nRows, nColumns, pData};
- * arm_matrix_instance_q31 S = {nRows, nColumns, pData};
- * arm_matrix_instance_q15 S = {nRows, nColumns, pData};
- * 
- * where nRows specifies the number of rows, nColumns - * specifies the number of columns, and pData points to the - * data array. - * - * \par Size Checking - * By default all of the matrix functions perform size checking on the input and - * output matrices. For example, the matrix addition function verifies that the - * two input matrices and the output matrix all have the same number of rows and - * columns. If the size check fails the functions return: - *
- *     ARM_MATH_SIZE_MISMATCH
- * 
- * Otherwise the functions return - *
- *     ARM_MATH_SUCCESS
- * 
- * There is some overhead associated with this matrix size checking. - * The matrix size checking is enabled via the \#define - *
- *     ARM_MATH_MATRIX_CHECK
- * 
- * within the library project settings. By default this macro is defined - * and size checking is enabled. By changing the project settings and - * undefining this macro size checking is eliminated and the functions - * run a bit faster. With size checking disabled the functions always - * return ARM_MATH_SUCCESS. - */ - - /** - * @brief Instance structure for the floating-point matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float32_t *pData; /**< points to the data of the matrix. */ - } arm_matrix_instance_f32; - - /** - * @brief Instance structure for the floating-point matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float64_t *pData; /**< points to the data of the matrix. */ - } arm_matrix_instance_f64; - - /** - * @brief Instance structure for the Q7 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q7_t *pData; /**< points to the data of the matrix. */ - } arm_matrix_instance_q7; - - /** - * @brief Instance structure for the Q15 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q15_t *pData; /**< points to the data of the matrix. */ - } arm_matrix_instance_q15; - - /** - * @brief Instance structure for the Q31 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q31_t *pData; /**< points to the data of the matrix. */ - } arm_matrix_instance_q31; - - /** - * @brief Floating-point matrix addition. 
- * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_add_f32( - const arm_matrix_instance_f32 * pSrcA, - const arm_matrix_instance_f32 * pSrcB, - arm_matrix_instance_f32 * pDst); - - /** - * @brief Q15 matrix addition. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_add_q15( - const arm_matrix_instance_q15 * pSrcA, - const arm_matrix_instance_q15 * pSrcB, - arm_matrix_instance_q15 * pDst); - - /** - * @brief Q31 matrix addition. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_add_q31( - const arm_matrix_instance_q31 * pSrcA, - const arm_matrix_instance_q31 * pSrcB, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Floating-point, complex, matrix multiplication. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_cmplx_mult_f32( - const arm_matrix_instance_f32 * pSrcA, - const arm_matrix_instance_f32 * pSrcB, - arm_matrix_instance_f32 * pDst); - - /** - * @brief Q15, complex, matrix multiplication. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_cmplx_mult_q15( - const arm_matrix_instance_q15 * pSrcA, - const arm_matrix_instance_q15 * pSrcB, - arm_matrix_instance_q15 * pDst, - q15_t * pScratch); - - /** - * @brief Q31, complex, matrix multiplication. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_cmplx_mult_q31( - const arm_matrix_instance_q31 * pSrcA, - const arm_matrix_instance_q31 * pSrcB, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Floating-point matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_trans_f32( - const arm_matrix_instance_f32 * pSrc, - arm_matrix_instance_f32 * pDst); - - /** - * @brief Floating-point complex matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_cmplx_trans_f32( - const arm_matrix_instance_f32 * pSrc, - arm_matrix_instance_f32 * pDst); - - - /** - * @brief Q15 matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_trans_q15( - const arm_matrix_instance_q15 * pSrc, - arm_matrix_instance_q15 * pDst); - - /** - * @brief Q15 complex matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_cmplx_trans_q15( - const arm_matrix_instance_q15 * pSrc, - arm_matrix_instance_q15 * pDst); - - /** - * @brief Q7 matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_trans_q7( - const arm_matrix_instance_q7 * pSrc, - arm_matrix_instance_q7 * pDst); - - /** - * @brief Q31 matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_trans_q31( - const arm_matrix_instance_q31 * pSrc, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Q31 complex matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either ARM_MATH_SIZE_MISMATCH - * or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_cmplx_trans_q31( - const arm_matrix_instance_q31 * pSrc, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Floating-point matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_mult_f32( - const arm_matrix_instance_f32 * pSrcA, - const arm_matrix_instance_f32 * pSrcB, - arm_matrix_instance_f32 * pDst); - - /** - * @brief Floating-point matrix and vector multiplication - * @param[in] pSrcMat points to the input matrix structure - * @param[in] pVec points to vector - * @param[out] pDst points to output vector - */ -void arm_mat_vec_mult_f32( - const arm_matrix_instance_f32 *pSrcMat, - const float32_t *pVec, - float32_t *pDst); - - /** - * @brief Q7 matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @param[in] pState points to the array for storing intermediate results - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_mult_q7( - const arm_matrix_instance_q7 * pSrcA, - const arm_matrix_instance_q7 * pSrcB, - arm_matrix_instance_q7 * pDst, - q7_t * pState); - - /** - * @brief Q7 matrix and vector multiplication - * @param[in] pSrcMat points to the input matrix structure - * @param[in] pVec points to vector - * @param[out] pDst points to output vector - */ -void arm_mat_vec_mult_q7( - const arm_matrix_instance_q7 *pSrcMat, - const q7_t *pVec, - q7_t *pDst); - - /** - * @brief Q15 matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @param[in] pState points to the array for storing intermediate results - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_mult_q15( - const arm_matrix_instance_q15 * pSrcA, - const arm_matrix_instance_q15 * pSrcB, - arm_matrix_instance_q15 * pDst, - q15_t * pState); - - /** - * @brief Q15 matrix and vector multiplication - * @param[in] pSrcMat points to the input matrix structure - * @param[in] pVec points to vector - * @param[out] pDst points to output vector - */ -void arm_mat_vec_mult_q15( - const arm_matrix_instance_q15 *pSrcMat, - const q15_t *pVec, - q15_t *pDst); - - /** - * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @param[in] pState points to the array for storing intermediate results - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_mult_fast_q15( - const arm_matrix_instance_q15 * pSrcA, - const arm_matrix_instance_q15 * pSrcB, - arm_matrix_instance_q15 * pDst, - q15_t * pState); - - /** - * @brief Q31 matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_mult_q31( - const arm_matrix_instance_q31 * pSrcA, - const arm_matrix_instance_q31 * pSrcB, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Q31 matrix and vector multiplication - * @param[in] pSrcMat points to the input matrix structure - * @param[in] pVec points to vector - * @param[out] pDst points to output vector - */ -void arm_mat_vec_mult_q31( - const arm_matrix_instance_q31 *pSrcMat, - const q31_t *pVec, - q31_t *pDst); - - /** - * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_mult_fast_q31( - const arm_matrix_instance_q31 * pSrcA, - const arm_matrix_instance_q31 * pSrcB, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Floating-point matrix subtraction - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_sub_f32( - const arm_matrix_instance_f32 * pSrcA, - const arm_matrix_instance_f32 * pSrcB, - arm_matrix_instance_f32 * pDst); - - /** - * @brief Q15 matrix subtraction - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_sub_q15( - const arm_matrix_instance_q15 * pSrcA, - const arm_matrix_instance_q15 * pSrcB, - arm_matrix_instance_q15 * pDst); - - /** - * @brief Q31 matrix subtraction - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_sub_q31( - const arm_matrix_instance_q31 * pSrcA, - const arm_matrix_instance_q31 * pSrcB, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Floating-point matrix scaling. - * @param[in] pSrc points to the input matrix - * @param[in] scale scale factor - * @param[out] pDst points to the output matrix - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_scale_f32( - const arm_matrix_instance_f32 * pSrc, - float32_t scale, - arm_matrix_instance_f32 * pDst); - - /** - * @brief Q15 matrix scaling. - * @param[in] pSrc points to input matrix - * @param[in] scaleFract fractional portion of the scale factor - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to output matrix - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- */ -arm_status arm_mat_scale_q15( - const arm_matrix_instance_q15 * pSrc, - q15_t scaleFract, - int32_t shift, - arm_matrix_instance_q15 * pDst); - - /** - * @brief Q31 matrix scaling. - * @param[in] pSrc points to input matrix - * @param[in] scaleFract fractional portion of the scale factor - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to output matrix structure - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - */ -arm_status arm_mat_scale_q31( - const arm_matrix_instance_q31 * pSrc, - q31_t scaleFract, - int32_t shift, - arm_matrix_instance_q31 * pDst); - - /** - * @brief Q31 matrix initialization. - * @param[in,out] S points to an instance of the floating-point matrix structure. - * @param[in] nRows number of rows in the matrix. - * @param[in] nColumns number of columns in the matrix. - * @param[in] pData points to the matrix data array. - */ -void arm_mat_init_q31( - arm_matrix_instance_q31 * S, - uint16_t nRows, - uint16_t nColumns, - q31_t * pData); - - /** - * @brief Q15 matrix initialization. - * @param[in,out] S points to an instance of the floating-point matrix structure. - * @param[in] nRows number of rows in the matrix. - * @param[in] nColumns number of columns in the matrix. - * @param[in] pData points to the matrix data array. - */ -void arm_mat_init_q15( - arm_matrix_instance_q15 * S, - uint16_t nRows, - uint16_t nColumns, - q15_t * pData); - - /** - * @brief Floating-point matrix initialization. - * @param[in,out] S points to an instance of the floating-point matrix structure. - * @param[in] nRows number of rows in the matrix. - * @param[in] nColumns number of columns in the matrix. - * @param[in] pData points to the matrix data array. - */ -void arm_mat_init_f32( - arm_matrix_instance_f32 * S, - uint16_t nRows, - uint16_t nColumns, - float32_t * pData); - - - - /** - * @brief Floating-point matrix inverse. 
- * @param[in] src points to the instance of the input floating-point matrix structure. - * @param[out] dst points to the instance of the output floating-point matrix structure. - * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. - * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR. - */ - arm_status arm_mat_inverse_f32( - const arm_matrix_instance_f32 * src, - arm_matrix_instance_f32 * dst); - - - /** - * @brief Floating-point matrix inverse. - * @param[in] src points to the instance of the input floating-point matrix structure. - * @param[out] dst points to the instance of the output floating-point matrix structure. - * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. - * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR. - */ - arm_status arm_mat_inverse_f64( - const arm_matrix_instance_f64 * src, - arm_matrix_instance_f64 * dst); - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _MATRIX_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/none.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/none.h deleted file mode 100644 index 62f2d144a..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/none.h +++ /dev/null @@ -1,576 +0,0 @@ -/****************************************************************************** - * @file none.h - * @brief Intrinsincs when no DSP extension available - * @version V1.9.0 - * @date 20. 
July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - -Definitions in this file are allowing to reuse some versions of the -CMSIS-DSP to build on a core (M0 for instance) or a host where -DSP extension are not available. - -Ideally a pure C version should have been used instead. -But those are not always available or use a restricted set -of intrinsics. - -*/ - -#ifndef _NONE_H_ -#define _NONE_H_ - -#include "arm_math_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - - -/* - -Normally those kind of definitions are in a compiler file -in Core or Core_A. - -But for MSVC compiler it is a bit special. The goal is very specific -to CMSIS-DSP and only to allow the use of this library from other -systems like Python or Matlab. - -MSVC is not going to be used to cross-compile to ARM. So, having a MSVC -compiler file in Core or Core_A would not make sense. 
- -*/ -#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) - __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) - { - if (data == 0U) { return 32U; } - - uint32_t count = 0U; - uint32_t mask = 0x80000000U; - - while ((data & mask) == 0U) - { - count += 1U; - mask = mask >> 1U; - } - return count; - } - - __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) - { - if ((sat >= 1U) && (sat <= 32U)) - { - const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); - const int32_t min = -1 - max ; - if (val > max) - { - return max; - } - else if (val < min) - { - return min; - } - } - return val; - } - - __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) - { - if (sat <= 31U) - { - const uint32_t max = ((1U << sat) - 1U); - if (val > (int32_t)max) - { - return max; - } - else if (val < 0) - { - return 0U; - } - } - return (uint32_t)val; - } - - /** - \brief Rotate Right in unsigned value (32 bit) - \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits. - \param [in] op1 Value to rotate - \param [in] op2 Number of Bits to rotate - \return Rotated value - */ -__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) -{ - op2 %= 32U; - if (op2 == 0U) - { - return op1; - } - return (op1 >> op2) | (op1 << (32U - op2)); -} - - -#endif - -/** - * @brief Clips Q63 to Q31 values. - */ - __STATIC_FORCEINLINE q31_t clip_q63_to_q31( - q63_t x) - { - return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? - ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x; - } - - /** - * @brief Clips Q63 to Q15 values. - */ - __STATIC_FORCEINLINE q15_t clip_q63_to_q15( - q63_t x) - { - return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? - ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15); - } - - /** - * @brief Clips Q31 to Q7 values. - */ - __STATIC_FORCEINLINE q7_t clip_q31_to_q7( - q31_t x) - { - return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ? 
- ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x; - } - - /** - * @brief Clips Q31 to Q15 values. - */ - __STATIC_FORCEINLINE q15_t clip_q31_to_q15( - q31_t x) - { - return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ? - ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x; - } - - /** - * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format. - */ - __STATIC_FORCEINLINE q63_t mult32x64( - q63_t x, - q31_t y) - { - return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) + - (((q63_t) (x >> 32) * y) ) ); - } - -/* SMMLAR */ -#define multAcc_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32) - -/* SMMLSR */ -#define multSub_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32) - -/* SMMULR */ -#define mult_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32) - -/* SMMLA */ -#define multAcc_32x32_keep32(a, x, y) \ - a += (q31_t) (((q63_t) x * y) >> 32) - -/* SMMLS */ -#define multSub_32x32_keep32(a, x, y) \ - a -= (q31_t) (((q63_t) x * y) >> 32) - -/* SMMUL */ -#define mult_32x32_keep32(a, x, y) \ - a = (q31_t) (((q63_t) x * y ) >> 32) - -#ifndef ARM_MATH_DSP - /** - * @brief definition to pack two 16 bit values. - */ - #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ - (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) ) - #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ - (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) ) -#endif - - /** - * @brief definition to pack four 8 bit values. 
- */ -#ifndef ARM_MATH_BIG_ENDIAN - #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ - (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ - (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ - (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) -#else - #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) | \ - (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \ - (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \ - (((int32_t)(v0) << 24) & (int32_t)0xFF000000) ) -#endif - - - - -/* - * @brief C custom defined intrinsic functions - */ -#if !defined (ARM_MATH_DSP) - - - /* - * @brief C custom defined QADD8 - */ - __STATIC_FORCEINLINE uint32_t __QADD8( - uint32_t x, - uint32_t y) - { - q31_t r, s, t, u; - - r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; - s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; - t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; - u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; - - return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); - } - - - /* - * @brief C custom defined QSUB8 - */ - __STATIC_FORCEINLINE uint32_t __QSUB8( - uint32_t x, - uint32_t y) - { - q31_t r, s, t, u; - - r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; - s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; - t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; - u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; - - return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); - } - - - /* - * @brief C custom defined QADD16 - */ - __STATIC_FORCEINLINE uint32_t __QADD16( - uint32_t x, - uint32_t y) - { -/* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! 
for armCC */ - q31_t r = 0, s = 0; - - r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined SHADD16 - */ - __STATIC_FORCEINLINE uint32_t __SHADD16( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined QSUB16 - */ - __STATIC_FORCEINLINE uint32_t __QSUB16( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined SHSUB16 - */ - __STATIC_FORCEINLINE uint32_t __SHSUB16( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined QASX - */ - __STATIC_FORCEINLINE uint32_t __QASX( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined SHASX - */ - __STATIC_FORCEINLINE uint32_t __SHASX( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & 
(int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined QSAX - */ - __STATIC_FORCEINLINE uint32_t __QSAX( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined SHSAX - */ - __STATIC_FORCEINLINE uint32_t __SHSAX( - uint32_t x, - uint32_t y) - { - q31_t r, s; - - r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - - return ((uint32_t)((s << 16) | (r ))); - } - - - /* - * @brief C custom defined SMUSDX - */ - __STATIC_FORCEINLINE uint32_t __SMUSDX( - uint32_t x, - uint32_t y) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); - } - - /* - * @brief C custom defined SMUADX - */ - __STATIC_FORCEINLINE uint32_t __SMUADX( - uint32_t x, - uint32_t y) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); - } - - - /* - * @brief C custom defined QADD - */ - __STATIC_FORCEINLINE int32_t __QADD( - int32_t x, - int32_t y) - { - return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y))); - } - - - /* - * @brief C custom defined QSUB - */ - __STATIC_FORCEINLINE int32_t __QSUB( - int32_t x, - int32_t y) - { - return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y))); - } - - - /* - * @brief C custom defined SMLAD - */ - __STATIC_FORCEINLINE uint32_t __SMLAD( - uint32_t x, - uint32_t y, - uint32_t sum) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + - ( ((q31_t)sum ) ) )); - } - - - /* - * @brief C custom defined SMLADX - */ - 
__STATIC_FORCEINLINE uint32_t __SMLADX( - uint32_t x, - uint32_t y, - uint32_t sum) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + - ( ((q31_t)sum ) ) )); - } - - - /* - * @brief C custom defined SMLSDX - */ - __STATIC_FORCEINLINE uint32_t __SMLSDX( - uint32_t x, - uint32_t y, - uint32_t sum) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + - ( ((q31_t)sum ) ) )); - } - - - /* - * @brief C custom defined SMLALD - */ - __STATIC_FORCEINLINE uint64_t __SMLALD( - uint32_t x, - uint32_t y, - uint64_t sum) - { -/* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */ - return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + - ( ((q63_t)sum ) ) )); - } - - - /* - * @brief C custom defined SMLALDX - */ - __STATIC_FORCEINLINE uint64_t __SMLALDX( - uint32_t x, - uint32_t y, - uint64_t sum) - { -/* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */ - return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + - ( ((q63_t)sum ) ) )); - } - - - /* - * @brief C custom defined SMUAD - */ - __STATIC_FORCEINLINE uint32_t __SMUAD( - uint32_t x, - uint32_t y) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); - } - - - /* - * @brief C custom defined SMUSD - */ - __STATIC_FORCEINLINE uint32_t __SMUSD( - uint32_t x, - uint32_t y) - { - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) - - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); - } - - - /* - * @brief C custom defined SXTB16 - */ - __STATIC_FORCEINLINE uint32_t __SXTB16( - uint32_t x) - { - return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) | - ((((q31_t)x << 8) 
>> 8) & (q31_t)0xFFFF0000) )); - } - - /* - * @brief C custom defined SMMLA - */ - __STATIC_FORCEINLINE int32_t __SMMLA( - int32_t x, - int32_t y, - int32_t sum) - { - return (sum + (int32_t) (((int64_t) x * y) >> 32)); - } - -#endif /* !defined (ARM_MATH_DSP) */ - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/statistics_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/statistics_functions.h deleted file mode 100644 index 68e145dda..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/statistics_functions.h +++ /dev/null @@ -1,483 +0,0 @@ -/****************************************************************************** - * @file statistics_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _STATISTICS_FUNCTIONS_H_ -#define _STATISTICS_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#include "dsp/basic_math_functions.h" -#include "dsp/fast_math_functions.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - -/** - * @defgroup groupStats Statistics Functions - */ - -/** - * @brief Computation of the LogSumExp - * - * In probabilistic computations, the dynamic of the probability values can be very - * wide because they come from gaussian functions. - * To avoid underflow and overflow issues, the values are represented by their log. - * In this representation, multiplying the original exp values is easy : their logs are added. - * But adding the original exp values is requiring some special handling and it is the - * goal of the LogSumExp function. - * - * If the values are x1...xn, the function is computing: - * - * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that - * rounding issues are minimised. - * - * The max xm of the values is extracted and the function is computing: - * xm + ln(exp(x1 - xm) + ... + exp(xn - xm)) - * - * @param[in] *in Pointer to an array of input values. - * @param[in] blockSize Number of samples in the input array. - * @return LogSumExp - * - */ - - -float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize); - -/** - * @brief Dot product with log arithmetic - * - * Vectors are containing the log of the samples - * - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[in] pTmpBuffer temporary buffer of length blockSize - * @return The log of the dot product . 
- * - */ - - -float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA, - const float32_t * pSrcB, - uint32_t blockSize, - float32_t *pTmpBuffer); - -/** - * @brief Entropy - * - * @param[in] pSrcA Array of input values. - * @param[in] blockSize Number of samples in the input array. - * @return Entropy -Sum(p ln p) - * - */ - - -float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize); - - -/** - * @brief Entropy - * - * @param[in] pSrcA Array of input values. - * @param[in] blockSize Number of samples in the input array. - * @return Entropy -Sum(p ln p) - * - */ - - -float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize); - - -/** - * @brief Kullback-Leibler - * - * @param[in] pSrcA Pointer to an array of input values for probability distribution A. - * @param[in] pSrcB Pointer to an array of input values for probability distribution B. - * @param[in] blockSize Number of samples in the input array. - * @return Kullback-Leibler Divergence D(A || B) - * - */ -float32_t arm_kullback_leibler_f32(const float32_t * pSrcA - ,const float32_t * pSrcB - ,uint32_t blockSize); - - -/** - * @brief Kullback-Leibler - * - * @param[in] pSrcA Pointer to an array of input values for probability distribution A. - * @param[in] pSrcB Pointer to an array of input values for probability distribution B. - * @param[in] blockSize Number of samples in the input array. - * @return Kullback-Leibler Divergence D(A || B) - * - */ -float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, - const float64_t * pSrcB, - uint32_t blockSize); - - - /** - * @brief Sum of the squares of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_power_q31( - const q31_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - - /** - * @brief Sum of the squares of the elements of a floating-point vector. 
- * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_power_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - - /** - * @brief Sum of the squares of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_power_q15( - const q15_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - - /** - * @brief Sum of the squares of the elements of a Q7 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_power_q7( - const q7_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - - /** - * @brief Mean value of a Q7 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_mean_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult); - - - /** - * @brief Mean value of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_mean_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - - /** - * @brief Mean value of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_mean_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - - /** - * @brief Mean value of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. 
- */ - void arm_mean_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - - /** - * @brief Variance of the elements of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_var_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - - /** - * @brief Variance of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_var_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - - /** - * @brief Variance of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_var_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - - /** - * @brief Root Mean Square of the elements of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_rms_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - - /** - * @brief Root Mean Square of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_rms_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - - /** - * @brief Root Mean Square of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_rms_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - - /** - * @brief Standard deviation of the elements of a floating-point vector. 
- * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_std_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - - /** - * @brief Standard deviation of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_std_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - - /** - * @brief Standard deviation of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ - void arm_std_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - - - /** - * @brief Minimum value of a Q7 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] result is output pointer - * @param[in] index is the array index of the minimum value in the input buffer. - */ - void arm_min_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * result, - uint32_t * index); - - - /** - * @brief Minimum value of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output pointer - * @param[in] pIndex is the array index of the minimum value in the input buffer. - */ - void arm_min_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex); - - - /** - * @brief Minimum value of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output pointer - * @param[out] pIndex is the array index of the minimum value in the input buffer. 
- */ - void arm_min_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - - /** - * @brief Minimum value of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output pointer - * @param[out] pIndex is the array index of the minimum value in the input buffer. - */ - void arm_min_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Maximum value of a Q7 vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ - void arm_max_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Maximum value of a Q15 vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ - void arm_max_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Maximum value of a Q31 vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ - void arm_max_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Maximum value of a floating-point vector. 
- * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ - void arm_max_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - /** - @brief Maximum value of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult maximum value returned here - @return none - */ - void arm_max_no_idx_f32( - const float32_t *pSrc, - uint32_t blockSize, - float32_t *pResult); - - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _STATISTICS_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/support_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/support_functions.h deleted file mode 100644 index f4f3b880f..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/support_functions.h +++ /dev/null @@ -1,426 +0,0 @@ -/****************************************************************************** - * @file support_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _SUPPORT_FUNCTIONS_H_ -#define _SUPPORT_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @defgroup groupSupport Support Functions - */ - - -/** - * @brief Converts the elements of the floating-point vector to Q31 vector. - * @param[in] pSrc points to the floating-point input vector - * @param[out] pDst points to the Q31 output vector - * @param[in] blockSize length of the input vector - */ - void arm_float_to_q31( - const float32_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the floating-point vector to Q15 vector. - * @param[in] pSrc points to the floating-point input vector - * @param[out] pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - */ - void arm_float_to_q15( - const float32_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the floating-point vector to Q7 vector. - * @param[in] pSrc points to the floating-point input vector - * @param[out] pDst points to the Q7 output vector - * @param[in] blockSize length of the input vector - */ - void arm_float_to_q7( - const float32_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q31 vector to floating-point vector. 
- * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q31_to_float( - const q31_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q31 vector to Q15 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q31_to_q15( - const q31_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q31 vector to Q7 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q31_to_q7( - const q31_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q15 vector to floating-point vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q15_to_float( - const q15_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q15 vector to Q31 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q15_to_q31( - const q15_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q15 vector to Q7 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q15_to_q7( - const q15_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q7 vector to floating-point vector. 
- * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ - void arm_q7_to_float( - const q7_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q7 vector to Q31 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_q7_to_q31( - const q7_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Converts the elements of the Q7 vector to Q15 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_q7_to_q15( - const q7_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - - - - /** - * @brief Struct for specifying sorting algorithm - */ - typedef enum - { - ARM_SORT_BITONIC = 0, - /**< Bitonic sort */ - ARM_SORT_BUBBLE = 1, - /**< Bubble sort */ - ARM_SORT_HEAP = 2, - /**< Heap sort */ - ARM_SORT_INSERTION = 3, - /**< Insertion sort */ - ARM_SORT_QUICK = 4, - /**< Quick sort */ - ARM_SORT_SELECTION = 5 - /**< Selection sort */ - } arm_sort_alg; - - /** - * @brief Struct for specifying sorting algorithm - */ - typedef enum - { - ARM_SORT_DESCENDING = 0, - /**< Descending order (9 to 0) */ - ARM_SORT_ASCENDING = 1 - /**< Ascending order (0 to 9) */ - } arm_sort_dir; - - /** - * @brief Instance structure for the sorting algorithms. - */ - typedef struct - { - arm_sort_alg alg; /**< Sorting algorithm selected */ - arm_sort_dir dir; /**< Sorting order (direction) */ - } arm_sort_instance_f32; - - /** - * @param[in] S points to an instance of the sorting structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. 
- */ - void arm_sort_f32( - const arm_sort_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - /** - * @param[in,out] S points to an instance of the sorting structure. - * @param[in] alg Selected algorithm. - * @param[in] dir Sorting order. - */ - void arm_sort_init_f32( - arm_sort_instance_f32 * S, - arm_sort_alg alg, - arm_sort_dir dir); - - /** - * @brief Instance structure for the sorting algorithms. - */ - typedef struct - { - arm_sort_dir dir; /**< Sorting order (direction) */ - float32_t * buffer; /**< Working buffer */ - } arm_merge_sort_instance_f32; - - /** - * @param[in] S points to an instance of the sorting structure. - * @param[in,out] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_merge_sort_f32( - const arm_merge_sort_instance_f32 * S, - float32_t *pSrc, - float32_t *pDst, - uint32_t blockSize); - - /** - * @param[in,out] S points to an instance of the sorting structure. - * @param[in] dir Sorting order. - * @param[in] buffer Working buffer. - */ - void arm_merge_sort_init_f32( - arm_merge_sort_instance_f32 * S, - arm_sort_dir dir, - float32_t * buffer); - - - - /** - * @brief Copies the elements of a floating-point vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_copy_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Copies the elements of a Q7 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_copy_q7( - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Copies the elements of a Q15 vector. 
- * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_copy_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Copies the elements of a Q31 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_copy_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - /** - * @brief Fills a constant value into a floating-point vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_fill_f32( - float32_t value, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Fills a constant value into a Q7 vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_fill_q7( - q7_t value, - q7_t * pDst, - uint32_t blockSize); - - - /** - * @brief Fills a constant value into a Q15 vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_fill_q15( - q15_t value, - q15_t * pDst, - uint32_t blockSize); - - - /** - * @brief Fills a constant value into a Q31 vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ - void arm_fill_q31( - q31_t value, - q31_t * pDst, - uint32_t blockSize); - - - - - - - -/** - * @brief Weighted sum - * - * - * @param[in] *in Array of input values. - * @param[in] *weigths Weights - * @param[in] blockSize Number of samples in the input array. 
- * @return Weighted sum - * - */ -float32_t arm_weighted_sum_f32(const float32_t *in - , const float32_t *weigths - , uint32_t blockSize); - - -/** - * @brief Barycenter - * - * - * @param[in] in List of vectors - * @param[in] weights Weights of the vectors - * @param[out] out Barycenter - * @param[in] nbVectors Number of vectors - * @param[in] vecDim Dimension of space (vector dimension) - * @return None - * - */ -void arm_barycenter_f32(const float32_t *in - , const float32_t *weights - , float32_t *out - , uint32_t nbVectors - , uint32_t vecDim); - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _SUPPORT_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/svm_defines.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/svm_defines.h deleted file mode 100644 index 71ad2f738..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/svm_defines.h +++ /dev/null @@ -1,42 +0,0 @@ -/****************************************************************************** - * @file svm_defines.h - * @brief Public header file for CMSIS DSP Library - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _SVM_DEFINES_H_ -#define _SVM_DEFINES_H_ - -/** - * @brief Struct for specifying SVM Kernel - */ -typedef enum -{ - ARM_ML_KERNEL_LINEAR = 0, - /**< Linear kernel */ - ARM_ML_KERNEL_POLYNOMIAL = 1, - /**< Polynomial kernel */ - ARM_ML_KERNEL_RBF = 2, - /**< Radial Basis Function kernel */ - ARM_ML_KERNEL_SIGMOID = 3 - /**< Sigmoid kernel */ -} arm_ml_kernel_type; - -#endif diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/svm_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/svm_functions.h deleted file mode 100644 index c50deb55f..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/svm_functions.h +++ /dev/null @@ -1,298 +0,0 @@ -/****************************************************************************** - * @file svm_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef _SVM_FUNCTIONS_H_ -#define _SVM_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" -#include "dsp/svm_defines.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define STEP(x) (x) <= 0 ? 0 : 1 - -/** - * @defgroup groupSVM SVM Functions - * This set of functions is implementing SVM classification on 2 classes. - * The training must be done from scikit-learn. The parameters can be easily - * generated from the scikit-learn object. Some examples are given in - * DSP/Testing/PatternGeneration/SVM.py - * - * If more than 2 classes are needed, the functions in this folder - * will have to be used, as building blocks, to do multi-class classification. - * - * No multi-class classification is provided in this SVM folder. - * - */ - -/** - * @brief Integer exponentiation - * @param[in] x value - * @param[in] nb integer exponent >= 1 - * @return x^nb - * - */ -__STATIC_INLINE float32_t arm_exponent_f32(float32_t x, int32_t nb) -{ - float32_t r = x; - nb --; - while(nb > 0) - { - r = r * x; - nb--; - } - return(r); -} - - - - - -/** - * @brief Instance structure for linear SVM prediction function. 
- */ -typedef struct -{ - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ -} arm_svm_linear_instance_f32; - - -/** - * @brief Instance structure for polynomial SVM prediction function. - */ -typedef struct -{ - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ - int32_t degree; /**< Polynomial degree */ - float32_t coef0; /**< Polynomial constant */ - float32_t gamma; /**< Gamma factor */ -} arm_svm_polynomial_instance_f32; - -/** - * @brief Instance structure for rbf SVM prediction function. - */ -typedef struct -{ - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ - float32_t gamma; /**< Gamma factor */ -} arm_svm_rbf_instance_f32; - -/** - * @brief Instance structure for sigmoid SVM prediction function. 
- */ -typedef struct -{ - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ - float32_t coef0; /**< Independant constant */ - float32_t gamma; /**< Gamma factor */ -} arm_svm_sigmoid_instance_f32; - -/** - * @brief SVM linear instance init function - * @param[in] S Parameters for SVM functions - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @return none. - * - */ - - -void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes); - -/** - * @brief SVM linear prediction - * @param[in] S Pointer to an instance of the linear SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult Decision value - * @return none. - * - */ - -void arm_svm_linear_predict_f32(const arm_svm_linear_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - - -/** - * @brief SVM polynomial instance init function - * @param[in] S points to an instance of the polynomial SVM structure. 
- * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @param[in] degree Polynomial degree - * @param[in] coef0 coeff0 (scikit-learn terminology) - * @param[in] gamma gamma (scikit-learn terminology) - * @return none. - * - */ - - -void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes, - int32_t degree, - float32_t coef0, - float32_t gamma - ); - -/** - * @brief SVM polynomial prediction - * @param[in] S Pointer to an instance of the polynomial SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult Decision value - * @return none. - * - */ -void arm_svm_polynomial_predict_f32(const arm_svm_polynomial_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - - -/** - * @brief SVM radial basis function instance init function - * @param[in] S points to an instance of the polynomial SVM structure. - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @param[in] gamma gamma (scikit-learn terminology) - * @return none. 
- * - */ - -void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes, - float32_t gamma - ); - -/** - * @brief SVM rbf prediction - * @param[in] S Pointer to an instance of the rbf SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult decision value - * @return none. - * - */ -void arm_svm_rbf_predict_f32(const arm_svm_rbf_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - -/** - * @brief SVM sigmoid instance init function - * @param[in] S points to an instance of the rbf SVM structure. - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @param[in] coef0 coeff0 (scikit-learn terminology) - * @param[in] gamma gamma (scikit-learn terminology) - * @return none. - * - */ - -void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes, - float32_t coef0, - float32_t gamma - ); - -/** - * @brief SVM sigmoid prediction - * @param[in] S Pointer to an instance of the rbf SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult Decision value - * @return none. 
- * - */ -void arm_svm_sigmoid_predict_f32(const arm_svm_sigmoid_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _SVM_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/transform_functions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/transform_functions.h deleted file mode 100644 index b6e7284de..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/transform_functions.h +++ /dev/null @@ -1,591 +0,0 @@ -/****************************************************************************** - * @file transform_functions.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef _TRANSFORM_FUNCTIONS_H_ -#define _TRANSFORM_FUNCTIONS_H_ - -#include "arm_math_types.h" -#include "arm_math_memory.h" - -#include "dsp/none.h" -#include "dsp/utils.h" - -#include "dsp/basic_math_functions.h" -#include "dsp/complex_math_functions.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - -/** - * @defgroup groupTransforms Transform Functions - */ - - - /** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } arm_cfft_radix2_instance_q15; - -/* Deprecated */ - arm_status arm_cfft_radix2_init_q15( - arm_cfft_radix2_instance_q15 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void arm_cfft_radix2_q15( - const arm_cfft_radix2_instance_q15 * S, - q15_t * pSrc); - - - /** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q15_t *pTwiddle; /**< points to the twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. 
*/ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } arm_cfft_radix4_instance_q15; - -/* Deprecated */ - arm_status arm_cfft_radix4_init_q15( - arm_cfft_radix4_instance_q15 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void arm_cfft_radix4_q15( - const arm_cfft_radix4_instance_q15 * S, - q15_t * pSrc); - - /** - * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } arm_cfft_radix2_instance_q31; - -/* Deprecated */ - arm_status arm_cfft_radix2_init_q31( - arm_cfft_radix2_instance_q31 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void arm_cfft_radix2_q31( - const arm_cfft_radix2_instance_q31 * S, - q31_t * pSrc); - - /** - * @brief Instance structure for the Q31 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. 
*/ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q31_t *pTwiddle; /**< points to the twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } arm_cfft_radix4_instance_q31; - -/* Deprecated */ - void arm_cfft_radix4_q31( - const arm_cfft_radix4_instance_q31 * S, - q31_t * pSrc); - -/* Deprecated */ - arm_status arm_cfft_radix4_init_q31( - arm_cfft_radix4_instance_q31 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. 
*/ - } arm_cfft_radix2_instance_f32; - - -/* Deprecated */ - arm_status arm_cfft_radix2_init_f32( - arm_cfft_radix2_instance_f32 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void arm_cfft_radix2_f32( - const arm_cfft_radix2_instance_f32 * S, - float32_t * pSrc); - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ - } arm_cfft_radix4_instance_f32; - - - -/* Deprecated */ - arm_status arm_cfft_radix4_init_f32( - arm_cfft_radix4_instance_f32 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void arm_cfft_radix4_f32( - const arm_cfft_radix4_instance_f32 * S, - float32_t * pSrc); - - /** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. 
*/ -#if defined(ARM_MATH_MVEI) - const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \ - const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \ - const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \ - const q15_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */ \ - const q15_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */ \ - const q15_t *rearranged_twiddle_stride3; -#endif - } arm_cfft_instance_q15; - -arm_status arm_cfft_init_q15( - arm_cfft_instance_q15 * S, - uint16_t fftLen); - -void arm_cfft_q15( - const arm_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. 
*/ -#if defined(ARM_MATH_MVEI) - const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \ - const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \ - const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \ - const q31_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */ \ - const q31_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */ \ - const q31_t *rearranged_twiddle_stride3; -#endif - } arm_cfft_instance_q31; - -arm_status arm_cfft_init_q31( - arm_cfft_instance_q31 * S, - uint16_t fftLen); - -void arm_cfft_q31( - const arm_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. 
*/ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \ - const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \ - const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \ - const float32_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */ \ - const float32_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */ \ - const float32_t *rearranged_twiddle_stride3; -#endif - } arm_cfft_instance_f32; - - - - arm_status arm_cfft_init_f32( - arm_cfft_instance_f32 * S, - uint16_t fftLen); - - void arm_cfft_f32( - const arm_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - - /** - * @brief Instance structure for the Double Precision Floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const float64_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } arm_cfft_instance_f64; - - arm_status arm_cfft_init_f64( - arm_cfft_instance_f64 * S, - uint16_t fftLen); - - void arm_cfft_f64( - const arm_cfft_instance_f64 * S, - float64_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the Q15 RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. 
*/ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - const q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ -#if defined(ARM_MATH_MVEI) - arm_cfft_instance_q15 cfftInst; -#else - const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ -#endif - } arm_rfft_instance_q15; - - arm_status arm_rfft_init_q15( - arm_rfft_instance_q15 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void arm_rfft_q15( - const arm_rfft_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst); - - /** - * @brief Instance structure for the Q31 RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - const q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const q31_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ -#if defined(ARM_MATH_MVEI) - arm_cfft_instance_q31 cfftInst; -#else - const arm_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ -#endif - } arm_rfft_instance_q31; - - arm_status arm_rfft_init_q31( - arm_rfft_instance_q31 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void arm_rfft_q31( - const arm_rfft_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst); - - /** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. 
*/ - uint16_t fftLenBy2; /**< length of the complex FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - const float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ - arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ - } arm_rfft_instance_f32; - - arm_status arm_rfft_init_f32( - arm_rfft_instance_f32 * S, - arm_cfft_radix4_instance_f32 * S_CFFT, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void arm_rfft_f32( - const arm_rfft_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst); - - /** - * @brief Instance structure for the Double Precision Floating-point RFFT/RIFFT function. - */ -typedef struct - { - arm_cfft_instance_f64 Sint; /**< Internal CFFT structure. */ - uint16_t fftLenRFFT; /**< length of the real sequence */ - const float64_t * pTwiddleRFFT; /**< Twiddle factors real stage */ - } arm_rfft_fast_instance_f64 ; - -arm_status arm_rfft_fast_init_f64 ( - arm_rfft_fast_instance_f64 * S, - uint16_t fftLen); - - -void arm_rfft_fast_f64( - arm_rfft_fast_instance_f64 * S, - float64_t * p, float64_t * pOut, - uint8_t ifftFlag); - - - /** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ -typedef struct - { - arm_cfft_instance_f32 Sint; /**< Internal CFFT structure. 
*/ - uint16_t fftLenRFFT; /**< length of the real sequence */ - const float32_t * pTwiddleRFFT; /**< Twiddle factors real stage */ - } arm_rfft_fast_instance_f32 ; - -arm_status arm_rfft_fast_init_f32 ( - arm_rfft_fast_instance_f32 * S, - uint16_t fftLen); - - - void arm_rfft_fast_f32( - const arm_rfft_fast_instance_f32 * S, - float32_t * p, float32_t * pOut, - uint8_t ifftFlag); - - /** - * @brief Instance structure for the floating-point DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - float32_t normalize; /**< normalizing factor. */ - const float32_t *pTwiddle; /**< points to the twiddle factor table. */ - const float32_t *pCosFactor; /**< points to the cosFactor table. */ - arm_rfft_instance_f32 *pRfft; /**< points to the real FFT instance. */ - arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ - } arm_dct4_instance_f32; - - - /** - * @brief Initialization function for the floating-point DCT4/IDCT4. - * @param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure. - * @param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure. - * @param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure. - * @param[in] N length of the DCT4. - * @param[in] Nby2 half of the length of the DCT4. - * @param[in] normalize normalizing factor. - * @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if fftLenReal is not a supported transform length. - */ - arm_status arm_dct4_init_f32( - arm_dct4_instance_f32 * S, - arm_rfft_instance_f32 * S_RFFT, - arm_cfft_radix4_instance_f32 * S_CFFT, - uint16_t N, - uint16_t Nby2, - float32_t normalize); - - - /** - * @brief Processing function for the floating-point DCT4/IDCT4. - * @param[in] S points to an instance of the floating-point DCT4/IDCT4 structure. - * @param[in] pState points to state buffer. 
- * @param[in,out] pInlineBuffer points to the in-place input and output buffer. - */ - void arm_dct4_f32( - const arm_dct4_instance_f32 * S, - float32_t * pState, - float32_t * pInlineBuffer); - - - /** - * @brief Instance structure for the Q31 DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q31_t normalize; /**< normalizing factor. */ - const q31_t *pTwiddle; /**< points to the twiddle factor table. */ - const q31_t *pCosFactor; /**< points to the cosFactor table. */ - arm_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */ - arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } arm_dct4_instance_q31; - - - /** - * @brief Initialization function for the Q31 DCT4/IDCT4. - * @param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure. - * @param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure - * @param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure - * @param[in] N length of the DCT4. - * @param[in] Nby2 half of the length of the DCT4. - * @param[in] normalize normalizing factor. - * @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if N is not a supported transform length. - */ - arm_status arm_dct4_init_q31( - arm_dct4_instance_q31 * S, - arm_rfft_instance_q31 * S_RFFT, - arm_cfft_radix4_instance_q31 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q31_t normalize); - - - /** - * @brief Processing function for the Q31 DCT4/IDCT4. - * @param[in] S points to an instance of the Q31 DCT4 structure. - * @param[in] pState points to state buffer. - * @param[in,out] pInlineBuffer points to the in-place input and output buffer. - */ - void arm_dct4_q31( - const arm_dct4_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - - /** - * @brief Instance structure for the Q15 DCT4/IDCT4 function. 
- */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q15_t normalize; /**< normalizing factor. */ - const q15_t *pTwiddle; /**< points to the twiddle factor table. */ - const q15_t *pCosFactor; /**< points to the cosFactor table. */ - arm_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */ - arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } arm_dct4_instance_q15; - - - /** - * @brief Initialization function for the Q15 DCT4/IDCT4. - * @param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure. - * @param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure. - * @param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure. - * @param[in] N length of the DCT4. - * @param[in] Nby2 half of the length of the DCT4. - * @param[in] normalize normalizing factor. - * @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if N is not a supported transform length. - */ - arm_status arm_dct4_init_q15( - arm_dct4_instance_q15 * S, - arm_rfft_instance_q15 * S_RFFT, - arm_cfft_radix4_instance_q15 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q15_t normalize); - - - /** - * @brief Processing function for the Q15 DCT4/IDCT4. - * @param[in] S points to an instance of the Q15 DCT4 structure. - * @param[in] pState points to state buffer. - * @param[in,out] pInlineBuffer points to the in-place input and output buffer. 
- */ - void arm_dct4_q15( - const arm_dct4_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer); - - - -#ifdef __cplusplus -} -#endif - -#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/utils.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/utils.h deleted file mode 100644 index 794023c57..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/dsp/utils.h +++ /dev/null @@ -1,239 +0,0 @@ -/****************************************************************************** - * @file arm_math_utils.h - * @brief Public header file for CMSIS DSP Library - * @version V1.9.0 - * @date 20. July 2020 - ******************************************************************************/ -/* - * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef _ARM_MATH_UTILS_H_ - -#define _ARM_MATH_UTILS_H_ - -#include "arm_math_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - /** - * @brief Macros required for reciprocal calculation in Normalized LMS - */ - -#define INDEX_MASK 0x0000003F - - -#define SQ(x) ((x) * (x)) - - - - /** - * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type. - */ - __STATIC_FORCEINLINE uint32_t arm_recip_q31( - q31_t in, - q31_t * dst, - const q31_t * pRecipTable) - { - q31_t out; - uint32_t tempVal; - uint32_t index, i; - uint32_t signBits; - - if (in > 0) - { - signBits = ((uint32_t) (__CLZ( in) - 1)); - } - else - { - signBits = ((uint32_t) (__CLZ(-in) - 1)); - } - - /* Convert input sample to 1.31 format */ - in = (in << signBits); - - /* calculation of index for initial approximated Val */ - index = (uint32_t)(in >> 24); - index = (index & INDEX_MASK); - - /* 1.31 with exp 1 */ - out = pRecipTable[index]; - - /* calculation of reciprocal value */ - /* running approximation for two iterations */ - for (i = 0U; i < 2U; i++) - { - tempVal = (uint32_t) (((q63_t) in * out) >> 31); - tempVal = 0x7FFFFFFFu - tempVal; - /* 1.31 with exp 1 */ - /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */ - out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30); - } - - /* write output */ - *dst = out; - - /* return num of signbits of out = 1/in value */ - return (signBits + 1U); - } - - - /** - * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type. 
- */ - __STATIC_FORCEINLINE uint32_t arm_recip_q15( - q15_t in, - q15_t * dst, - const q15_t * pRecipTable) - { - q15_t out = 0; - uint32_t tempVal = 0; - uint32_t index = 0, i = 0; - uint32_t signBits = 0; - - if (in > 0) - { - signBits = ((uint32_t)(__CLZ( in) - 17)); - } - else - { - signBits = ((uint32_t)(__CLZ(-in) - 17)); - } - - /* Convert input sample to 1.15 format */ - in = (in << signBits); - - /* calculation of index for initial approximated Val */ - index = (uint32_t)(in >> 8); - index = (index & INDEX_MASK); - - /* 1.15 with exp 1 */ - out = pRecipTable[index]; - - /* calculation of reciprocal value */ - /* running approximation for two iterations */ - for (i = 0U; i < 2U; i++) - { - tempVal = (uint32_t) (((q31_t) in * out) >> 15); - tempVal = 0x7FFFu - tempVal; - /* 1.15 with exp 1 */ - out = (q15_t) (((q31_t) out * tempVal) >> 14); - /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */ - } - - /* write output */ - *dst = out; - - /* return num of signbits of out = 1/in value */ - return (signBits + 1); - } - - -/** - * @brief 64-bit to 32-bit unsigned normalization - * @param[in] in is input unsigned long long value - * @param[out] normalized is the 32-bit normalized value - * @param[out] norm is norm scale - */ -__STATIC_INLINE void arm_norm_64_to_32u(uint64_t in, int32_t * normalized, int32_t *norm) -{ - int32_t n1; - int32_t hi = (int32_t) (in >> 32); - int32_t lo = (int32_t) ((in << 32) >> 32); - - n1 = __CLZ(hi) - 32; - if (!n1) - { - /* - * input fits in 32-bit - */ - n1 = __CLZ(lo); - if (!n1) - { - /* - * MSB set, need to scale down by 1 - */ - *norm = -1; - *normalized = (((uint32_t) lo) >> 1); - } else - { - if (n1 == 32) - { - /* - * input is zero - */ - *norm = 0; - *normalized = 0; - } else - { - /* - * 32-bit normalization - */ - *norm = n1 - 1; - *normalized = lo << *norm; - } - } - } else - { - /* - * input fits in 64-bit - */ - n1 = 1 - n1; - *norm = -n1; - /* - * 64 bit normalization - */ - *normalized = (((uint32_t) lo) >> 
n1) | (hi << (32 - n1)); - } -} - -__STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) -{ - q31_t result; - uint64_t absNum; - int32_t normalized; - int32_t norm; - - /* - * if sum fits in 32bits - * avoid costly 64-bit division - */ - absNum = num > 0 ? num : -num; - arm_norm_64_to_32u(absNum, &normalized, &norm); - if (norm > 0) - /* - * 32-bit division - */ - result = (q31_t) num / den; - else - /* - * 64-bit division - */ - result = (q31_t) (num / den); - - return result; -} - - -#ifdef __cplusplus -} -#endif - -#endif /*ifndef _ARM_MATH_UTILS_H_ */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nn_tables.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nn_tables.h deleted file mode 100644 index 36be5a832..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nn_tables.h +++ /dev/null @@ -1,56 +0,0 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_tables.h - * Description: Extern declaration for NN tables - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ARM_NN_TABLES_H -#define _ARM_NN_TABLES_H - -#include "arm_math.h" - -/** -* @brief tables for various activation functions -* -*/ - -extern const q15_t sigmoidTable_q15[256]; -extern const q7_t sigmoidTable_q7[256]; - -extern const q7_t tanhTable_q7[256]; -extern const q15_t tanhTable_q15[256]; - - /** - * @brief 2-way tables for various activation functions - * - * 2-way table, H table for value larger than 1/4 - * L table for value smaller than 1/4, H table for remaining - * We have this only for the q15_t version. It does not make - * sense to have it for q7_t type - */ -extern const q15_t sigmoidHTable_q15[192]; -extern const q15_t sigmoidLTable_q15[128]; - -#endif /* ARM_NN_TABLES_H */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nn_types.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nn_types.h deleted file mode 100644 index 4e825c52d..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nn_types.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_types.h - * Description: Public header file to contain the CMSIS-NN structs for the - * TensorFlowLite micro compliant functions - * - * $Date: April 23, 2020 - * $Revision: V.0.5.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - - -#ifndef _ARM_NN_TYPES_H -#define _ARM_NN_TYPES_H - -/** CMSIS-NN object to contain the width and height of a tile */ -typedef struct -{ - int32_t w; /**< Width */ - int32_t h; /**< Height */ -} cmsis_nn_tile; - -/** CMSIS-NN object used for the function context. */ -typedef struct -{ - void *buf; /**< Pointer to a buffer needed for the optimization */ - int32_t size; /**< Buffer size */ -} cmsis_nn_context; - -/** CMSIS-NN object to contain the dimensions of the tensors */ -typedef struct -{ - int32_t n; /**< Generic dimension to contain either the batch size or output channels. 
Please refer to the function documentation for more information */ - int32_t h; /**< Height */ - int32_t w; /**< Width */ - int32_t c; /**< Input channels */ -} cmsis_nn_dims; - -/** CMSIS-NN object for the per-channel quantization parameters */ -typedef struct -{ - int32_t *multiplier; /**< Multiplier values */ - int32_t *shift; /**< Shift values */ -} cmsis_nn_per_channel_quant_params; - -/** CMSIS-NN object for the per-tensor quantization parameters */ -typedef struct -{ - int32_t multiplier; /**< Multiplier value */ - int32_t shift; /**< Shift value */ -} cmsis_nn_per_tensor_quant_params; - -/** CMSIS-NN object for the quantized Relu activation */ -typedef struct -{ - int32_t min; /**< Min value used to clamp the result */ - int32_t max; /**< Max value used to clamp the result */ -} cmsis_nn_activation; - -/** CMSIS-NN object for the convolution layer parameters */ -typedef struct -{ - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - cmsis_nn_tile stride; - cmsis_nn_tile padding; - cmsis_nn_tile dilation; - cmsis_nn_activation activation; -} cmsis_nn_conv_params; - -/** CMSIS-NN object for Depthwise convolution layer parameters */ -typedef struct -{ - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - int32_t ch_mult; /**< Channel Multiplier. 
ch_mult * in_ch = out_ch */ - cmsis_nn_tile stride; - cmsis_nn_tile padding; - cmsis_nn_tile dilation; - cmsis_nn_activation activation; -} cmsis_nn_dw_conv_params; -/** CMSIS-NN object for pooling layer parameters */ -typedef struct -{ - cmsis_nn_tile stride; - cmsis_nn_tile padding; - cmsis_nn_activation activation; -} cmsis_nn_pool_params; - -/** CMSIS-NN object for Fully Connected layer parameters */ -typedef struct -{ - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t filter_offset; /**< Zero value for the filter tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - cmsis_nn_activation activation; -} cmsis_nn_fc_params; - -/** CMSIS-NN object for SVDF layer parameters */ -typedef struct -{ - int32_t rank; - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - cmsis_nn_activation input_activation; - cmsis_nn_activation output_activation; -} cmsis_nn_svdf_params; - -#endif // _ARM_NN_TYPES_H - - diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nnfunctions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nnfunctions.h deleted file mode 100644 index 45ee891d8..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nnfunctions.h +++ /dev/null @@ -1,2124 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nnfunctions.h - * Description: Public header file for CMSIS NN Library - * - * $Date: August 21, 2020 - * $Revision: V.6.5.2 - * - * Target Processor: Cortex-M CPUs - * -------------------------------------------------------------------- */ - -/** - \mainpage CMSIS NN Software Library - * - * Introduction - * ------------ - * - * This user manual describes the CMSIS NN software library, - * a collection of efficient neural network kernels developed to maximize the - * performance and minimize the memory footprint of neural networks on Cortex-M processor cores. - * - * The library is divided into a number of functions each covering a specific category: - * - Convolution Functions - * - Activation Functions - * - Fully-connected Layer Functions - * - SVDF Layer Functions - * - Pooling Functions - * - Softmax Functions - * - Basic math Functions - * - * The library has separate functions for operating on different weight and activation data - * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The descrition of the - * kernels are included in the function description. The implementation details are also - * described in this paper [1]. - * - * Function Classification - * -------- - * The functions can be classified into two segments - * - Legacy functions supporting ARM's internal symmetric quantization(8 bits). - * - Functions that support TensorFlow Lite framework with symmetric quantization(8 bits). 
- * - * The legacy functions can be identified with their suffix of _q7 or _q15 and are no new development is done there. The article in [2] describes in detail - * how to run a network using the legacy functions. - * - * The functions supporting TensorFlow Lite framework is identified by the _s8 suffix and can be invoked from TFL micro. The functions are bit exact to - * TensorFlow Lite. Refer to the TensorFlow's documentation in [3] on how to run a TensorFlow Lite model using optimized CMSIS-NN kernels. - * - * Block Diagram - * -------- - * \image html CMSIS-NN-OVERVIEW.PNG - * - * Examples - * -------- - * - * The library ships with a number of examples which demonstrate how to use the library functions. - * - * Pre-processor Macros - * ------------ - * - * Each library project have different pre-processor macros. - * - * - ARM_MATH_DSP: - * - * Define macro ARM_MATH_DSP, If the silicon supports DSP instructions(DSP extension). - * - * - ARM_MATH_MVEI: - * - * Define macro ARM_MATH_MVEI, If the silicon supports M-Profile Vector Extension. - - * - ARM_MATH_AUTOVECTORIZE - * Used in conjucture with ARM_MATH_MVEI to let the compiler auto vectorize for the functions that uses inline assembly. - * It does not affect functions that use C or intrinsics. - * - ARM_MATH_BIG_ENDIAN: - * - * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. This is supported only for the legacy functions i.e, functions targetted at - * TensorFlow Lite do not support big endianness. By default library builds for little endian targets. - * - * - ARM_NN_TRUNCATE: - * - * Define macro ARM_NN_TRUNCATE to use floor instead of round-to-the-nearest-int for the computation. 
- * - * Upcoming Interface Change - * -------- - * Starting from the 1.4.0 next release, CMSIS-NN will gradually switch to a new API interface to: - * - * -# have a stable API - * -# avoid passing many variables by value - * -# improve security - * -# improve validation - * -# improve code readability - * - * The upcoming API interface change will be based on "struct" and only affect the TensorFlowLite micro compliant APIs [4] (functions with _s8 suffix) - * - * Below you can find a snapshot of how the new API interface will look like (names can change) - * - * i.e. arm_convolve_1x1_s8_fast - * - * Current API interface | New API interface proposal - * ------------- | ------------- - * const q7_t *input | const cmsis_nn_context &ctx - * const uint16_t input_x | const cmsis_nn_conv_params &params - * const uint16_t input_y | const cmsis_nn_dims &input_dims - * const uint16_t input_ch | const q7_t *input_data - * const uint16_t input_batches | const cmsis_nn_dims &filter_dims - * const q7_t *kernel | const q7_t *filter_data - * const uint16_t output_ch | const cmsis_nn_dims &bias_dims - * const uint16_t pad_x | const q31_t *bias_data - * const uint16_t pad_y | const cmsis_nn_dims &output_dims - * const uint16_t stride_x | q7_t *output_data - * const uint16_t stride_y |
- * const int32_t *bias |
- * q7_t *output |
- * const int32_t *output_shift |
- * const int32_t *output_mult |
- * const int32_t out_offset |
- * const int32_t input_offset |
- * const int32_t out_activation_min |
- * const int32_t out_activation_max |
- * const uint16_t output_x |
- * const uint16_t output_y |
- * q15_t *buffer_a |
- * - * Copyright Notice - * ------------ - * - * Copyright (C) 2010-2019 Arm Limited. All rights reserved. - * - * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601 - * - * [2] Converting a Neural Network for Arm Cortex-M with CMSIS-NN - * https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides/converting-a-neural-network-for-arm-cortex-m-with-cmsis-nn/single-page - * [3] https://www.tensorflow.org/lite/microcontrollers/library - * - * [4] https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN#legacy-vs-tfl-micro-compliant-apis - */ - -/** - * @defgroup groupNN Neural Network Functions - * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix support - * TensorFlow Lite framework. - */ - -#ifndef _ARM_NNFUNCTIONS_H -#define _ARM_NNFUNCTIONS_H - -#include "arm_nnsupportfunctions.h" -#include "arm_nn_tables.h" -#include "arm_nn_types.h" - -#define USE_INTRINSIC - -//#define ARM_NN_TRUNCATE /* This config the rounding model to floor or round to the nearest int */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @defgroup NNConv Convolution Functions - * - * Collection of convolution, depthwise convolution functions and their variants. - * - * The convolution is implemented in 2 steps: im2col and GEMM - * - * im2col is a process of converting each patch of image data into - * a column. After im2col, the convolution is computed as matrix-matrix - * multiplication. - * - * To reduce the memory footprint, the im2col is performed partially. - * Each iteration, only a few column (i.e., patches) are generated and - * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions. - * - */ - - /** - * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in cmsis-nn to perform the convolution. 
- * - * @param[in, out] ctx Function context that contains the additional buffer if required by the implementation. - arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - */ - arm_status arm_convolve_wrapper_s8(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data); - - /** - * @brief Get the required buffer size for arm_convolve_wrapper_s8 - * - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). 
- * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * - * @return The function returns required buffer size(bytes) - * - */ - int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params* conv_params, - const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims, - const cmsis_nn_dims* output_dims); - - /** - * @brief Basic s8 convolution function - * @param[in, out] ctx Function context that contains the additional buffer if required by the implementation. - arm_convolve_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. 
Supported framework: TensorFlow Lite micro - * 2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * 3. Additional memory is required for optimization. Refer to argument 'ctx' for details. - * - */ - arm_status arm_convolve_s8(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data); - - /** - * @brief Get the required buffer size for s8 convolution function - * - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions - * @return The function returns required buffer size(bytes) - * - */ - int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims); - - /** - * @brief Basic Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS 
- * - */ - arm_status arm_convolve_HWC_q7_basic(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Basic Q7 convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - */ - arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t 
out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Basic Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - */ - arm_status arm_convolve_HWC_q15_basic(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Fast Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * 
@param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - arm_status arm_convolve_HWC_q7_fast(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Fast Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space 
for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - - arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function 
returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1 - * and dim_kernel_y=1). It can be used for - * second half of MobileNets after depthwise separable convolution. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Fast s8 version for 1x1 convolution (non-square shape) - * - * @param[in, out] ctx Function context that contains the additional buffer if required by the implementation. - arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] - * @param[in] filter_data Filter data pointer. 
Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# input_dims->c is a multiple of 4 - * -# conv_params->padding.w = conv_params->padding.h = 0 - * -# conv_params->stride.w = conv_params->stride.h = 1 - * - */ - arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data); - - /** - * @brief Get the required buffer size for arm_convolve_1x1_s8_fast - * - * @param[in] input_dims Input (activation) dimensions - * @return The function returns the required buffer size in bytes - * - */ - int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims* input_dims); - - /** - * @brief 1xn convolution - * - * @param[in, out] ctx Function context that contains the additional buffer if required by the implementation. - arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. 
- * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# input_dims->n equals 1 - * -# ouput_dims->w is a multiple of 4 - * -# Explicit constraints(since it is for 1xN convolution) - * -## input_dims->h equals 1 - * -## output_dims->h equals 1 - * -## filter_dims->h equals 1 - *@todo Remove constraint on output_dims->w to make the function generic. - * - */ - arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data); - - /** - * @brief Get the required additional buffer size for 1xn convolution - * - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter tensor dimensions. 
Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension - * @return The function returns required buffer size(bytes) - * - */ - int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims); - - /** - * @brief Q7 version of convolution for RGB image - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This kernel is written exclusively for convolution with ch_im_in - * equals 3. This applies on the first layer of CNNs which has input - * image with RGB format. 
- */ - - arm_status arm_convolve_HWC_q7_RGB(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Fast Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ - - arm_status arm_convolve_HWC_q15_fast(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Fast Q15 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 2 - * - * ch_im_out is multipe of 2 - * - */ - - arm_status - arm_convolve_HWC_q15_fast_nonsquare(const q15_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Q7 depthwise separable convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ - - arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Q7 depthwise separable convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding sizes x - * @param[in] padding_y padding sizes y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ - arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB); - - /** - * @brief Wrapper function to pick the right optimized s8 depthwise convolution function - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if required. - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * dw_conv_params->dilation is not used. - * Range of dw_conv_params->input_offset : [-127, 128] - * Range of dw_conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each - * output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Batch argument N is not used and assumed to be 1. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. 
Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns - * ARM_MATH_SUCCESS - Successful completion. - * - * @details - * - Supported framework: TensorFlow Lite - * - Picks one of the the following functions - * -# arm_depthwise_conv_s8() - * -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only - * -# arm_depthwise_conv_s8_opt() - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the boundary. - */ - arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - /** - * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() - * - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * dw_conv_params->dilation is not used. - * Range of dw_conv_params->input_offset : [-127, 128] - * Range of dw_conv_params->input_offset : [-128, 127] - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Batch argument N is not used and assumed to be 1. - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] - * @return Size of additional memory required for optimizations in bytes. 
- * - */ - int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims); - - /** - * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * exists if additional memory is. - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * dw_conv_params->dilation is not used. - * Range of dw_conv_params->input_offset : [-127, 128] - * Range of dw_conv_params->input_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each - * output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN] - * Batch argument N is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - Supported framework: TensorFlow Lite - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. 
- */ - arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - /** - * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on - * the input arguments(documented below). Refer arm_depthwise_conv_s8() for function - * argument details. - * - * @return The function returns one of the following - * ARM_MATH_SIZE_MISMATCH - Unsupported dimension of tensors - * ARM_MATH_ARGUMENT_ERROR - Unsupported pad size along the x axis - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# Number of input channel equals number of output channels - * -# Filter height and width equals 3 - * -# Padding along x is either 0 or 1. - * - */ - arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - /** - * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. - * Refer arm_depthwise_conv_s8() for function argument details. 
- * - * @return The function returns one of the following - * ARM_MATH_SIZE_MISMATCH - input channel != output channel or - * ch_mult != 1 - * ARM_MATH_SUCCESS - Successful operation - * - * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read out - * for the following if MVE optimizations(Arm Helium Technology) are used. - * - Output shift - * - Output multiplier - * - Output bias - * - kernel - * @details - * - Supported framework: TensorFlow Lite - * - The following constrains on the arguments apply - * -# Number of input channel equals number of output channels or ch_mult equals 1 - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - Reccomended when number of channels is 4 or greater. - * - */ - arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - /** - * @brief Get the required buffer size for optimized s8 depthwise convolution - * function with constraint that in_channel equals out_channel. - * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN] - * Batch argument N is not used. - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @return The function returns required buffer size in bytes - * - */ - int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims); - - /** - * @defgroup FC Fully-connected Layer Functions - * - * Collection of fully-connected and matrix multiplication functions. 
- * - * Fully-connected layer is basically a matrix-vector multiplication - * with bias. The matrix is the weights and the input/output vectors - * are the activation values. Supported {weight, activation} precisions - * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}. - * - * Here we have two types of kernel functions. The basic function - * implements the function using regular GEMV approach. The opt functions - * operates with weights in interleaved formats. - * - */ - - /** - * @brief Q7 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - - arm_status arm_fully_connected_q7(const q7_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut, - q15_t * vec_buffer); - - /** - * @brief Basic s8 Fully Connected function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] fc_params Fully Connected layer parameters (e.g. strides, dilations, pads,...) - * Range of fc_params->input_offset : [-127, 128] - * Range of fc_params->filter_offset : [-127, 128] - * Range of fc_params->output_offset : [-128, 127] - * @param[in] quant_params Per-tensor quantization info. 
- * It contains the multiplier and shift values to be applied to the output tensor. - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * Input dimension is taken as Nx(H * W * C_IN) - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] - * N : accumulation depth and equals (H * W * C_IN) from input_dims - * C : output depth and equals C_OUT in output_dims - * H & W : Not used - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * N, H, W : Not used - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] - * N : Batches - * C_OUT : Output depth - * H & W : Not used. - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - Supported framework: TensorFlow Lite - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. 
- */ - arm_status - arm_fully_connected_s8(const cmsis_nn_context *ctx, - const cmsis_nn_fc_params *fc_params, - const cmsis_nn_per_tensor_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - /** - * @brief Get the required buffer size for S8 basic fully-connected and - * matrix multiplication layer function for TF Lite - * @param[in] filter_dims dimension of filter - * @return The function returns required buffer size in bytes - * - */ - int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims); - - /** - * @brief Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - - arm_status arm_fully_connected_q7_opt(const q7_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut, - q15_t * vec_buffer); - - /** - * @brief Q15 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to 
output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - - arm_status arm_fully_connected_q15(const q15_t * pV, - const q15_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut, - q15_t * vec_buffer); - - /** - * @brief Q15 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - - arm_status arm_fully_connected_q15_opt(const q15_t * pV, - const q15_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut, - q15_t * vec_buffer); - - /** - * @brief Mixed Q15-Q7 fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - - arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t 
* bias, - q15_t * pOut, - q15_t * vec_buffer); - - /** - * @brief Mixed Q15-Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - - arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q15_t * pOut, - q15_t * vec_buffer); - -/** - * @brief Matrix-Multiplication Kernels for Convolution - * - * These functions are used within convolution layer functions for - * matrix multiplication. - * - * The implementation is similar to CMSIS-DSP arm_mat_mult functions - * with one Q7 and one Q15 operands. The Q15 operand is the im2col - * output which is always with 2 columns. 
- * - */ - - /** - * @brief Matrix-multiplication function for convolution - * @param[in] pA pointer to operand A - * @param[in] pInBuffer pointer to operand B, always conssists of 2 vectors - * @param[in] ch_im_out numRow of A - * @param[in] numCol_A numCol of A - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias the bias - * @param[in,out] pOut pointer to output - * @return The function returns the incremented output pointer - */ - - q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - /** - * @brief Matrix-multiplication function for convolution with per-channel requantization. - * @param[in] input_a pointer to operand A - * @param[in] input_b pointer to operand B, always consists of 2 vectors. - * @param[in] output_ch number of rows of A - * @param[in] out_shift pointer to per output channel requantization shift parameter. - * @param[in] out_mult pointer to per output channel requantization multiplier parameter. - * @param[in] out_offset output tensor offset. - * @param[in] activation_min minimum value to clamp the output to. Range : int8 - * @param[in] activation_max maximum value to clamp the output to. Range : int8 - * @param[in] num_col_a number of columns of A - * @param[in] output_bias per output channel bias. Range : int32 - * @param[in,out] out_0 pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details This function does the matrix multiplication of weight matrix for all output channels - * with 2 columns from im2col and produces two elements/output_channel. The outputs are - * clamped in the range provided by activation min and max. 
- * Supported framework: TensorFlow Lite micro. - */ - q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0); - - /** - * @brief Matrix-multiplication of re-ordered input B with A. - * - * @details For arguments, refer arm_nn_mat_mult_kernel_s8_s16. The re-ordering is a consequence - * of sign extension done by the SXTB16 command on input_b. The outputs are clamped in the range - * provided by activation min and max. - * * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# num_col_a is a multiple of 4 - * -# output_ch is a multiple of 2 - * - */ - q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0); - - /** - * @brief Matrix-multiplication function for convolution with reordered columns - * @param[in] pA pointer to operand A - * @param[in] pInBuffer pointer to operand B, always conssists of 2 vectors - * @param[in] ch_im_out numRow of A - * @param[in] numCol_A numCol of A - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias the bias - * @param[in,out] pOut pointer to output - * @return The function returns the incremented output pointer - * - * @details This function assumes that data in pInBuffer are reordered - */ - q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t 
numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - -#ifdef __cplusplus -} -#endif - -/* - * Other functions - * These layers are typically not timing critical - * Basic implementation is supported here - */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @defgroup BasicMath Basic math functions - * - * Element wise add and multiplication functions. - * - */ - -/** - * @brief s8 element wise add of two vectors - * @param[in] input_1_vect pointer to input vector 1 - * @param[in] input_2_vect pointer to input vector 2 - * @param[in] input_1_offset offset for input 1. Range: Range: -127 to 128 - * @param[in] input_1_mult multiplier for input 1 - * @param[in] input_1_shift shift for input 1 - * @param[in] input_2_offset offset for input 2. Range: Range: -127 to 128 - * @param[in] input_2_mult multiplier for input 2 - * @param[in] input_2_shift shift for input 2 - * @param[in] left_shift input left shift - * @param[in,out] output pointer to output vector - * @param[in] out_offset output offset - * @param[in] out_mult output multiplier - * @param[in] out_shift output shift - * @param[in] out_activation_min minimum value to clamp output to - * @param[in] out_activation_max maximum value to clamp output to - * @param[in] block_size number of samples - * @return The function returns ARM_MATH_SUCCESS - */ - arm_status arm_elementwise_add_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_1_mult, - const int32_t input_1_shift, - const int32_t input_2_offset, - const int32_t input_2_mult, - const int32_t input_2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const uint32_t block_size); - -/** - * @brief s8 element wise multiplication - * @param[in] input_1_vect pointer to input vector 1 - * 
@param[in] input_2_vect pointer to input vector 2 - * @param[in] input_1_offset offset for input 1. Range: Range: -127 to 128 - * @param[in] input_2_offset offset for input 2. Range: Range: -127 to 128 - * @param[in,out] output pointer to output vector - * @param[in] out_offset output offset - * @param[in] out_mult output multiplier - * @param[in] out_shift output shift - * @param[in] out_activation_min minimum value to clamp output to - * @param[in] out_activation_max maximum value to clamp output to - * @param[in] block_size number of samples - * @return The function returns ARM_MATH_SUCCESS - * - * @details Supported framework: TensorFlow Lite micro - */ - arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const uint32_t block_size); -/** - * @defgroup Acti Activation Functions - * - * Perform activation layers, including ReLU (Rectified Linear Unit), - * sigmoid and tanh - * - */ - - /** - * @brief Q7 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - - void arm_relu_q7(q7_t *data, uint16_t size); - - /** - * @brief s8 ReLU6 function - * @param[in,out] data pointer to input - * @param[in] size number of elements - */ - - void arm_relu6_s8(q7_t *data, uint16_t size); - - /** - * @brief Q15 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. 
- */ - - void arm_relu_q15(q15_t *data, uint16_t size); - - /** - * @brief Q7 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - */ - - void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width, - arm_nn_activation_type type); - - /** - * @brief Q15 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - * - * @details - * - * This is the direct table look-up approach. - * - * Assume here the integer part of the fixed-point is <= 3. - * More than 3 just not making much sense, makes no difference with - * saturation followed by any of these activation functions. - */ - - void arm_nn_activations_direct_q15(q15_t * data, uint16_t size, uint16_t int_width, - arm_nn_activation_type type); - -/** - * @defgroup Pooling Pooling Functions - * - * Perform pooling functions, including max pooling and average pooling - * - */ - - /** - * @brief Q7 max pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. 
- * - */ - - void arm_maxpool_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, - q7_t * Im_out); - - /** - * @brief Q7 average pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - - void arm_avepool_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, - q7_t * Im_out); - - /** - * @brief s8 average pooling function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] pool_params Pooling parameters - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Argument 'N' is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] - * Argument N and C are not used. - * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] - * Argument N is not used. - * C_OUT equals C_IN. - * @param[in, out] output_data Output data pointer. 
Data type: int8 - * @return The function returns - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported Framework: TensorFlow Lite - * - */ - arm_status arm_avgpool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - /** - * @brief Get the required buffer size for S8 average pooling function - * @param[in] dim_dst_width output tensor dimension - * @param[in] ch_src number of input tensor channels - * @return The function returns required buffer size in bytes - * - */ - int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, - const int ch_src); - - /** - * @brief s8 max pooling function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] pool_params Pooling parameters - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Argument 'N' is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] - * Argument N and C are not used. - * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] - * Argument N is not used. - * C_OUT equals C_IN. - * @param[in, out] output_data Output data pointer. 
Data type: int8 - * @return The function returns - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported Framework: TensorFlow Lite - * - */ - arm_status arm_max_pool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *output_data); -/** - * @defgroup Softmax Softmax Functions - * - * EXP(2) based softmax functions. - * - */ - - /** - * @brief Q7 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimension - * @param[out] p_out pointer to output vector - * - * @note This function is an optimized version which is not bit-accurate with - * TensorFlow Lite's kernel - * - */ - -void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out); - - /** - * @brief Q7 softmax function with batch parameter - * @param[in] vec_in pointer to input vector - * @param[in] nb_batches number of batches - * @param[in] dim_vec input vector dimension - * @param[out] p_out pointer to output vector - * @return none. - * - * @note This function is an optimized version which is not bit-accurate with - * TensorFlow Lite's kernel - * - */ - -void arm_softmax_with_batch_q7(const q7_t * vec_in, const uint16_t nb_batches,const uint16_t dim_vec, q7_t * p_out ); - /** - * @brief Q15 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimension - * @param[out] p_out pointer to output vector - * @return none. 
- * - * @note This function is an optimized version which is not bit-accurate with - * TensorFlow Lite's kernel - * - */ - -void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out); - - /** - * @brief S8 softmax function - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] diff_min Minimum difference with max in row. Used to check if - * the quantized exponential operation can be performed - * @param[out] output Pointer to the output tensor - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ - -void arm_softmax_s8(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output); - - /** - * @brief U8 softmax function - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] diff_min Minimum difference with max in row. Used to check if - * the quantized exponential operation can be performed - * @param[out] output Pointer to the output tensor - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ - -void arm_softmax_u8(const uint8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - uint8_t *output); - - /** - * @brief uint8 depthwise convolution function with asymmetric quantization - * Unless specified otherwise, arguments are mandatory. 
- * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_ch Channels in input tensor - * @param[in] kernel Pointer to kernel weights - * @param[in] kernel_x Width of kernel - * @param[in] kernel_y Height of kernel - * @param[in] ch_mult Number of channel multiplier - * @param[in] pad_x Padding sizes x - * @param[in] pad_y Padding sizes y - * @param[in] stride_x stride along the width - * @param[in] stride_y stride along the height - * @param[in] dilation_x Dilation along width. Not used and intended for future enhancement. - * @param[in] dilation_y Dilation along height. Not used and intended for future enhancement. - * @param[in] bias Pointer to optional bias values. If no bias is - * availble, NULL is expected - * @param[in] input_offset Input tensor zero offset - * @param[in] filter_offset Kernel tensor zero offset - * @param[in] output_offset Output tensor zero offset - * @param[in,out] output Pointer to output tensor - * @param[in] output_x Width of output tensor - * @param[in] output_y Height of output tensor - * @param[in] output_activation_min Minimum value to clamp the output to. Range : {0, 255} - * @param[in] output_activation_max Minimum value to clamp the output to. 
Range : {0, 255} - * @param[in] out_shift Amount of right-shift for output - * @param[in] out_mult Output multiplier for requantization - * @return The function returns the following - * ARM_MATH_SUCCESS - Successful operation - * - */ - arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const uint8_t *kernel, - const uint16_t kernel_x, - const uint16_t kernel_y, - const int16_t ch_mult, - const int16_t pad_x, - const int16_t pad_y, - const int16_t stride_x, - const int16_t stride_y, - const int16_t dilation_x, - const int16_t dilation_y, - const int32_t *bias, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_offset, - uint8_t *output, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_activation_min, - const int32_t output_activation_max, - const int32_t out_shift, - const int32_t out_mult); - -/** - * @defgroup Reshape Reshape Functions - * - */ - - /** - * @brief Reshape a s8 vector into another with different shape - * @param[in] input points to the s8 input vector - * @param[out] output points to the s8 output vector - * @param[in] total_size total size of the input and output vectors in bytes - * - * @note The output is expected to be in a memory area that does not overlap with the input's - * - */ - void arm_reshape_s8(const int8_t *input, - int8_t *output, - const uint32_t total_size); - -/** - * @defgroup Concatenation Concatenation Functions - * - */ - - /** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis - * This function should be called for each input tensor to concatenate. The argument offset_x - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. 
offset_x = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x) - * offset_x += input_x[i] - * } - * - * This function assumes that the output tensor has: - * -# The same height of the input tensor - * -# The same number of channels of the input tensor - * -# The same batch size of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor - * @param[in] output_x Width of output tensor - * @param[in] offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - * Input constraints - * offset_x is less than output_x - * - */ - void arm_concatenation_s8_x(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_x, - const uint32_t offset_x); - - /** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis - * This function should be called for each input tensor to concatenate. The argument offset_y - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. 
offset_y = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y) - * offset_y += input_y[i] - * } - * - * This function assumes that the output tensor has: - * -# The same width of the input tensor - * -# The same number of channels of the input tensor - * -# The same batch size of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor - * @param[in] output_y Height of output tensor - * @param[in] offset_y The offset on the Y axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - * Input constraints - * offset_y is less than output_y - * - */ - void arm_concatenation_s8_y(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_y, - const uint32_t offset_y); - - /** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis - * This function should be called for each input tensor to concatenate. The argument offset_z - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. 
offset_z = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z) - * offset_z += input_z[i] - * } - * - * This function assumes that the output tensor has: - * -# The same width of the input tensor - * -# The same height of the input tensor - * -# The same batch size of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor - * @param[in] output_z Channels in output tensor - * @param[in] offset_z The offset on the Z axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - * Input constraints - * offset_z is less than output_z - * - */ - void arm_concatenation_s8_z(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_z, - const uint32_t offset_z); - - /** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size) - * This function should be called for each input tensor to concatenate. The argument offset_w - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. 
offset_w = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w) - * offset_w += input_w[i] - * } - * - * This function assumes that the output tensor has: - * -# The same width of the input tensor - * -# The same height of the input tensor - * -# The same number o channels of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor - * @param[in] offset_w The offset on the W axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - */ - void arm_concatenation_s8_w(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint32_t offset_w); -/** - * @defgroup SVDF SVDF Layer Functions - * - */ - - /** - * @brief s8 SVDF function - * - * @param[in] input_ctx Temporary scratch buffer - * @param[in] output_ctx Temporary output scratch buffer - * @param[in] svdf_params SVDF Parameters - * Range of svdf_params->input_offset : [-128, 127] - * Range of svdf_params->output_offset : [-128, 127] - * @param[in] input_quant_params Input quantization parameters - * @param[in] output_quant_params Output quantization parameters - * @param[in] input_dims Input tensor dimensions - * @param[in] input_data Pointer to input tensor - * @param[in] state_dims State tensor dimensions - * @param[in] state_data Pointer to state tensor - * @param[in] weights_feature_dims Weights (feature) tensor dimensions - * @param[in] 
weights_feature_data Pointer to the weights (feature) tensor - * @param[in] weights_time_dims Weights (time) tensor dimensions - * @param[in] weights_time_data Pointer to the weights (time) tensor - * @param[in] bias_dims Bias tensor dimensions - * @param[in] bias_data Pointer to bias tensor - * @param[in] output_dims Output tensor dimensions - * @param[out] output_data Pointer to the output tensor - * - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. Supported framework: TensorFlow Lite micro - * 2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - */ - arm_status - arm_svdf_s8(const cmsis_nn_context *input_ctx, - const cmsis_nn_context *output_ctx, - const cmsis_nn_svdf_params *svdf_params, - const cmsis_nn_per_tensor_quant_params *input_quant_params, - const cmsis_nn_per_tensor_quant_params *output_quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *state_dims, - q15_t *state_data, - const cmsis_nn_dims *weights_feature_dims, - const q7_t *weights_feature_data, - const cmsis_nn_dims *weights_time_dims, - const q15_t *weights_time_data, - const cmsis_nn_dims *bias_dims, - const q31_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nnsupportfunctions.h b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nnsupportfunctions.h deleted file mode 100644 index 2f9364ca6..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include/arm_nnsupportfunctions.h +++ /dev/null @@ -1,954 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm 
Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nnsupportfunctions.h - * Description: Public header file of support functions for CMSIS NN Library - * - * $Date: July 31, 2020 - * $Revision: V.4.5.4 - * - * Target Processor: Cortex-M CPUs - * -------------------------------------------------------------------- */ - -#ifndef _ARM_NNSUPPORTFUNCTIONS_H_ -#define _ARM_NNSUPPORTFUNCTIONS_H_ - -#include "arm_math.h" -#include "arm_common_tables.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define LEFT_SHIFT(_shift) (_shift > 0 ? _shift : 0) -#define RIGHT_SHIFT(_shift) (_shift > 0 ? 0 : -_shift) -#define MASK_IF_ZERO(x) (x) == 0 ? ~0 : 0 -#define MASK_IF_NON_ZERO(x) (x) != 0 ? ~0 : 0 -#define SELECT_USING_MASK(mask, a, b) ((mask) & (a)) ^ (~(mask) & (b)) - -#define MAX(A,B) ((A) > (B) ? (A) : (B)) -#define MIN(A,B) ((A) < (B) ? 
(A) : (B)) -#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l)) - -/** - * @brief Union for SIMD access of q31/q15/q7 types - */ -union arm_nnword -{ - q31_t word; - /**< q31 type */ - q15_t half_words[2]; - /**< q15 type */ - q7_t bytes[4]; - /**< q7 type */ -}; - -/** - * @brief Union for data type long long - */ -struct arm_nn_double -{ - uint32_t low; - int32_t high; -}; - -union arm_nn_long_long -{ - int64_t long_long; - struct arm_nn_double word; -}; - -/** - * @brief Struct for specifying activation function types - * - */ -typedef enum -{ - ARM_SIGMOID = 0, - /**< Sigmoid activation function */ - ARM_TANH = 1, - /**< Tanh activation function */ -} arm_nn_activation_type; - -/** - * @defgroup nndata_convert Neural Network Data Conversion Functions - * - * Perform data type conversion in-between neural network operations - * - */ - -/** - * @brief Converts the elements of the q7 vector to q15 vector without left-shift - * @param[in] *pSrc points to the q7 input vector - * @param[out] *pDst points to the q15 output vector - * @param[in] blockSize length of the input vector - * - */ -void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize); - -/** - * @brief Non-saturating addition of elements of a q7 vector - * @param[in] *input Pointer to the q7 input vector - * @param[out] *output Pointer to the q31 output variable. - * @param[in] block_size length of the input vector - * \par Description: - * - * 2^24 samples can be added without saturating the result. - * - * The equation used for the conversion process is: - * - *
- *  sum = input[0] + input[1] + .. + input[block_size -1]
- * 
- * - * */ -void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size); - -/** - * @brief Converts the elements of the q7 vector to reordered q15 vector without left-shift - * @param[in] *pSrc points to the q7 input vector - * @param[out] *pDst points to the q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ -void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize); - -/** - * @brief Converts the elements from a q7 vector to a q15 vector with an added offset - * @param[in] src pointer to the q7 input vector - * @param[out] dst pointer to the q15 output vector - * @param[in] block_size length of the input vector - * @param[in] offset q7 offset to be added to each input vector element. - * - * \par Description: - * - * The equation used for the conversion process is: - * - *
- *  dst[n] = (q15_t) src[n] + offset;   0 <= n < block_size.
- * 
- * - */ -void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset); - -/** - * @brief Converts the elements of the q7 vector to reordered q15 vector with an added offset - * @param[in] src pointer to the q7 input vector - * @param[out] dst pointer to the q15 output vector - * @param[in] block_size length of the input vector - * @param[in] offset offset to be added to each input vector element. - * @return none. - * - * @details This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of - * the sign extension intrinsic(DSP extension). The tail (i.e., last (N % 4) elements) retains its original - * order. - * - */ -void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset); - -/** - * @brief Converts the elements from a q7 vector and accumulate to a q15 vector - * @param[in] *src points to the q7 input vector - * @param[out] *dst points to the q15 output vector - * @param[in] block_size length of the input vector - * - * \par Description: - * - * The equation used for the conversion process is: - * - *
- *  dst[n] += (q15_t) src[n] ;   0 <= n < block_size.
- * 
- * - */ -void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_size); - -/** - * @brief Depthwise conv on an im2col buffer where the input channel equals output channel. - * @param[in] row pointer to row - * @param[in] col pointer to im2col buffer, always consists of 2 columns. - * @param[in] num_ch number of channels - * @param[in] out_shift pointer to per output channel requantization shift parameter. - * @param[in] out_mult pointer to per output channel requantization multiplier parameter. - * @param[in] out_offset output tensor offset. - * @param[in] activation_min minimum value to clamp the output to. Range : int8 - * @param[in] activation_max maximum value to clamp the output to. Range : int8 - * @param[in] kernel_size number of elements in one column. - * @param[in] output_bias per output channel bias. Range : int32 - * @param[out] out pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details Supported framework: TensorFlow Lite micro. - */ -q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row, - const q15_t *col, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t kernel_size, - const int32_t *const output_bias, - q7_t *out); - -/** - * @brief General Matrix-multiplication function with per-channel requantization. - * @param[in] input_row pointer to row operand - * @param[in] input_col pointer to col operand - * @param[in] output_ch number of rows of input_row - * @param[in] col_batches number of column batches. Range: 1 to 4 - * @param[in] output_shift pointer to per output channel requantization shift parameter. - * @param[in] output_mult pointer to per output channel requantization multiplier parameter. - * @param[in] out_offset output tensor offset. 
- * @param[in] col_offset input tensor(col) offset. - * @param[in] row_offset kernel offset(row). Not used. - * @param[in] out_activation_min minimum value to clamp the output to. Range : int8 - * @param[in] out_activation_max maximum value to clamp the output to. Range : int8 - * @param[in] row_len number of elements in each row - * @param[in] bias per output channel bias. Range : int32 - * @param[in,out] out pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details Supported framework: TensorFlow Lite -*/ -q7_t *arm_nn_mat_mult_s8(const q7_t *input_row, - const q7_t *input_col, - const uint16_t output_ch, - const uint16_t col_batches, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t out_offset, - const int32_t col_offset, - const int32_t row_offset, - const int16_t out_activation_min, - const int16_t out_activation_max, - const uint16_t row_len, - const int32_t *const bias, - q7_t *out); - -/** - * @brief General Matrix-multiplication without requantization for one row & one column - * @param[in] row_elements number of row elements - * @param[in] row_base pointer to row operand - * @param[in] col_base pointer to col operand - * @param[out] sum_col pointer to store sum of column elements - * @param[out] output pointer to store result of multiply-accumulate - * @return The function returns the multiply-accumulated result of the row by column. 
- * - * @details Pseudo-code - * *output = 0 - * sum_col = 0 - * for (i = 0; i < row_elements; i++) - * *output += row_base[i] * col_base[i] - * sum_col += col_base[i] - * -*/ -arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements, - const int8_t *row_base, - const int8_t *col_base, - int32_t *const sum_col, - int32_t *const output); - -/** - * @brief General Matrix-multiplication without requantization for four rows and one column - * @param[in] row_elements number of row elements - * @param[in] offset offset between rows. Can be the same as row_elements. - * For e.g, in a 1x1 conv scenario with stride as 1. - * @param[in] row_base pointer to row operand - * @param[in] col_base pointer to col operand - * @param[out] sum_col pointer to store sum of column elements - * @param[out] output pointer to store result(4 int32's) of multiply-accumulate - * @return The function returns the multiply-accumulated result of the row by column - * - * @details Pseudo-code - * output[0] = 0 - * .. - * output[3] = 0 - * sum_col = 0 - * for (i = 0; i < row_elements; i++) - * output[0] += row_base[i] * col_base[i] - * .. - * output[3] += row_base[i + (row_elements * 3)] * col_base[i] - * sum_col += col_base[i] -*/ -arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements, - const int32_t offset, - const int8_t *row_base, - const int8_t *col_base, - int32_t *const sum_col, - int32_t *const output); - -/** -* @brief General Matrix-multiplication function with per-channel requantization. -* This function assumes: -* - LHS input matrix NOT transposed (nt) -* - RHS input matrix transposed (t) -* -* @note This operation also performs the broadcast bias addition before the requantization -* -* @param[in] lhs Pointer to the LHS input matrix -* @param[in] rhs Pointer to the RHS input matrix -* @param[in] bias Pointer to the bias vector. 
The length of this vector is equal to the number of output columns (or RHS input rows) -* @param[out] dst Pointer to the output matrix with "m" rows and "n" columns -* @param[in] dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to -* the number of output columns (or RHS input rows) -* @param[in] dst_shifts Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to -* the number of output columns (or RHS input rows) -* @param[in] lhs_rows Number of LHS input rows -* @param[in] rhs_rows Number of RHS input rows -* @param[in] rhs_cols Number of LHS/RHS input columns -* @param[in] lhs_offset Offset to be applied to the LHS input value -* @param[in] dst_offset Offset to be applied the output result -* @param[in] activation_min Minimum value to clamp down the output. Range : int8 -* @param[in] activation_max Maximum value to clamp up the output. Range : int8 -* -* @return The function returns ARM_MATH_SUCCESS -* -*/ -arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t *dst_multipliers, - const int32_t *dst_shifts, - const int32_t lhs_rows, - const int32_t rhs_rows, - const int32_t rhs_cols, - const int32_t lhs_offset, - const int32_t dst_offset, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief s8 Vector by Matrix (transposed) multiplication - * - * @param[in] lhs Input left-hand side vector - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] bias Input bias - * @param[out] dst Output vector - * @param[in] lhs_offset Offset to be added to the input values of the left-hand side vector. Range: -127 to 128 - * @param[in] rhs_offset Offset to be added to the input values of the right-hand side matrix. Range: -127 to 128 - * @param[in] dst_offset Offset to be added to the output values. 
Range: -127 to 128 - * @param[in] dst_multiplier Output multiplier - * @param[in] dst_shift Output shift - * @param[in] rhs_cols Number of columns in the right-hand side input matrix - * @param[in] rhs_rows Number of rows in the right-hand side input matrix - * @param[in] activation_min Minimum value to clamp the output to. Range: int8 - * @param[in] activation_max Maximum value to clamp the output to. Range: int8 - * - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t lhs_offset, - const int32_t rhs_offset, - const int32_t dst_offset, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where - * the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs. - * - * @param[in] lhs Input left-hand side matrix - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] lhs_offset LHS matrix offset(input offset). Range: -127 to 128 - * @param[in] num_ch Number of channels in LHS/RHS - * @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels - * @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels - * @param[in] out_offset Offset to be added to the output values. Range: -127 to 128 - * @param[in] activation_min Minimum value to clamp the output to. Range: int8 - * @param[in] activation_max Maximum value to clamp the output to. Range: int8 - * @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix - * @param[in] output_bias Per channel output bias. 
Length of vector is equal to number of channels - * @param[in] out Output pointer - * - * @return The function returns one of the two - * - Updated output pointer if an implementaiton is available - * - NULL if no implementation is available. - * - * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read out - * for the following. - * - Output shift - * - Output multiplier - * - Output bias - * - rhs - */ -q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t lhs_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out); - -/** - * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. - * Dimensions are the same for lhs and rhs. - * - * @param[in] lhs Input left-hand side matrix - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] lhs_offset LHS matrix offset(input offset). Range: -127 to 128 - * @param[in] num_ch Number of channels in LHS/RHS - * @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels. - * @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels. - * @param[in] out_offset Offset to be added to the output values. Range: -127 to 128 - * @param[in] activation_min Minimum value to clamp the output to. Range: int8 - * @param[in] activation_max Maximum value to clamp the output to. Range: int8 - * @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix - * @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels. 
- * @param[in] out Output pointer - * - * @return The function returns one of the two - * - Updated output pointer if an implementaiton is available - * - NULL if no implementation is available. - * - * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read out - * for the following. - * - Output shift - * - Output multiplier - * - Output bias - * - rhs - */ -q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t lhs_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out); - -/** - @brief Read 2 q15 elements and post increment pointer. - @param[in] in_q15 Pointer to pointer that holds address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15) -{ - q31_t val; - - memcpy(&val, *in_q15, 4); - *in_q15 += 2; - - return (val); -} - -/** - @brief Read 4 q7 from q7 pointer and post increment pointer. - @param[in] in_q7 Pointer to pointer that holds address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7) -{ - q31_t val; - memcpy(&val, *in_q7, 4); - *in_q7 += 4; - - return (val); -} - -/** - @brief Read 2 q15 from q15 pointer. - @param[in] in_q15 pointer to address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2(const q15_t *in_q15) -{ - q31_t val; - memcpy(&val, in_q15, 4); - - return (val); -} - -/** - @brief Read 4 q7 values. - @param[in] in_q7 pointer to address of input. 
- @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7) -{ - q31_t val; - memcpy(&val, in_q7, 4); - - return (val); -} - -/** - * @brief memset optimized for MVE - * @param[in, out] dst Destination pointer - * @param[in] val Value to set - * @param[in] block_size Number of bytes to copy. - * - */ -__STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, - const q7_t val, - uint32_t block_size) -{ -#if defined(ARM_MATH_MVEI) - __asm volatile ( - " vdup.8 q0, %[set_val] \n" - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vstrb.8 q0, [%[in]], 16 \n" - " letp lr, 2b \n" - "1: \n" - :[in] "+r"(dst) - :[cnt] "r"(block_size), [set_val] "r"(val) - :"q0", "memory", "r14"); -#else - memset(dst, val, block_size); -#endif -} - -#if defined (ARM_MATH_DSP) - -/** - * @brief read and expand one q7 word into two q15 words - */ - -__STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t * out1, q31_t * out2) -{ - q31_t inA = arm_nn_read_q7x4_ia(&source); - q31_t inAbuf1 = __SXTB16(__ROR((uint32_t)inA, 8)); - q31_t inAbuf2 = __SXTB16(inA); - -#ifndef ARM_MATH_BIG_ENDIAN - *out2 = (int32_t) (__PKHTB (inAbuf1, inAbuf2, 16)); - *out1 = (int32_t) (__PKHBT (inAbuf2, inAbuf1, 16)); -#else - *out1 = (int32_t) (__PKHTB(inAbuf1, inAbuf2, 16)); - *out2 = (int32_t) (__PKHBT(inAbuf2, inAbuf1, 16)); -#endif - - return source; -} - -/** - * @brief read and expand one q7 word into two q15 words with reordering - */ - -__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t * out1, q31_t * out2) -{ - q31_t inA = arm_nn_read_q7x4_ia(&source); -#ifndef ARM_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out2 = __SXTB16(inA); -#endif - - return source; -} - -/** - * @brief read and expand one q7 word into two q15 words with reordering and add an offset - */ -__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered_with_offset(const q7_t *source, 
q31_t * out1, q31_t * out2, q31_t offset) -{ - q31_t inA = arm_nn_read_q7x4_ia(&source); - -#ifndef ARM_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out2 = __SXTB16(inA); -#endif - *out1 = __QADD16(*out1,offset); - *out2 = __QADD16(*out2,offset); - - return source; -} - -#endif - - - -/** - * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation - * - * Basic Math Functions for Neural Network Computation - * - */ - -/** - * @brief q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * @return none. - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable q15 range [0x8000 0x7FFF] will be saturated. - */ - -void arm_nn_mult_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - const uint16_t out_shift, - uint32_t blockSize); - -/** - * @brief q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * @return none. - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable q7 range [0x80 0x7F] will be saturated. 
- */ - -void arm_nn_mult_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - const uint16_t out_shift, - uint32_t blockSize); - -/** - * @brief macro for adding rounding offset - */ -#ifndef ARM_NN_TRUNCATE - #define NN_ROUND(out_shift) ( (0x1u << out_shift) >> 1 ) -#else - #define NN_ROUND(out_shift) 0 -#endif - -// Macros for shortening quantization functions' names and avoid long lines -#define MUL_SAT(a, b) arm_nn_doubling_high_mult((a), (b)) -#define MUL_SAT_MVE(a, b) arm_doubling_high_mult_mve_32x4((a), (b)) -#define MUL_POW2(a, b) arm_nn_mult_by_power_of_two((a), (b)) - - -#define DIV_POW2(a, b) arm_nn_divide_by_power_of_two((a), (b)) -#define DIV_POW2_MVE(a, b) arm_divide_by_power_of_two_mve((a), (b)) - - -#define EXP_ON_NEG(x) arm_nn_exp_on_negative_values((x)) -#define ONE_OVER1(x) arm_nn_one_over_one_plus_x_for_x_in_0_1((x)) - -/** - * @brief Saturating doubling high multiply. Result matches - * NEON instruction VQRDMULH. - * @param[in] m1 Multiplicand. Range: {Q31_MIN, Q31_MAX} - * @param[in] m2 Multiplier. Range: {Q31_MIN, Q31_MAX} - * @return Result of multiplication. - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult(const q31_t m1, const q31_t m2) -{ - q31_t result = 0; - // Rounding offset to add for a right shift of 31 - q63_t mult = 1 << 30; - - if ((m1 < 0) ^ (m2 < 0)) - { - mult = 1 - mult; - } - // Gets resolved as a SMLAL instruction - mult = mult + (q63_t)m1 * m2; - - // Utilize all of the upper 32 bits. This is the doubling step - // as well. - result = (int32_t) (mult / (1ll << 31)); - - if ((m1 == m2) && (m1 == (int32_t)Q31_MIN)) - { - result = Q31_MAX; - } - return result; -} - -/** - * @brief Doubling high multiply without saturation. This is intended - * for requantization where the scale is a positive integer - * - * @param[in] m1 Multiplicand. Range: {Q31_MIN, Q31_MAX} - * @param[in] m2 Multiplier Range: {Q31_MIN, Q31_MAX} - * @return Result of multiplication. 
- * @note The result of this matches that of neon instruction - * VQRDMULH for m1 in range {Q31_MIN, Q31_MAX} and m2 in - * range {Q31_MIN + 1, Q31_MAX}. Saturation occurs when - * m1 equals m2 equals Q31_MIN and that is not handled by - * this function. - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult_no_sat(const q31_t m1, const q31_t m2) -{ - q31_t result = 0; - union arm_nn_long_long mult; - - // Rounding offset to add for a right shift of 31 - mult.word.low = 1 << 30; - mult.word.high = 0; - - // Gets resolved as a SMLAL instruction - mult.long_long = mult.long_long + (q63_t)m1 * m2; - - // Utilize all of the upper 32 bits. This is the doubling step - // as well. - result = (int32_t)(mult.long_long >> 31); - - return result; -} - -/** - * @brief Rounding divide by power of two. - * @param[in] dividend - Dividend - * @param[in] exponent - Divisor = power(2, exponent) - * Range: [0, 31] - * @return Rounded result of division. Midpoint is rounded away from zero. - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent) -{ - q31_t result = 0; - const q31_t remainder_mask = (1 << exponent) - 1; - int32_t remainder = remainder_mask & dividend; - - // Basic division - result = dividend >> exponent; - - // Adjust 'result' for rounding (mid point away from zero) - q31_t threshold = remainder_mask >> 1; - if (result < 0) - { - threshold++; - } - if (remainder > threshold) - { - result++; - } - - return result; -} - -/** - * @brief Requantize a given value. - * @param[in] val Value to be requantized - * @param[in] multiplier multiplier. 
Range {Q31_MIN + 1, Q32_MAX} - * @param[in] shift left or right shift for 'val * multiplier' - * - * @return Returns (val * multiplier)/(2 ^ shift) - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift) -{ - return arm_nn_divide_by_power_of_two( - arm_nn_doubling_high_mult_no_sat(val * (1 << LEFT_SHIFT(shift)), multiplier), - RIGHT_SHIFT(shift)); -} - -/** - * @brief memcpy optimized for MVE - * @param[in, out] dst Destination pointer - * @param[in] src Source pointer. - * @param[in] block_size Number of bytes to copy. - * - */ -__STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst, - const q7_t *__RESTRICT src, - uint32_t block_size) -{ -#if defined(ARM_MATH_MVEI) - __asm volatile ( - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vldrb.8 q0, [%[in]], 16 \n" - " vstrb.8 q0, [%[out]], 16 \n" - " letp lr, 2b \n" - "1: \n" - :[in] "+r"(src) - ,[out] "+r"(dst) - :[cnt] "r"(block_size) - :"q0", "memory", "r14"); -#else - memcpy(dst, src, block_size); -#endif -} - -#if defined(ARM_MATH_MVEI) -/** - * @brief Vector saturating doubling high multiply returning high half. - * @param[in] m1 Multiplicand - * @param[in] m2 Multiplier - * @return Result of multiplication. - * - */ -__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve(const int32x4_t m1, const q31_t m2) -{ - return vqrdmulhq_n_s32(m1, m2); -} - -/** - * @brief Vector rounding divide by power of two. - * @param[in] dividend - Dividend vector - * @param[in] exponent - Divisor = power(2, exponent) - * Range: [0, 31] - * @return Rounded result of division. Midpoint is rounded away from zero. 
- * - */ -__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t dividend, const q31_t exponent) -{ - const int32x4_t shift = vdupq_n_s32(-exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31); - const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup); - return vrshlq_s32(fixed_up_dividend, shift); -} - -/** - * @brief Requantize a given vector. - * @param[in] val Vector to be requantized - * @param[in] multiplier multiplier - * @param[in] shift shift - * - * @return Returns (val * multiplier)/(2 ^ shift) - * - */ -__STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift) -{ - return arm_divide_by_power_of_two_mve( - arm_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier), - RIGHT_SHIFT(shift)); -} - -__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2) -{ - return vqrdmulhq_s32(m1, m2); -} - -__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve_32x4(const int32x4_t dividend, const int32x4_t exponent) -{ - const int32x4_t shift = -exponent; - const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31); - const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup); - return vrshlq_s32(fixed_up_dividend, shift); -} - -__STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val, const int32x4_t multiplier, const int32x4_t shift) -{ - const int32x4_t zz = vdupq_n_s32(0); - const mve_pred16_t p = vcmpgtq_n_s32(shift, 0); - - const int32x4_t left_shift = vpselq_s32(shift, zz, p); - const int32x4_t right_shift = -vpselq_s32(zz, shift, p); - - return arm_divide_by_power_of_two_mve_32x4(arm_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier), right_shift); -} -#endif - -// @note The following functions are used only for softmax layer, scaled bits = 5 assumed - -__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val) 
-{ - int32_t mask = 0; - int32_t shift = 24; - - const int32_t val_mod_minus_quarter = (val & ((1 << shift) - 1)) - (1 << shift); - const int32_t remainder = val_mod_minus_quarter - val; - const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28); - const int32_t x2 = MUL_SAT(x, x); - - int32_t result = 1895147668 + MUL_SAT(1895147668, x + - DIV_POW2(MUL_SAT(DIV_POW2(MUL_SAT(x2, x2), 2) + MUL_SAT(x2, x), 715827883) + x2, 1)); - -#define SELECT_IF_NON_ZERO(x) \ -{ \ - mask = MASK_IF_NON_ZERO(remainder & (1 << shift++)); \ - result = SELECT_USING_MASK(mask, MUL_SAT(result, x), result); \ -} - - SELECT_IF_NON_ZERO(1672461947) - SELECT_IF_NON_ZERO(1302514674) - SELECT_IF_NON_ZERO(790015084) - SELECT_IF_NON_ZERO(290630308) - SELECT_IF_NON_ZERO(39332535) - SELECT_IF_NON_ZERO(720401) - SELECT_IF_NON_ZERO(242) - -#undef SELECT_IF_NON_ZERO - - mask = MASK_IF_ZERO(val); - return SELECT_USING_MASK(mask, Q31_MAX, result); -} - -__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp) -{ - const int32_t thresh = ((1 << (31 - exp)) - 1); - int32_t result = val << exp; - result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), Q31_MAX, result); - result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), Q31_MIN, result); - return result; -} - -__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val) -{ - const int64_t sum = (int64_t)val + (int64_t)Q31_MAX; - const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 
1 : -1)) / 2L); - int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540); - - const int32_t shift = (1 << 29); - x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2); - x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2); - x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2); - - return MUL_POW2(x, 1); -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c deleted file mode 100644 index 7f46074ec..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_activations_q15.c - * Description: Q15 neural network activation function using direct table look-up - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_common_tables.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/** - * @brief neural network activation function using direct table look-up - * - * @note Refer header file for details. - * - */ - -void arm_nn_activations_direct_q15(q15_t * data, uint16_t size, uint16_t int_width, arm_nn_activation_type type) -{ - uint16_t i = size; - q15_t *pIn = data; - q15_t *pOut = data; - uint16_t shift_size = 8 + 3 - int_width; - uint32_t bit_mask = 0x7FF >> int_width; - uint32_t full_frac = bit_mask + 1; - const q15_t *lookup_table; - - switch (type) - { - case ARM_SIGMOID: - lookup_table = sigmoidTable_q15; - break; - case ARM_TANH: - default: - lookup_table = tanhTable_q15; - break; - } - - while (i) - { - q15_t out; - q15_t in = *pIn++; - q15_t frac = (uint32_t) in & bit_mask; - q15_t value = lookup_table[(uint8_t)(in >> shift_size)]; - if ((in >> shift_size) != 0x7f) - { - q15_t value2 = lookup_table[(uint8_t)(1 + ((uint8_t)(in >> shift_size)))]; - /* doing the interpolation here for better accuracy */ - out = ((q31_t) (full_frac - frac) * value + (q31_t) value2 * frac) >> shift_size; - } else - { - /* the largest positive value does not have a right side for linear interpolation */ - out = value; - } - - *pOut++ = out; - i--; - } - -} - -/** - * @} end of Acti group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c deleted file mode 100644 index 40ab67880..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_activations_q7.c - * Description: Q7 neural network activation function using direct table look-up - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_common_tables.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - - /** - * @brief Q7 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * - * @details - * - * This is the direct table look-up approach. - * - * Assume here the integer part of the fixed-point is <= 3. - * More than 3 just not making much sense, makes no difference with - * saturation followed by any of these activation functions. - */ - -void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width, arm_nn_activation_type type) -{ - uint16_t i = size; - q7_t *pIn = data; - q7_t *pOut = data; - q7_t in; - q7_t out; - uint16_t shift_size = 3 - int_width; - const q7_t *lookup_table; - switch (type) - { - case ARM_SIGMOID: - lookup_table = sigmoidTable_q7; - break; - case ARM_TANH: - default: - lookup_table = tanhTable_q7; - break; - } - while (i) - { - in = *pIn++; - out = lookup_table[(uint8_t) (in >> shift_size)]; - *pOut++ = out; - i--; - } -} - -/** - * @} end of Acti group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c deleted file mode 100644 index 6114aa8d6..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_relu6_s8.c - * Description: Basic s8 version of ReLU6 - * - * $Date: Spetember 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - - /* - * Basic ReLU6 function - * - * Refer to header file for details. 
- * - */ - -void arm_relu6_s8(q7_t *data, uint16_t size) -{ - int32_t i; - - for (i = 0; i < size; i++) - { - int32_t ip = data[i]; - - ip = MAX(ip, 0); - data[i] = MIN(ip, 6); - } -} - -/** - * @} end of Acti group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c deleted file mode 100644 index 711518d9c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_relu_q15.c - * Description: Q15 version of ReLU - * - * $Date: February 27, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/** - * @brief Q15 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * - * @details - * - * Optimized relu with QSUB instructions. - * - */ - -void arm_relu_q15(q15_t *data, uint16_t size) -{ - -#if defined(ARM_MATH_DSP) - /* Run the following code for M cores with DSP extension */ - - uint16_t i = size >> 1; - q15_t *input = data; - q15_t *output = data; - q31_t in; - q31_t buf; - q31_t mask; - - while (i) - { - in = read_q15x2_ia(&input); - - /* extract the first bit */ - buf = __ROR(in & 0x80008000, 15); - - /* if MSB=1, mask will be 0xFF, 0x0 otherwise */ - mask = __QSUB16(0x00000000, buf); - - write_q15x2_ia(&output, in & (~mask)); - i--; - } - - if (size & 0x1) - { - if (*input < 0) - { - *input = 0; - } - input++; - } -#else - /* Run the following code as reference implementation for M cores without DSP extension */ - uint16_t i; - - for (i = 0; i < size; i++) - { - if (data[i] < 0) - data[i] = 0; - } - -#endif /* ARM_MATH_DSP */ -} - -/** - * @} end of Acti group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c deleted file mode 100644 index e007fd16b..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_relu_q7.c - * Description: Q7 version of ReLU - * - * $Date: May 29, 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - - /** - * @brief Q7 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * - * @details - * - * Optimized relu with QSUB instructions. 
- * - */ - -void arm_relu_q7(q7_t *data, uint16_t size) -{ - -#if defined(ARM_MATH_DSP) - /* Run the following code for M cores with DSP extension */ - - uint16_t i = size >> 2; - q7_t *input = data; - q7_t *output = data; - q31_t in; - q31_t buf; - q31_t mask; - - while (i) - { - in = read_q7x4_ia(&input); - - /* extract the first bit */ - buf = (int32_t)__ROR((uint32_t)in & 0x80808080, 7); - - /* if MSB=1, mask will be 0xFF, 0x0 otherwise */ - mask = __QSUB8(0x00000000, buf); - - write_q7x4_ia(&output, in & (~mask)); - - i--; - } - - i = size & 0x3; - while (i) - { - if (*input < 0) - { - *input = 0; - } - input++; - i--; - } - -#else - /* Run the following code as reference implementation for cores without DSP extension */ - - uint16_t i; - - for (i = 0; i < size; i++) - { - if (data[i] < 0) - data[i] = 0; - } - -#endif -} - -/** - * @} end of Acti group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c deleted file mode 100644 index 6248c153c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_elementwise_add_s8 - * Description: Element wise add - * - * $Date: July 31, 2020 - * $Revision: V.2.5.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" -#if defined(ARM_MATH_MVEI) -#include "arm_helium_utils.h" -#endif - -#if defined(ARM_MATH_MVEI) -#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT) \ - __INPUT_V = arm_doubling_high_mult_mve(__INPUT_V, __MULT); \ - __INPUT_V = arm_divide_by_power_of_two_mve(__INPUT_V, -__SHIFT); -#endif - - -/** - * @note The *_no_sat API does not mean that the input not saturated, Since - * __MULT is a positive integer, it is saturated. The API definition - * has more info about it. - */ -#define SAT_INPUT(__INPUT, __MULT, __SHIFT) \ - __INPUT = arm_nn_doubling_high_mult_no_sat(__INPUT, __MULT); \ - __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT); - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup BasicMath - * @{ - */ - -/* - * s8 element wise add - * - * Refer header file for details. 
- * - */ - -/* Note: __SHIFT is expected to be <=0 */ - - -arm_status -arm_elementwise_add_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_1_mult, - const int32_t input_1_shift, - const int32_t input_2_offset, - const int32_t input_2_mult, - const int32_t input_2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const uint32_t block_size) -{ -#if defined(ARM_MATH_MVEI) - int32_t count = (int32_t)block_size; - - while (count > 0) - { - int32x4_t vect_1; - int32x4_t vect_2; - - mve_pred16_t p = vctp32q((uint32_t)count); - - vect_1 = vldrbq_z_s32(input_1_vect, p); - vect_2 = vldrbq_z_s32(input_2_vect, p); - - vect_1 = vaddq_s32(vect_1, vdupq_n_s32(input_1_offset)); - vect_2 = vaddq_s32(vect_2, vdupq_n_s32(input_2_offset)); - - vect_1 = vshlq_r_s32(vect_1, left_shift); - vect_2 = vshlq_r_s32(vect_2, left_shift); - - SAT_INPUT_VECT(vect_1, input_1_mult, input_1_shift); - SAT_INPUT_VECT(vect_2, input_2_mult, input_2_shift); - - vect_1 = vaddq_s32(vect_1, vect_2); - SAT_INPUT_VECT(vect_1, out_mult, out_shift); - - vect_1 = vaddq_n_s32(vect_1, out_offset); - - vect_1 = vmaxq_s32(vect_1, vdupq_n_s32(out_activation_min)); - vect_1 = vminq_s32(vect_1, vdupq_n_s32(out_activation_max)); - - input_1_vect += 4; - input_2_vect += 4; - vstrbq_p_s32(output, vect_1, p); - - output += 4; - count -= 4; - } -#else - uint32_t loop_count; - int32_t input_1; - int32_t input_2; - int32_t sum; - -#if defined(ARM_MATH_DSP) - int32_t a_1, b_1, a_2, b_2; - - int32_t offset_1_packed, offset_2_packed; - - int8_t r1, r2, r3, r4; - - offset_1_packed = (input_1_offset << 16U) | (input_1_offset & 0x0FFFFL); - offset_2_packed = (input_2_offset << 16U) | (input_2_offset & 0x0FFFFL); - - loop_count = block_size >> 2; - - while (loop_count > 0U) - { - /* 4 outputs are calculated in 
one loop. The order of calculation is follows the order of output sign extension - intrinsic */ - input_1_vect = read_and_pad_reordered(input_1_vect, &b_1, &a_1); - input_2_vect = read_and_pad_reordered(input_2_vect, &b_2, &a_2); - - a_1 = __SADD16(a_1, offset_1_packed); - b_1 = __SADD16(b_1, offset_1_packed); - - a_2 = __SADD16(a_2, offset_2_packed); - b_2 = __SADD16(b_2, offset_2_packed); - - /* Sum 1 */ - input_1 = (int16_t)(b_1 & 0x0FFFFL) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = (int16_t)(b_2 & 0x0FFFFL) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r1 = (q7_t)sum; - - /* Sum 3 */ - input_1 = (int16_t)((b_1 >> 16) & 0x0FFFFL) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = (int16_t)((b_2 >> 16) & 0x0FFFFL) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r3 = (q7_t)sum; - - /* Sum 2 */ - input_1 = (int16_t)(a_1 & 0x0FFFFL) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = (int16_t)(a_2 & 0x0FFFFL) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r2 = (q7_t)sum; - - /* Sum 4 */ - input_1 = (int16_t)((a_1 >> 16) & 0x0FFFFL) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = (int16_t)((a_2 >> 16) & 0x0FFFFL) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, 
out_activation_min); - sum = MIN(sum, out_activation_max); - r4 = (q7_t)sum; - - write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4)); - - loop_count--; - } - - loop_count = block_size & 0x3; -#else - loop_count = block_size; -#endif - - while (loop_count > 0U) - { - /* C = A + B */ - - input_1 = (*input_1_vect++ + input_1_offset) << left_shift; - input_2 = (*input_2_vect++ + input_2_offset) << left_shift; - - input_1 = arm_nn_doubling_high_mult(input_1, input_1_mult); - input_1 = arm_nn_divide_by_power_of_two(input_1, -input_1_shift); - - input_2 = arm_nn_doubling_high_mult(input_2, input_2_mult); - input_2 = arm_nn_divide_by_power_of_two(input_2, -input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - - *output++ = (q7_t)sum; - - /* Decrement loop counter */ - loop_count--; - } - -#endif /* ARM_MATH_MVEI */ - - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of BasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c deleted file mode 100644 index 4923e9c32..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_elementwise_mul_s8 - * Description: Element wise multiplication - * - * $Date: May 29, 2020 - * $Revision: V.1.0.3 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup BasicMath - * @{ - */ - -/** - * @brief s8 element wise multiplication of two vectors - * - * @note Refer header file for details. 
- * - */ - -arm_status -arm_elementwise_mul_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const uint32_t block_size) -{ - - int32_t loop_count; -#if defined(ARM_MATH_MVEI) - - loop_count = (block_size + 3) / 4; - uint32_t num_elements = block_size; - - for (int i = 0; i < loop_count; i++) - { - mve_pred16_t p = vctp32q(num_elements); - - int32x4_t input_1 = vldrbq_z_s32(input_1_vect, p); - input_1 = vaddq_n_s32(input_1, input_1_offset); - - int32x4_t input_2 = vldrbq_z_s32(input_2_vect, p); - input_2 = vaddq_n_s32(input_2, input_2_offset); - - int32x4_t res_0 = vmulq_s32(input_1, input_2); - - res_0 = arm_requantize_mve_32x4(res_0, vdupq_n_s32(out_mult), vdupq_n_s32(out_shift)); - - res_0 += vdupq_n_s32(out_offset); - - res_0 = vmaxq_s32(res_0, vdupq_n_s32(out_activation_min)); - res_0 = vminq_s32(res_0, vdupq_n_s32(out_activation_max)); - - vstrbq_p_s32(output, res_0, p); - input_1_vect += 4; - input_2_vect += 4; - output += 4; - num_elements -= 4; - } - -#else - int32_t input_1; - int32_t input_2; - int32_t mul_res; - -#if defined(ARM_MATH_DSP) - int32_t a_1, b_1, a_2, b_2; - - int32_t offset_1_packed, offset_2_packed; - - int8_t r1, r2, r3, r4; - - offset_1_packed = (input_1_offset << 16U) | (input_1_offset & 0x0FFFFL); - offset_2_packed = (input_2_offset << 16U) | (input_2_offset & 0x0FFFFL); - - loop_count = block_size >> 2; - - while (loop_count > 0U) - { - /* 4 outputs are calculated in one loop. 
The order of calculation is follows the order of output sign extension - intrinsic */ - input_1_vect = read_and_pad_reordered(input_1_vect, &b_1, &a_1); - input_2_vect = read_and_pad_reordered(input_2_vect, &b_2, &a_2); - - a_1 = __SADD16(a_1, offset_1_packed); - b_1 = __SADD16(b_1, offset_1_packed); - - a_2 = __SADD16(a_2, offset_2_packed); - b_2 = __SADD16(b_2, offset_2_packed); - - /* Mul 1 */ - input_1 = (int16_t)(b_1 & 0x0FFFFL); - input_2 = (int16_t)(b_2 & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r1 = (q7_t)mul_res; - - /* Mul 3 */ - input_1 = (int16_t)((b_1 >> 16U) & 0x0FFFFL); - input_2 = (int16_t)((b_2 >> 16U) & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r3 = (q7_t)mul_res; - - /* Mul 2 */ - input_1 = (int16_t)(a_1 & 0x0FFFFL); - input_2 = (int16_t)(a_2 & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r2 = (q7_t)mul_res; - - /* Mul 4 */ - input_1 = (int16_t)((a_1 >> 16U) & 0x0FFFFL); - input_2 = (int16_t)((a_2 >> 16U) & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r4 = (q7_t)mul_res; - - write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4)); - - loop_count--; - } - - loop_count = block_size & 0x3; -#else - loop_count = block_size; -#endif - - while (loop_count > 0U) - { - /* C = A * B */ - - input_1 = *input_1_vect++ + input_1_offset; - input_2 = *input_2_vect++ + input_2_offset; - - mul_res = 
input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - - *output++ = (q7_t)mul_res; - - /* Decrement loop counter */ - loop_count--; - } -#endif - return ARM_MATH_SUCCESS; -} - -/** - * @} end of BasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c deleted file mode 100644 index 3c71bcc1f..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_w.c - * Description: s8 version of concatenation along the W axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - - /* - * s8 version of concatenation along the W axis - * - * Refer to header file for details. - * - */ -void arm_concatenation_s8_w(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint32_t offset_w) -{ - const uint32_t input_copy_size = input_x * input_y * input_z * input_w; - - output += offset_w * (input_x * input_y * input_z); - - memcpy(output, input, input_copy_size); -} - -/** - * @} end of Concatenation group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c deleted file mode 100644 index 555893e9c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_x.c - * Description: s8 version of concatenation along the X axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - - /* - * s8 version of concatenation along the X axis - * - * Refer to header file for details. 
- * - */ -void arm_concatenation_s8_x(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_x, - const uint32_t offset_x) -{ - const uint32_t num_iterations = input_y * input_z * input_w; - - output += offset_x; - - uint32_t i; - - // Copy per row - for (i = 0; i < num_iterations; ++i) - { - memcpy(output, input, input_x); - input += input_x; - output += output_x; - } -} - -/** - * @} end of Concatenation group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c deleted file mode 100644 index ab7cdeba0..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_y.c - * Description: s8 version of concatenation along the Y axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - - /* - * s8 version of concatenation along the Y axis - * - * Refer to header file for details. - * - */ -void arm_concatenation_s8_y(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_y, - const uint32_t offset_y) -{ - const uint32_t num_iterations = input_z * input_w; - const uint32_t input_copy_size = input_x * input_y; - const uint32_t output_stride = input_x * output_y; - - output += offset_y * input_x; - uint32_t i; - - // Copy per tile - for (i = 0; i < num_iterations; ++i) - { - memcpy(output, input, input_copy_size); - input += input_copy_size; - output += output_stride; - } -} - -/** - * @} end of Concatenation group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c deleted file mode 100644 index 2ab005f73..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. 
All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_z.c - * Description: s8 version of concatenation along the Z axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - - /* - * s8 version of concatenation along the Z axis - * - * Refer to header file for details. 
- * - */ -void arm_concatenation_s8_z(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_z, - const uint32_t offset_z) -{ - const uint32_t input_copy_size = input_x * input_y * input_z; - const uint32_t output_stride = input_x * input_y * output_z; - - output += offset_z * (input_x * input_y); - - uint32_t i; - - for (i = 0; i < input_w; ++i) - { - memcpy(output, input, input_copy_size); - input += input_copy_size; - output += output_stride; - } -} - -/** - * @} end of Concatenation group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c deleted file mode 100644 index bda119842..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_1_x_n_s8.c - * Description: s8 version of 1xN convolution using symmetric quantization. - * - * $Date: July 27, 2020 - * $Revision: V.2.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nn_types.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * 1xN s8 convolution function. - * - * Refer header file for details. - * - */ - -arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data) -{ - (void)bias_dims; - arm_status status = ARM_MATH_SUCCESS; - if (output_dims->w % 4 != 0) - { - status = ARM_MATH_SIZE_MISMATCH; - goto out; - } - -#if defined(ARM_MATH_MVEI) - q15_t *buffer_a = (q15_t *)ctx->buf; - - const uint16_t input_x = input_dims->w; - const uint16_t kernel_x = filter_dims->w; - const uint16_t output_x = output_dims->w; - const uint16_t output_ch = output_dims->c; - const uint16_t input_ch = input_dims->c; - const uint16_t pad_x = conv_params->padding.w; - const uint16_t stride_x = conv_params->stride.w; - - const int32_t input_offset = conv_params->input_offset; - const int32_t out_offset = conv_params->output_offset; - const int32_t out_activation_min = conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - for (int i_out_x = 0; 
i_out_x <= (output_x - 4); i_out_x += 4) - { - int32_t input_begin_idx[4]; - int32_t ker_begin_idx[4]; - int32_t ker_end_idx[4]; - - for (int i = 0; i < 4; i++) - { - const int32_t est_input_x_idx = stride_x * (i_out_x + i) - pad_x; - input_begin_idx[i] = MAX(0, est_input_x_idx); - ker_begin_idx[i] = MAX(0, -est_input_x_idx); - ker_end_idx[i] = MIN(kernel_x, input_x - est_input_x_idx); - } - - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32x4_t s_offset; - int32_t acc[4]; - if ((ker_begin_idx[0] != 0) || (ker_end_idx[3] != kernel_x)) - { - int32_t sum_row[4]; - - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[0] - ker_begin_idx[0]) * input_ch, - input_data + input_begin_idx[0] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + (ker_begin_idx[0] * input_ch), - &sum_row[0], - &acc[0]); - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[1] - ker_begin_idx[1]) * input_ch, - input_data + input_begin_idx[1] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + (ker_begin_idx[1] * input_ch), - &sum_row[1], - &acc[1]); - - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[2] - ker_begin_idx[2]) * input_ch, - input_data + input_begin_idx[2] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + (ker_begin_idx[2] * input_ch), - &sum_row[2], - &acc[2]); - - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[3] - ker_begin_idx[3]) * input_ch, - input_data + input_begin_idx[3] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + (ker_begin_idx[3] * input_ch), - &sum_row[3], - &acc[3]); - - s_offset = vldrwq_s32(sum_row); - } - else - { - int32_t sum_row; - (void)arm_nn_mat_mul_core_4x_s8(kernel_x * input_ch, - stride_x * input_ch, - input_data + input_begin_idx[0] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch), - &sum_row, - acc); - - s_offset = vdupq_n_s32(sum_row); - } - int32x4_t res = vldrwq_s32(acc); - s_offset = vmulq_n_s32(s_offset, input_offset); - res = vaddq_s32(res, s_offset); - if (bias_data) - { - res = 
vaddq_n_s32(res, bias_data[i_out_ch]); - } - res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); - res = vaddq_n_s32(res, out_offset); - - res = vmaxq_s32(res, vdupq_n_s32(out_activation_min)); - res = vminq_s32(res, vdupq_n_s32(out_activation_max)); - - const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3}; - vstrbq_scatter_offset_s32(output_data, scatter_offset, res); - output_data++; - } - output_data += (3 * output_ch); - } - -#else - status = arm_convolve_s8(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); -#endif - -out: - /* Return to application */ - return status; -} - -int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims) -{ -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t); -#else - (void)input_dims; - (void)filter_dims; - return 0; -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c deleted file mode 100644 index 68f95f787..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_1x1_HWC_q7_fast_nonsquare.c - * Description: Fast Q7 version of 1x1 convolution (non-square shape) - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output 
tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 - * and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise - * separable convolution. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - * - * [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications - * https://arxiv.org/abs/1704.04861 - */ - -arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - (void)dim_im_in_y; - int16_t i_out_y, i_out_x; - int16_t i_ch_out; - - /* ----------------------- - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1 - || padding_x != 0 || padding_y != 0 || 
stride_x != 1 || stride_y != 1) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_out_y * dim_im_in_x + i_out_x) * ch_im_in, pBuffer, - ch_im_in); - pBuffer += ch_im_in; - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* check if there is left-over for compute */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) - { - q31_t sum = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = bufferA; - /* basically each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad_reordered(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q7_t) __SSAT((sum >> out_shift), 8); - pOut++; - - } - - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1 - || padding_x != 0 || padding_y != 0 || stride_x != 1 || stride_y != 1) - { - /* check if the input dimension meets the constraints */ - return 
ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - // if-for implementation - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_y + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c deleted file mode 100644 index 94745676d..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_1x1_s8_fast.c - * Description: Fast q7 version of 1x1 convolution (non-square shape) - * - * $Date: July 27, 2020 - * $Revision: V.2.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nn_types.h" - -#define DIM_KER_X (1U) -#define DIM_KER_Y (1U) - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Fast s8 version for 1x1 convolution (non-square shape) - * - * Refer header file for details. 
- * - */ - -arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - if (input_dims->c % 4 != 0 || - conv_params->padding.w != 0 || conv_params->padding.h != 0 || - conv_params->stride.w != 1 || conv_params->stride.h != 1) - { - return ARM_MATH_SIZE_MISMATCH; - } - - (void)ctx; - (void)filter_dims; - (void)bias_dims; - -#if defined(ARM_MATH_MVEI) - - const int32_t col_len = input_dims->w * input_dims->h * input_dims->n; - const int32_t output_ch = output_dims->c; - const int32_t input_ch = input_dims->c; - const int32_t input_offset = conv_params->input_offset; - const int32_t out_offset = conv_params->output_offset; - const int32_t out_activation_min = conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - for (int i_items = 0; i_items <= (col_len - 4); i_items += 4) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t sum_row = 0; - int32_t temp_out[4]; - - (void)arm_nn_mat_mul_core_4x_s8(input_ch, - input_ch, - input_data + i_items * input_ch, - filter_data + i_out_ch * input_ch, - &sum_row, - temp_out); - int32x4_t res = vldrwq_s32(temp_out); - if (bias_data) - { - res = vaddq_n_s32(res, bias_data[i_out_ch]); - } - sum_row = sum_row * input_offset; - res = vaddq_n_s32(res, sum_row); - res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); - res = vaddq_n_s32(res, out_offset); - - res = vmaxq_s32(res, vdupq_n_s32(out_activation_min)); - res = vminq_s32(res, vdupq_n_s32(out_activation_max)); - - const uint32x4_t scatter_offset = 
{0, (uint32_t)output_ch, - (uint32_t)output_ch * 2, - (uint32_t)output_ch * 3}; - vstrbq_scatter_offset_s32(output_data, scatter_offset, res); - output_data++; - } - output_data += (3 * output_ch); - } - - /* Handle left over elements */ - for (int i_items = (col_len & ~0x3); i_items < col_len; i_items++) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t sum_row = 0; - - int32_t acc; - (void)arm_nn_mat_mul_core_1x_s8(input_ch, - input_data + i_items * input_ch, - filter_data + i_out_ch * input_ch, - &sum_row, - &acc); - if (bias_data) - { - acc += bias_data[i_out_ch]; - } - sum_row = (sum_row * input_offset); - acc += sum_row; - acc = arm_nn_requantize(acc, output_mult[i_out_ch], output_shift[i_out_ch]); - acc += out_offset; - - acc = MAX(acc, out_activation_min); - acc = MIN(acc, out_activation_max); - *output_data++ = acc; - } - } - -#else - /* Run the following code as reference implementation for Cortex-M processors with or without DSP extension */ - - const int32_t lhs_rows = input_dims->w * input_dims->h * input_dims->n; - const int32_t rhs_rows = output_dims->c; - const int32_t rhs_cols = input_dims->c; - - arm_nn_mat_mult_nt_t_s8(input_data, - filter_data, - bias_data, - output_data, - quant_params->multiplier, - quant_params->shift, - lhs_rows, - rhs_rows, - rhs_cols, - conv_params->input_offset, - conv_params->output_offset, - conv_params->activation.min, - conv_params->activation.max); - -#endif - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims) -{ - (void)input_dims; - return 0; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c deleted file mode 100644 index f11c51793..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q15_basic.c - * Description: Q15 version of convolution - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Basic Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * bufferA size: ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * This basic version is designed to work for any input tensor and weight - * dimension. 
- */ - -arm_status -arm_convolve_HWC_q15_basic(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - uint16_t im2col_out_pixel_index = 0; - q15_t *pBuffer = bufferA; - q15_t *pOut = Im_out; - q15_t *im_buffer = bufferA; - const q15_t *pA; - int i; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* Filling 0 for out-of-bound paddings */ - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); */ - memcpy(pBuffer, (q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, sizeof(q15_t)*ch_im_in); - } - pBuffer += ch_im_in; - } - } - - pA = wt; - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = im_buffer; - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2; - while (colCnt) - { - q31_t inA1 = arm_nn_read_q15x2_ia(&pA); - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inA2 = arm_nn_read_q15x2_ia(&pA); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = 
__SMLAD(inA1, inB1, sum); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q15_t) __SSAT((sum >> out_shift), 16); - pOut++; - } - - /* counter reset */ - pBuffer = im_buffer; - im2col_out_pixel_index++; - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in + in_col) * ch_im_in + - l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + - n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c deleted file mode 100644 index 9c4a65749..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q15_fast.c - * Description: Fast Q15 version of convolution - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Fast Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 2 - * - * ch_im_out is multipe of 2 - * - */ - -arm_status -arm_convolve_HWC_q15_fast(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - q15_t *pBuffer = bufferA; - q15_t *im_buffer = bufferA; - q15_t *pOut = Im_out; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); */ - memcpy(pBuffer, (q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, sizeof(q15_t)*ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (i_out_x & 0x1) - { - int i; - /* initialize the matrix pointers for A */ - const q15_t *pA = wt; - - /* set up the second output pointers */ - 
q15_t *pOut2 = pOut + ch_im_out; - - /* this loop over rows in A */ - for (i = 0; i < ch_im_out; i += 2) - { - /* setup pointers for B */ - const q15_t *pB = im_buffer; - const q15_t *pB2 = pB + ch_im_in * dim_kernel * dim_kernel; - - /* aling the second pointer for A */ - const q15_t *pA2 = pA + ch_im_in * dim_kernel * dim_kernel; - - /* init the sum with bias */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 1; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA1 = arm_nn_read_q15x2_ia(&pA); - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inA2 = arm_nn_read_q15x2_ia(&pA2); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA1, inB1, sum); - sum2 = __SMLAD(inA1, inB2, sum2); - sum3 = __SMLAD(inA2, inB1, sum3); - sum4 = __SMLAD(inA2, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x1; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inB1 = *pB++; - q15_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q15_t) __SSAT(sum >> out_shift, 16); - *pOut++ = (q15_t) __SSAT(sum3 >> out_shift, 16); - *pOut2++ = (q15_t) __SSAT(sum2 >> out_shift, 16); - *pOut2++ = (q15_t) __SSAT(sum4 >> out_shift, 16); - - /* skip the row computed with A2 */ - pA += ch_im_in * dim_kernel * dim_kernel; - } /* for over ch_im_out */ - - pOut += ch_im_out; - /* counter reset */ - pBuffer = im_buffer; - } - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 2 
!= 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in + in_col) * ch_im_in + - l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + - n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c deleted file mode 100644 index 4889f495f..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q15_fast.c - * Description: Fast Q15 version of convolution - * - * $Date: 24. May 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Fast Q15 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space 
for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 2 - * - * ch_im_out is multipe of 2 - * - */ - -arm_status -arm_convolve_HWC_q15_fast_nonsquare(const q15_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - q15_t *pBuffer = bufferA; - q15_t *im_buffer = bufferA; - q15_t *pOut = Im_out; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in_x + 
i_ker_x) * ch_im_in, pBuffer, ch_im_in); */ - memcpy(pBuffer, (q15_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, sizeof(q15_t)*ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (i_out_x & 0x1) - { - int i; - /* initialize the matrix pointers for A */ - const q15_t *pA = wt; - - /* set up the second output pointers */ - q15_t *pOut2 = pOut + ch_im_out; - - /* this loop over rows in A */ - for (i = 0; i < ch_im_out; i += 2) - { - /* setup pointers for B */ - const q15_t *pB = im_buffer; - const q15_t *pB2 = pB + ch_im_in * dim_kernel_y * dim_kernel_x; - - /* aling the second pointer for A */ - const q15_t *pA2 = pA + ch_im_in * dim_kernel_y * dim_kernel_x; - - /* init the sum with bias */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 1; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA1 = arm_nn_read_q15x2_ia(&pA); - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inA2 = arm_nn_read_q15x2_ia(&pA2); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA1, inB1, sum); - sum2 = __SMLAD(inA1, inB2, sum2); - sum3 = __SMLAD(inA2, inB1, sum3); - sum4 = __SMLAD(inA2, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x1; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inB1 = *pB++; - q15_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q15_t) __SSAT(sum >> out_shift, 16); - *pOut++ = (q15_t) __SSAT(sum3 >> out_shift, 16); - *pOut2++ = (q15_t) __SSAT(sum2 >> out_shift, 16); - *pOut2++ = (q15_t) __SSAT(sum4 >> out_shift, 16); - - /* skip the row 
computed with A2 */ - pA += ch_im_in * dim_kernel_y * dim_kernel_x; - } /* for over ch_im_out */ - - pOut += ch_im_out; - /* counter reset */ - pBuffer = im_buffer; - } - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + - l] * wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x + - n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c deleted file mode 100644 index 0a55d7221..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_RGB.c - * Description: Q7 version of convolution for RGB image - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Q7 convolution function for RGB image - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in equals 3 - * - * This kernel is written exclusively for convolution with ch_im_in - * equals 3. This applies on the first layer of CNNs which has input - * image with RGB format. 
- */ - -arm_status -arm_convolve_HWC_q7_RGB(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, const uint16_t dim_im_out, q15_t * bufferA, q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - // check if number of input channels is 3 - if (ch_im_in != 3) - { - return ARM_MATH_SIZE_MISMATCH; - } - // This part implements the im2col function - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* Equivalent to arm_fill_q15(0, pBuffer, ch_im_in) with assumption: ch_im_in = 3 */ - *__SIMD32(pBuffer) = 0x0; - *(pBuffer + 2) = 0; - pBuffer += 3; - } else - { - /* - * Equivalent to: - * arm_q7_to_q15_no_shift( (q7_t*)Im_in+(i_ker_y*dim_im_in+i_ker_x)*3, pBuffer, 3); - */ - - const q7_t *pPixel = Im_in + (i_ker_y * dim_im_in + i_ker_x) * 3; - q31_t buf = arm_nn_read_q7x4(pPixel); - - union arm_nnword top; - union arm_nnword bottom; - - top.word = __SXTB16(buf); - bottom.word = __SXTB16(__ROR(buf, 8)); - -#ifndef ARM_MATH_BIG_ENDIAN - /* - * little-endian, | omit | 3rd | 2nd | 1st | - * MSB LSB - * top | 3rd | 1st |; bottom | omit | 2nd | - * - 
* version 1, need to swap 2nd and 3rd weight - * *__SIMD32(pBuffer) = top.word; - * *(pBuffer+2) = bottom.half_words[0]; - * - * version 2, no weight shuffling required - */ - *pBuffer++ = top.half_words[0]; - *__SIMD32(pBuffer) = __PKHBT(bottom.word, top.word, 0); -#else - /* - * big-endian, | 1st | 2nd | 3rd | omit | - * MSB LSB - * top | 2nd | omit |; bottom | 1st | 3rd | - * - * version 1, need to swap 2nd and 3rd weight - * *__SIMD32(pBuffer) = bottom.word; - * *(pBuffer+2) = top.half_words[1]; - * - * version 2, no weight shuffling required - */ - *pBuffer++ = bottom.half_words[0]; - *__SIMD32(pBuffer) = __PKHTB(top.word, bottom.word, 0); -#endif - pBuffer += 2; - } - } - } - - if (pBuffer == bufferA + 2 * 3 * dim_kernel * dim_kernel) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15(wt, bufferA, - ch_im_out, - 3 * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* left-over because odd number of output pixels */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q15_t *pB = bufferA; - /* basically each time it process 4 entries */ - uint16_t colCnt = 3 * dim_kernel * dim_kernel >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia((const q15_t **)&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia((const q15_t **)&pB); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = 3 * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - } - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - // check if number of input 
channels is 3 - if (ch_im_in != 3) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - /* if-for implementation */ - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in + in_col) * ch_im_in + - l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + - n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c deleted file mode 100644 index d416d0a9c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_basic.c - * Description: Q7 version of convolution - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Basic Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * This basic version is designed to work for any input tensor and weight - * dimension. 
- */ - -arm_status -arm_convolve_HWC_q7_basic(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* Filling 0 for out-of-bound paddings */ - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - /* Copying the pixel data to column */ - arm_q7_to_q15_no_shift((q7_t *) - Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* Computation is filed for every 2 columns */ - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15(wt, bufferA, - ch_im_out, - ch_im_in * - dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* left-over because odd number of output pixels */ - if (pBuffer != bufferA) - { - const q7_t 
*pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - /* Load the accumulator with bias first */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - - /* Point to the beging of the im2col buffer */ - const q15_t *pB = bufferA; - - /* Each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2; - - while (colCnt) - { - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - } - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - // if-for implementation - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in + in_col) * ch_im_in + - l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + - n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c deleted file mode 100644 index c5d3ed56c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_basic.c - * Description: Q7 version of convolution - * - * $Date: 13. 
July 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Basic Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - */ - -arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB) -{ 
- (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* Filling 0 for out-of-bound paddings */ - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - /* Copying the pixel data to column */ - arm_q7_to_q15_no_shift((q7_t *) - Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* Computation is filed for every 2 columns */ - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_y * dim_kernel_x) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15(wt, bufferA, - ch_im_out, - ch_im_in * - dim_kernel_y * dim_kernel_x, bias_shift, out_shift, bias, pOut); - - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* left-over because odd number of output pixels */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - /* Load the accumulator with bias first */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - - /* Point to the beging of the im2col buffer */ - const q15_t *pB = bufferA; - - /* Each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 2; - - while 
(colCnt) - { - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - } - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - // if-for implementation - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + - (m * dim_kernel_x + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c deleted file mode 100644 index 
e270640cd..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_fast.c - * Description: Fast Q7 version of convolution - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - - /** - * @brief Fast Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 4 ( because of the SIMD32 read and swap ) - * - * ch_im_out is multipe of 2 ( bacause 2x2 mat_mult kernel ) - * - * The im2col converts the Q7 tensor input into Q15 column, which is stored in - * bufferA. There is reordering happenning during this im2col process with - * arm_q7_to_q15_reordered_no_shift. For every four elements, the second and - * third elements are swapped. - * - * The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the - * GEMM computation with the reordered columns. 
- * - * To speed-up the determination of the padding condition, we split the - * computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. - * This reduces the total number of boundary condition checks and improves - * the data copying performance. - */ - -arm_status -arm_convolve_HWC_q7_fast(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* - * Here we split the entire matrix into three regions depending on the padding situation - * Top: i_out_y from 0 to padding - 1 - * Middle: i_out_y from padding to dim_im_out-padding-1 - * Bottom: i_out_y from dim_im_out-padding to dim_im_out-1 - */ - - /* top part */ - for (i_out_y = 0; i_out_y < padding; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, 
sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift - ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, - bufferA, - ch_im_out, - ch_im_in - * - dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* middle part, here we also divide the x into left, mid and right */ - for (; i_out_y < dim_im_out - padding; i_out_y++) - { - - /* left part */ - for (i_out_x = 0; i_out_x < padding; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift - ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, - bufferA, - ch_im_out, - ch_im_in - * - dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* mid part */ - for (; i_out_x < dim_im_out - padding; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in - + - (i_ker_y * - dim_im_in + - i_out_x * - stride - padding) * ch_im_in, pBuffer, ch_im_in * dim_kernel); - pBuffer += ch_im_in * dim_kernel; - } - - if (pBuffer == bufferA + 
2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, - bufferA, - ch_im_out, - ch_im_in - * - dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* right part */ - for (; i_out_x < dim_im_out; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift - ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, - bufferA, - ch_im_out, - ch_im_in - * - dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - for (; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift - ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = - 
arm_nn_mat_mult_kernel_q7_q15_reordered(wt, - bufferA, - ch_im_out, - ch_im_in - * - dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* check if there is left-over for compute */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = bufferA; - /* each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad_reordered(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q7_t) __SSAT((sum >> out_shift), 8); - pOut++; - - } - - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - // if-for implementation - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += - Im_in[(in_row * dim_im_in + in_col) * ch_im_in + - l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + - n) * ch_im_in + l]; - } - } - 
} - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c deleted file mode 100644 index 2dc94ef9c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_fast_nonsquare.c - * Description: Fast Q7 version of convolution (non-sqaure shape) - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - -arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* ----------------------- - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* - * Here we split the entire matrix into three regions depending on the padding situation - * Top: i_out_y from 0 to padding - 1 - * Middle: i_out_y from padding to dim_im_out-padding-1 - * Bottom: i_out_y from dim_im_out-padding to dim_im_out-1 - */ - - /* top part */ - for (i_out_y = 0; i_out_y < padding_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= 
dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, - pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, - bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* middle part, here we also divide the x into left, mid and right */ - for (; i_out_y < dim_im_out_y - padding_y; i_out_y++) - { - - /* left part */ - for (i_out_x = 0; i_out_x < padding_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, - pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, - bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* mid part */ - for (; i_out_x < dim_im_out_x - padding_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + - 
(i_ker_y * dim_im_in_x + i_out_x * stride_x - padding_x) * ch_im_in, - pBuffer, ch_im_in * dim_kernel_x); - pBuffer += ch_im_in * dim_kernel_x; - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, - bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* right part */ - for (; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, - pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, - bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - for (; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t)*ch_im_in); - } else - { - 
arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, - pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = - arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, - bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* check if there is left-over for compute */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = bufferA; - /* basically each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad_reordered(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = (ch_im_in * dim_kernel_y * dim_kernel_x) & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q7_t) __SSAT((sum >> out_shift), 8); - pOut++; - - } - - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - /* if-for implementation */ - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; 
- if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c deleted file mode 100644 index 56355b3bc..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_s8.c - * Description: s8 version of convolution using symmetric quantization. - * - * $Date: July 27, 2020 - * $Revision: V.2.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nn_types.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Basic s8 convolution function. - * - * Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels - * are multiples of 4 or atleast greater than 4. - * - */ - -arm_status arm_convolve_s8(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data) -{ - q15_t *buffer_a = (q15_t *)ctx->buf; - - const uint16_t input_batches = input_dims->n; - const uint16_t input_x = input_dims->w; - const uint16_t input_y = input_dims->h; - const uint16_t input_ch = input_dims->c; - const uint16_t kernel_x = filter_dims->w; - const uint16_t kernel_y = filter_dims->h; - const uint16_t output_x = output_dims->w; - const uint16_t output_y = output_dims->h; - const uint16_t output_ch = output_dims->c; - - const uint16_t pad_x = conv_params->padding.w; - const uint16_t pad_y = conv_params->padding.h; - const uint16_t stride_x = conv_params->stride.w; - const uint16_t stride_y = conv_params->stride.h; - - const int32_t input_offset = conv_params->input_offset; - const int32_t out_offset = conv_params->output_offset; - const int32_t out_activation_min = 
conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - int i_batch; - for (i_batch = 0; i_batch < input_batches; i_batch++) - { -#if defined(ARM_MATH_MVEI) - (void)bias_dims; - /* Generate upto four columns from the input tensor a GEMM computation */ - q7_t *im2col_buf = (q7_t *)buffer_a; - q7_t *out = output_data; - int32_t buffer_fill_cnt = 0; - int32_t padded = 0; - const int32_t num_elem = kernel_x * kernel_y * input_ch; - - /* This part implements the im2col function */ - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - for (int i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y; i_ker_y++) - { - for (int i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x) - { - memset(im2col_buf, (int8_t)-input_offset, sizeof(q7_t) * input_ch); - padded = 1; - } - else - { - arm_memcpy_q7(im2col_buf, input_data + (i_ker_y * input_x + i_ker_x) * input_ch, input_ch); - } - im2col_buf += input_ch; - } - } - - buffer_fill_cnt++; - - /* Computation is filed for every 4 columns */ - if (buffer_fill_cnt == 4 && (padded == 0)) - { - buffer_fill_cnt = 0; - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t sum_row; - int32_t acc[4]; - - (void)arm_nn_mat_mul_core_4x_s8(num_elem, - num_elem, - (q7_t *)buffer_a, - filter_data + num_elem * i_out_ch, - &sum_row, - acc); - int32x4_t s_offset = vdupq_n_s32(sum_row); - - int32x4_t res = vldrwq_s32(acc); - s_offset = vmulq_n_s32(s_offset, input_offset); - if (bias_data) - { - res = vaddq_n_s32(res, bias_data[i_out_ch]); - } - res = vaddq_s32(res, s_offset); - res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); - res = 
vaddq_n_s32(res, out_offset); - - res = vmaxq_s32(res, vdupq_n_s32(out_activation_min)); - res = vminq_s32(res, vdupq_n_s32(out_activation_max)); - - const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3}; - vstrbq_scatter_offset_s32(out, scatter_offset, res); - out++; - } - out += (3 * output_ch); - im2col_buf = (q7_t *)buffer_a; - } - else if (buffer_fill_cnt == 4 && (padded != 0)) - { - buffer_fill_cnt = 0; - out = arm_nn_mat_mult_s8(filter_data, - (q7_t *)buffer_a, - output_ch, - 4, - output_shift, - output_mult, - out_offset, - input_offset, - 0, - out_activation_min, - out_activation_max, - num_elem, - bias_data, - out); - - im2col_buf = (q7_t *)buffer_a; - padded = 0; - } - } - } - /* Handle left over columns */ - if (buffer_fill_cnt != 0) - { - out = arm_nn_mat_mult_s8(filter_data, - (q7_t *)buffer_a, - output_ch, - buffer_fill_cnt, - output_shift, - output_mult, - out_offset, - input_offset, - 0, - out_activation_min, - out_activation_max, - num_elem, - bias_data, - out); - } - -#elif defined(ARM_MATH_DSP) - (void)bias_dims; - int32_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* Generate two columns from the input tensor a GEMM computation */ - q15_t *two_column_buf = buffer_a; - q7_t *out = output_data; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < output_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < output_x; i_out_x++) - { - for (i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y; i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x) - { - /* Filling 0 for out-of-bound paddings */ - memset(two_column_buf, 0, sizeof(q15_t) * input_ch); - } - else - { - /* Copying the pixel data to column */ - arm_q7_to_q15_with_offset(input_data + (i_ker_y * input_x + i_ker_x) * input_ch, two_column_buf, input_ch, input_offset); - } 
- two_column_buf += input_ch; - } - } - - /* Computation is filed for every 2 columns */ - if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x) - { - out = - arm_nn_mat_mult_kernel_s8_s16(filter_data, - buffer_a, - output_ch, - output_shift, - output_mult, - out_offset, - out_activation_min, - out_activation_max, - input_ch * kernel_y * kernel_x, - bias_data, - out); - - /* counter reset */ - two_column_buf = buffer_a; - } - } - } - - /* left-over because odd number of output pixels */ - if (two_column_buf != buffer_a) - { - const q7_t *ker_a = filter_data; - int i; - - for (i = 0; i < output_ch; i++) - { - /* Load the accumulator with bias first */ - q31_t sum = 0; - if (bias_data) - { - sum = bias_data[i]; - } - - /* Point to the beginning of the im2col buffer where the input is available as a rearranged column */ - const q15_t *ip_as_col = buffer_a; - - /* 4 multiply and accumulates are done in one loop. */ - uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2; - - while (col_count) - { - q31_t ker_a1, ker_a2; - q31_t ip_b1, ip_b2; - - ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2); - - ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = __SMLAD(ker_a1, ip_b1, sum); - ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = __SMLAD(ker_a2, ip_b2, sum); - - col_count--; - } - /* Handle left over mac */ - col_count = input_ch * kernel_y * kernel_x & 0x3; - while (col_count) - { - q7_t ker_a1 = *ker_a++; - q15_t ip_b1 = *ip_as_col++; - sum += ker_a1 * ip_b1; - col_count--; - } - - sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - *out++ = (q7_t)sum; - } - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - (void)buffer_a; - int32_t i_out_ch, i_out_y, i_out_x, i_input_ch, i_ker_y, i_ker_x; - int32_t conv_out; - - for (i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - for (i_out_y = 0; i_out_y < 
output_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < output_x; i_out_x++) - { - conv_out = 0; - - const int32_t base_idx_y = stride_y * i_out_y - pad_y; - const int32_t base_idx_x = stride_x * i_out_x - pad_x; - - const int32_t ker_y_start = MAX(0, -base_idx_y); - const int32_t ker_x_start = MAX(0, -base_idx_x); - - const int32_t ker_y_end = MIN(kernel_y, input_y - base_idx_y); - const int32_t ker_x_end = MIN(kernel_x, input_x - base_idx_x); - - for (i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - for (i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t in_row = base_idx_y + i_ker_y; - const int32_t in_col = base_idx_x + i_ker_x; - for (i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - conv_out += - (input_data[(in_row * input_x + in_col) * input_ch + i_input_ch] + input_offset) * - filter_data[i_out_ch * input_ch * kernel_y * kernel_x + - (i_ker_y * kernel_x + i_ker_x) * input_ch + i_input_ch]; - } - } - } - if (bias_data) - { - conv_out += bias_data[i_out_ch]; - } - conv_out = arm_nn_requantize(conv_out, output_mult[i_out_ch], output_shift[i_out_ch]); - conv_out += out_offset; - conv_out = MAX(conv_out, out_activation_min); - conv_out = MIN(conv_out, out_activation_max); - output_data[i_out_ch + (i_out_y * output_x + i_out_x) * output_ch] = (int8_t)conv_out; - } - } - } -#endif - /* Advance to the next batch */ - input_data += (input_x * input_y * input_ch); - output_data += (output_x * output_y * output_ch); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims) -{ -#if defined(ARM_MATH_DSP) - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t); -#else - (void)input_dims; - (void)filter_dims; - return 0; -#endif -} - -/** - * @} end of NNConv group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c deleted file mode 100644 index c55688c39..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_wrapper_s8.c - * Description: s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in cmsis-nn to perform the convolution. 
- * - * $Date: May 18, 2020 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nn_types.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Convolution layer - * - * Refer header file for details. - * - */ - -arm_status arm_convolve_wrapper_s8(const cmsis_nn_context* ctx, - const cmsis_nn_conv_params* conv_params, - const cmsis_nn_per_channel_quant_params* quant_params, - const cmsis_nn_dims* input_dims, - const q7_t *input_data, - const cmsis_nn_dims* filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims* bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims* output_dims, - q7_t *output_data) -{ - if ((conv_params->padding.w == 0) && - (conv_params->padding.h == 0) && - (input_dims->c % 4 == 0) && - (conv_params->stride.w == 1) && - (conv_params->stride.h == 1) && - (filter_dims->w == 1) && - (filter_dims->h == 1)) - { - return arm_convolve_1x1_s8_fast(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } - else if ((output_dims->h == 1) && - (input_dims->h == 1) && - (filter_dims->h == 1) && - (output_dims->w % 4 == 0) && - (input_dims->n == 1)) - { - return arm_convolve_1_x_n_s8(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } - else - { - return arm_convolve_s8(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } -} - -int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params* conv_params, - const cmsis_nn_dims* input_dims, - const cmsis_nn_dims* filter_dims, - const cmsis_nn_dims* output_dims) -{ - if ((conv_params->padding.w == 0) && - 
(conv_params->padding.h == 0) && - (input_dims->c % 4 == 0) && - (conv_params->stride.w == 1) && - (conv_params->stride.h == 1) && - (filter_dims->w == 1) && - (filter_dims->h == 1)) - { - return arm_convolve_1x1_s8_fast_get_buffer_size(input_dims); - } - else if ((output_dims->h == 1) && - (input_dims->h == 1) && - (filter_dims->h == 1) && - (output_dims->w % 4 == 0) && - (input_dims->n == 1)) - { - return arm_convolve_1_x_n_s8_get_buffer_size(input_dims, filter_dims); - } - else - { - return arm_convolve_s8_get_buffer_size(input_dims, filter_dims); - } -} - -/** - * @} end of NNConv group - */ - diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c deleted file mode 100644 index fad8bda55..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_3x3_s8.c - * Description: Optimized s8 depthwise convolution function for channel - * multiplier of 1 and 3x3 kernel size. - * - * $Date: May 14, 2020 - * $Revision: V.2.0.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnsupportfunctions.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Optimized s8 depthwise convolution function with constraint that - * in_channel == out_channel and kernel_x == kernel_y == 3 with pads at most 1 - * - * Refer prototype header file for details. - * - */ - -arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - (void)ctx; - (void)bias_dims; - - const int32_t input_x = input_dims->w; - const int32_t input_y = input_dims->h; - const int32_t input_ch = input_dims->c; - const int32_t output_ch = output_dims->c; - const int32_t pad_x = dw_conv_params->padding.w; - const int32_t pad_y = dw_conv_params->padding.h; - const int32_t stride_x = dw_conv_params->stride.w; - const int32_t stride_y = dw_conv_params->stride.h; - const int32_t *output_shift = quant_params->shift; - const int32_t *output_mult = quant_params->multiplier; - const int32_t output_x = output_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_offset = dw_conv_params->output_offset; - const int32_t input_offset = dw_conv_params->input_offset; - const int32_t output_activation_min = 
dw_conv_params->activation.min; - const int32_t output_activation_max = dw_conv_params->activation.max; - - /* Check input constraints input_ch == output_ch */ - if (input_ch != output_ch) - { - return ARM_MATH_SIZE_MISMATCH; - } - /* Check input constraints pad_x <= 1 */ - if (pad_x > 1 || filter_dims->w != 3 || filter_dims->h != 3) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - int32_t in_ch = 0; - int32_t ker_w_start = MAX(0, -in_w); - - for (; in_ch <= (input_ch - 4); in_ch += 4) - { - int32_t out_buff0 = bias[in_ch + 0]; - int32_t out_buff1 = bias[in_ch + 1]; - int32_t out_buff2 = bias[in_ch + 2]; - int32_t out_buff3 = bias[in_ch + 3]; - - const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch; - const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(3, input_y - in_h); ++ker_h) - { - int32_t in_val = 0; - int32_t ker_val = 0; - - if (ker_w_start == 0) - { - in_val = arm_nn_read_q7x4(input_ptr); - ker_val = arm_nn_read_q7x4(kernel_ptr); - - out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val; - out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8); - out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16); - out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24); - } - - in_val = arm_nn_read_q7x4(input_ptr + input_ch); - ker_val = arm_nn_read_q7x4(kernel_ptr + input_ch); - - out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val; - out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8); - out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16); - out_buff3 += ((int8_t)(in_val >> 24) + 
input_offset) * (int8_t)(ker_val >> 24); - - if ((input_x - in_w) >= 3) - { - in_val = arm_nn_read_q7x4(input_ptr + (input_ch << 1)); - ker_val = arm_nn_read_q7x4(kernel_ptr + (input_ch << 1)); - - out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val; - out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8); - out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16); - out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24); - } - - input_ptr += (input_ch * input_x); - kernel_ptr += (input_ch * 3); - } - - out_buff0 = arm_nn_requantize(out_buff0, output_mult[in_ch + 0], output_shift[in_ch + 0]); - out_buff1 = arm_nn_requantize(out_buff1, output_mult[in_ch + 1], output_shift[in_ch + 1]); - out_buff2 = arm_nn_requantize(out_buff2, output_mult[in_ch + 2], output_shift[in_ch + 2]); - out_buff3 = arm_nn_requantize(out_buff3, output_mult[in_ch + 3], output_shift[in_ch + 3]); - - out_buff0 += output_offset; - out_buff1 += output_offset; - out_buff2 += output_offset; - out_buff3 += output_offset; - - out_buff0 = MIN(MAX(out_buff0, output_activation_min), output_activation_max); - out_buff1 = MIN(MAX(out_buff1, output_activation_min), output_activation_max); - out_buff2 = MIN(MAX(out_buff2, output_activation_min), output_activation_max); - out_buff3 = MIN(MAX(out_buff3, output_activation_min), output_activation_max); - - output[out_idx++] = (int8_t)out_buff0; - output[out_idx++] = (int8_t)out_buff1; - output[out_idx++] = (int8_t)out_buff2; - output[out_idx++] = (int8_t)out_buff3; - } - - // Leftover - for (; in_ch < input_ch; ++in_ch) - { - int32_t out_buff = bias[in_ch]; - - const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch; - const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(3, input_y - in_h); ++ker_h) - { - if (ker_w_start == 0) - { - out_buff += (*(input_ptr) + 
input_offset) * *(kernel_ptr); - } - - out_buff += (*(input_ptr + input_ch) + input_offset) * *(kernel_ptr + input_ch); - - if ((input_x - in_w) >= 3) - { - out_buff += (*(input_ptr + (input_ch << 1)) + input_offset) * *(kernel_ptr + (input_ch << 1)); - } - - input_ptr += (input_ch * input_x); - kernel_ptr += (input_ch * 3); - } - - out_buff = arm_nn_requantize(out_buff, output_mult[in_ch], output_shift[in_ch]); - out_buff += output_offset; - out_buff = MIN(MAX(out_buff, output_activation_min), output_activation_max); - output[out_idx++] = (int8_t)out_buff; - } - } - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c deleted file mode 100644 index 9f2ab1151..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_s8.c - * Description: s8 version of depthwise convolution. - * - * $Date: May 14, 2020 - * $Revision: V.2.0.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -static void depthwise_conv_s8_mult_4(const int8_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const int8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const int32_t stride_y, - const int32_t *bias, - int8_t *output, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch; ++in_ch, out_ch += ch_mult) - { - for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4) - { - int32_t out_buff[4]; - - out_buff[0] = bias[out_ch + 0 + mult_tile]; - out_buff[1] = bias[out_ch + 1 + mult_tile]; - out_buff[2] = bias[out_ch + 2 + mult_tile]; - out_buff[3] = bias[out_ch + 3 + mult_tile]; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); 
++ker_h) - { - int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch; - int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; - - for (int32_t ker_w = ker_w_start; ker_w < MIN(kernel_x, input_x - in_w); ++ker_w, ker_idx += output_ch) - { - int32_t in_val = input[in_idx + ker_w * input_ch] + input_offset; - out_buff[0] += in_val * kernel[ker_idx + 0 + mult_tile]; - out_buff[1] += in_val * kernel[ker_idx + 1 + mult_tile]; - out_buff[2] += in_val * kernel[ker_idx + 2 + mult_tile]; - out_buff[3] += in_val * kernel[ker_idx + 3 + mult_tile]; - } - } -#if defined(ARM_MATH_MVEI) - (void)out_idx; - int32x4_t res = vldrwq_s32(out_buff); - res = arm_requantize_mve_32x4(res, vldrwq_s32(&output_mult[out_ch + mult_tile]), vldrwq_s32(&output_shift[out_ch + mult_tile])); - res = vaddq_n_s32(res, output_offset); - - res = vmaxq_s32(res, vdupq_n_s32(output_activation_min)); - res = vminq_s32(res, vdupq_n_s32(output_activation_max)); - vstrbq_s32(output, res); - output += 4; -#else - out_buff[0] = arm_nn_requantize(out_buff[0], output_mult[out_ch + 0 + mult_tile], output_shift[out_ch + 0 + mult_tile]); - out_buff[1] = arm_nn_requantize(out_buff[1], output_mult[out_ch + 1 + mult_tile], output_shift[out_ch + 1 + mult_tile]); - out_buff[2] = arm_nn_requantize(out_buff[2], output_mult[out_ch + 2 + mult_tile], output_shift[out_ch + 2 + mult_tile]); - out_buff[3] = arm_nn_requantize(out_buff[3], output_mult[out_ch + 3 + mult_tile], output_shift[out_ch + 3 + mult_tile]); - - out_buff[0] += output_offset; - out_buff[1] += output_offset; - out_buff[2] += output_offset; - out_buff[3] += output_offset; - - out_buff[0] = MIN(MAX(out_buff[0], output_activation_min), output_activation_max); - out_buff[1] = MIN(MAX(out_buff[1], output_activation_min), output_activation_max); - out_buff[2] = MIN(MAX(out_buff[2], output_activation_min), output_activation_max); - out_buff[3] = MIN(MAX(out_buff[3], output_activation_min), output_activation_max); 
- - output[out_idx++] = (int8_t)out_buff[0]; - output[out_idx++] = (int8_t)out_buff[1]; - output[out_idx++] = (int8_t)out_buff[2]; - output[out_idx++] = (int8_t)out_buff[3]; - -#endif - } - } - } - } -} - -static void depthwise_conv_s8_generic(const q7_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const q7_t *kernel, - const uint16_t output_ch, - const uint16_t ch_mult, - const uint16_t kernel_x, - const uint16_t kernel_y, - const uint16_t pad_x, - const uint16_t pad_y, - const uint16_t stride_x, - const uint16_t stride_y, - const int32_t *bias, - q7_t *output, - const int32_t *output_shift, - const int32_t *output_mult, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - (void)output_ch; - int i_out = 0; - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++) - { - const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult; - int32_t acc_0; - /* Condition for kernel start dimension: (base_idx_ + ker__start) >= 0 */ - const int ker_y_start = MAX(0, -base_idx_y); - const int ker_x_start = MAX(0, -base_idx_x); - /* Condition for kernel end dimension: (base_idx_ + ker__end) < input_ */ - const int ker_y_end = MIN(kernel_y, input_y - base_idx_y); - const int ker_x_end = MIN(kernel_x, input_x - base_idx_x); - acc_0 = bias[idx_out_ch]; - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + i_ker_y; - for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t idx_x = base_idx_x + i_ker_x; - int32_t idx_0 = 
(idx_y * input_x + idx_x) * input_ch + i_input_ch; - int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; - - acc_0 += (input[idx_0] + input_offset) * kernel[ker_idx_0]; - } - } - - /* Requantize and clamp output to provided range */ - acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]); - acc_0 += output_offset; - acc_0 = MAX(acc_0, output_activation_min); - acc_0 = MIN(acc_0, output_activation_max); - - output[i_out++] = acc_0; - } - } - } - } -} - -/* - * Basic s8 depthwise convolution function. - * - * Refer header file for details. - * Optimization using DSP extension is not available for the generic case where channel multiplier is > 1. - * - */ -arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - (void)dw_conv_params->dilation; - (void)ctx; - - if (dw_conv_params->ch_mult % 4 == 0) - { - depthwise_conv_s8_mult_4(input, input_dims->w, input_dims->h, input_dims->c, kernel, output_dims->c, dw_conv_params->ch_mult, filter_dims->w, filter_dims->h, - dw_conv_params->padding.w, dw_conv_params->padding.h, dw_conv_params->stride.w, dw_conv_params->stride.h, bias, output, - quant_params->shift, quant_params->multiplier, output_dims->w, output_dims->h, dw_conv_params->output_offset, - dw_conv_params->input_offset, dw_conv_params->activation.min, dw_conv_params->activation.max); - } - else - { - depthwise_conv_s8_generic(input, input_dims->w, input_dims->h, input_dims->c, kernel, output_dims->c, dw_conv_params->ch_mult, filter_dims->w, filter_dims->h, - dw_conv_params->padding.w, dw_conv_params->padding.h, dw_conv_params->stride.w, dw_conv_params->stride.h, bias, 
output, - quant_params->shift, quant_params->multiplier, output_dims->w, output_dims->h, dw_conv_params->output_offset, - dw_conv_params->input_offset, dw_conv_params->activation.min, dw_conv_params->activation.max); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c deleted file mode 100644 index 2908b37f7..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_s8_opt.c - * Description: Optimized s8 depthwise separable convolution function for - * channel multiplier of 1. 
- * - * $Date: May 29, 2020 - * $Revision: V.2.0.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnsupportfunctions.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel - * - * Refer prototype header file for details. - * - */ - -arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - const int32_t input_x = input_dims->w; - const int32_t input_y = input_dims->h; - const int32_t input_ch = input_dims->c; - const int32_t output_ch = output_dims->c; - const int32_t kernel_x = filter_dims->w; - const int32_t kernel_y = filter_dims->h; - const int32_t pad_x = dw_conv_params->padding.w; - const int32_t pad_y = dw_conv_params->padding.h; - const int32_t stride_x = dw_conv_params->stride.w; - const int32_t stride_y = dw_conv_params->stride.h; - const int32_t *output_shift = quant_params->shift; - const int32_t *output_mult = quant_params->multiplier; - const int32_t output_x = output_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_offset = dw_conv_params->output_offset; - const int32_t input_offset = dw_conv_params->input_offset; - const int32_t output_activation_min = dw_conv_params->activation.min; - const int32_t output_activation_max = dw_conv_params->activation.max; - q15_t *buffer_a = (q15_t *)ctx->buf; - - /* Check input constraints input_ch == output_ch */ - if (input_ch != output_ch) - { - return ARM_MATH_SIZE_MISMATCH; - } 
-#ifdef ARM_MATH_MVEI - (void)bias_dims; - /* Generate two columns from the input tensor */ - q7_t *lhs_buffer = (q7_t *)buffer_a; - q7_t *out = output; - int padded = 0; - int buffer_count = 0; - const int32_t kernel_size = kernel_x * kernel_y; - - /* This part implements the im2col function */ - for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++) - { - for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++) - { - for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++) - { - for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x) - { - arm_memset_q7(lhs_buffer, (int8_t)-input_offset, (uint32_t)input_ch); - padded = 1; - } - else - { - arm_memcpy_q7(lhs_buffer, input + (i_ker_y * input_x + i_ker_x) * input_ch, (uint32_t)input_ch); - } - lhs_buffer += input_ch; - } - } - buffer_count++; - - if (buffer_count == 4) - { - lhs_buffer = (q7_t *)buffer_a; - if (padded == 0) - { - out = arm_nn_depthwise_conv_nt_t_s8(lhs_buffer, - kernel, - input_offset, - input_ch, - output_shift, - output_mult, - output_offset, - output_activation_min, - output_activation_max, - kernel_size, - bias, - out); - } - else - { - out = arm_nn_depthwise_conv_nt_t_padded_s8(lhs_buffer, - kernel, - input_offset, - input_ch, - output_shift, - output_mult, - output_offset, - output_activation_min, - output_activation_max, - kernel_size, - bias, - out); - padded = 0; - } - buffer_count = 0; - } - } - } - - /* Handle left over buffers */ - lhs_buffer = (q7_t *)buffer_a; - - for (int i_buf = 0; i_buf < buffer_count; i_buf++) - { - int32_t loop_count = (input_ch + 3) / 4; - - int32_t num_ch_to_process = input_ch; - for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; - num_ch_to_process -= 4, offset += 4, i_loop_cnt++) - { - const int8_t *col_0 = lhs_buffer + (kernel_size * input_ch * 
i_buf) + offset; - const int8_t *row_0 = kernel + offset; - int32x4_t out_0 = vldrwq_s32(&bias[offset]); - - for (int i_ker = 0; i_ker < kernel_size; i_ker++) - { - const int32x4_t ker_0 = vldrbq_s32(row_0); - - int32x4_t ip_0 = vldrbq_s32(col_0); - ip_0 = vaddq_n_s32(ip_0, input_offset); - out_0 += vmulq_s32(ip_0, ker_0); - - col_0 += input_ch; - row_0 += input_ch; - } - - const int32x4_t mult = vldrwq_s32(&output_mult[offset]); - const int32x4_t shift = vldrwq_s32(&output_shift[offset]); - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_0 = vaddq_n_s32(out_0, output_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max)); - mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process); - vstrbq_p_s32(out, out_0, p); - - out += 4; - } - - const int tail_ch = input_ch & 0x3; - if (tail_ch != 0) - { - out -= (4 - tail_ch); - } - } - -#elif defined(ARM_MATH_DSP) - (void)bias_dims; - /* Run the following code in cores using DSP extension */ - q15_t *const col_buffer_start = buffer_a; - q15_t *col_buffer = col_buffer_start; - const int32_t *const bias_start_pos = bias; - const q31_t *const out_mult_start_pos = output_mult; - const q31_t *const out_shift_start_pos = output_shift; - uint16_t row_count; - uint16_t row_shift; - - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - - /* Out of bounds is only considered for the y axis as it provides a contiguous zero'ing opportunity than along - the x axis */ - const int ker_y_start = MAX(0, -base_idx_y); - /* Condition for kernel end dimension: (base_idx_y + ker_y_end) < input_y */ - const int ker_y_end = MIN(kernel_y, input_y - base_idx_y); - - int32_t index = 0; - if (ker_y_start != 0) - { - memset(&col_buffer[index], 0, (kernel_x * input_ch) * ker_y_start * 
sizeof(q15_t)); - index += (kernel_x * input_ch) * ker_y_start; - } - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + i_ker_y; - - for (int i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++) - { - const int32_t idx_x = base_idx_x + i_ker_x; - if (idx_x < 0 || idx_x >= input_x) - { - memset(&col_buffer[index], 0, input_ch * sizeof(q15_t)); - } - else - { - arm_q7_to_q15_with_offset((q7_t *)input + (idx_y * input_x + idx_x) * input_ch, &col_buffer[index], input_ch, input_offset); - } - index += input_ch; - } - } - - const int diff = kernel_y - ker_y_end; - if (diff != 0) - { - memset(&col_buffer[index], 0, (kernel_x * input_ch) * diff * sizeof(q15_t)); - } - - row_count = output_ch / 4; - row_shift = 0; - bias = bias_start_pos; - output_mult = out_mult_start_pos; - output_shift = out_shift_start_pos; - - while (row_count) - { - q31_t sum = *bias++; - q31_t sum_2 = *bias++; - q31_t sum_3 = *bias++; - q31_t sum_4 = *bias++; - - uint16_t col_count = (kernel_x * kernel_y) / 2; - q15_t *col_pos = col_buffer_start + row_shift; - const q7_t *row_pos = kernel + row_shift; - row_shift += 4; - - while (col_count) - { - /* General idea is to read 4 + 4 (input, kernel) pair and re-arrange them in the right order to - use in a SMLAD instruction . One run of this loop produces 4 partial outputs with 8 MACs. */ - /* Note: variable names can be improved here to align with rows and columns. 
*/ - q31_t ip_a1, ip_a2, ip_b1, ip_b2, op_a, op_b, op_c; - /* Read 4 weights */ - ip_b1 = arm_nn_read_q7x4(row_pos); - ip_a1 = arm_nn_read_q7x4(row_pos + input_ch); - op_a = arm_nn_read_q15x2(col_pos); - op_b = arm_nn_read_q15x2(col_pos + input_ch); - - ip_a2 = __SXTB16(ip_b1); - ip_b1 = __SXTB16(__ROR(ip_b1, 8)); - - ip_b2 = __SXTB16(ip_a1); - ip_a1 = __SXTB16(__ROR(ip_a1, 8)); - - op_c = __PKHBT(op_b, op_a, 16); - op_a = __PKHTB(op_b, op_a, 16); - op_b = __PKHBT(ip_b2, ip_a2, 16); - sum = __SMLAD(op_c, op_b, sum); - - op_b = __PKHBT(ip_b1, ip_a1, 16); - sum_2 = __SMLAD(op_a, op_b, sum_2); - - op_a = arm_nn_read_q15x2(col_pos + 2); - op_b = arm_nn_read_q15x2(col_pos + input_ch + 2); - - op_c = __PKHBT(op_b, op_a, 16); - op_a = __PKHTB(op_b, op_a, 16); - op_b = __PKHTB(ip_a2, ip_b2, 16); - sum_3 = __SMLAD(op_c, op_b, sum_3); - - op_b = __PKHTB(ip_a1, ip_b1, 16); - sum_4 = __SMLAD(op_a, op_b, sum_4); - - row_pos += input_ch << 1; - col_pos += input_ch << 1; - col_count--; - } - - col_count = (kernel_x * kernel_y) & 0x1; - while (col_count) - { - sum += row_pos[0] * col_pos[0]; - sum_2 += row_pos[1] * col_pos[1]; - sum_3 += row_pos[2] * col_pos[2]; - sum_4 += row_pos[3] * col_pos[3]; - - row_pos += input_ch; - col_pos += input_ch; - - col_count--; - } - sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); - sum += output_offset; - sum = MAX(sum, output_activation_min); - sum = MIN(sum, output_activation_max); - *output++ = (q7_t)sum; - - sum_2 = arm_nn_requantize(sum_2, *output_mult++, *output_shift++); - sum_2 += output_offset; - sum_2 = MAX(sum_2, output_activation_min); - sum_2 = MIN(sum_2, output_activation_max); - *output++ = (q7_t)sum_2; - sum_3 = arm_nn_requantize(sum_3, *output_mult++, *output_shift++); - sum_3 += output_offset; - sum_3 = MAX(sum_3, output_activation_min); - sum_3 = MIN(sum_3, output_activation_max); - *output++ = (q7_t)sum_3; - - sum_4 = arm_nn_requantize(sum_4, *output_mult++, *output_shift++); - sum_4 += output_offset; - sum_4 = 
MAX(sum_4, output_activation_min); - sum_4 = MIN(sum_4, output_activation_max); - *output++ = (q7_t)sum_4; - - row_count--; - } - - row_count = output_ch & 0x3; - while (row_count) - { - q15_t *col_pos = col_buffer_start + row_shift; - const q7_t *row_pos = kernel + row_shift; - q31_t sum = *bias++; - const uint16_t col_count = (kernel_x * kernel_y); - row_shift += 1; - - for (int i = 0; i < col_count; i++) - { - sum += row_pos[i * input_ch] * col_pos[i * input_ch]; - } - sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); - sum += output_offset; - sum = MAX(sum, output_activation_min); - sum = MIN(sum, output_activation_max); - *output++ = (q7_t)sum; - - row_count--; - } - - // clear counter and pointers - col_buffer = col_buffer_start; - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - return arm_depthwise_conv_s8(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - kernel, - bias_dims, - bias, - output_dims, - output); -#endif /* ARM_MATH_MVEI | ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims) -{ -#if defined(ARM_MATH_MVEI) - /* The + 4 accounts for out of bounds read of the lhs buffers in the *_nt_t_* functions. 
*/ - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t) + 4; -#elif defined(ARM_MATH_DSP) - return (input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t); -#else - (void)input_dims; - (void)filter_dims; - return 0; -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c deleted file mode 100644 index 8f374aa84..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_u8_basic_ver1.c - * Description: u8 depthwise convolution function - * - * $Date: May 29, 2020 - * $Revision: V.1.1.0 - * - * Target : Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -static void depthwise_conv_u8_mult_4(const uint8_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const uint8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const int32_t stride_y, - const int32_t *bias, - uint8_t *output, - const int32_t output_shift, - const int32_t output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch; ++in_ch, out_ch += ch_mult) - { - for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4) - { - int32_t out_buff[4]; - - out_buff[0] = 0; - out_buff[1] = 0; - out_buff[2] = 0; - out_buff[3] = 0; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); ++ker_h) - { - int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch; - int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; - - for (int32_t ker_w = ker_w_start; 
ker_w < MIN(kernel_x, input_x - in_w); ++ker_w, ker_idx += output_ch) - { - int32_t in_val = input[in_idx + ker_w * input_ch] + input_offset; - out_buff[0] += in_val * (kernel[ker_idx + 0 + mult_tile] + filter_offset); - out_buff[1] += in_val * (kernel[ker_idx + 1 + mult_tile] + filter_offset); - out_buff[2] += in_val * (kernel[ker_idx + 2 + mult_tile] + filter_offset); - out_buff[3] += in_val * (kernel[ker_idx + 3 + mult_tile] + filter_offset); - } - } - - if (bias != NULL) - { - out_buff[0] += bias[out_ch + 0 + mult_tile]; - out_buff[1] += bias[out_ch + 1 + mult_tile]; - out_buff[2] += bias[out_ch + 2 + mult_tile]; - out_buff[3] += bias[out_ch + 3 + mult_tile]; - } - out_buff[0] = arm_nn_requantize(out_buff[0], output_mult, output_shift); - out_buff[1] = arm_nn_requantize(out_buff[1], output_mult, output_shift); - out_buff[2] = arm_nn_requantize(out_buff[2], output_mult, output_shift); - out_buff[3] = arm_nn_requantize(out_buff[3], output_mult, output_shift); - - out_buff[0] += output_offset; - out_buff[1] += output_offset; - out_buff[2] += output_offset; - out_buff[3] += output_offset; - - out_buff[0] = MIN(MAX(out_buff[0], output_activation_min), output_activation_max); - out_buff[1] = MIN(MAX(out_buff[1], output_activation_min), output_activation_max); - out_buff[2] = MIN(MAX(out_buff[2], output_activation_min), output_activation_max); - out_buff[3] = MIN(MAX(out_buff[3], output_activation_min), output_activation_max); - - output[out_idx++] = (uint8_t)out_buff[0]; - output[out_idx++] = (uint8_t)out_buff[1]; - output[out_idx++] = (uint8_t)out_buff[2]; - output[out_idx++] = (uint8_t)out_buff[3]; - } - } - } - } -} - -static void depthwise_conv_u8_generic(const uint8_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const uint8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const 
int32_t stride_y, - const int32_t *bias, - uint8_t *output, - const int32_t output_shift, - const int32_t output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - (void)output_ch; - int i_out = 0; - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++) - { - const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult; - int32_t acc_0; - /* Condition for kernel start dimension: (base_idx_ + ker__start) >= 0 */ - const int ker_y_start = MAX(0, -base_idx_y); - const int ker_x_start = MAX(0, -base_idx_x); - /* Condition for kernel end dimension: (base_idx_ + ker__end) < input_ */ - const int ker_y_end = MIN(kernel_y, input_y - base_idx_y); - const int ker_x_end = MIN(kernel_x, input_x - base_idx_x); - acc_0 = 0; - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + i_ker_y; - for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t idx_x = base_idx_x + i_ker_x; - int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; - int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; - - acc_0 += (input[idx_0] + input_offset) * (kernel[ker_idx_0] + filter_offset); - } - } - if (bias != NULL) - { - acc_0 += bias[idx_out_ch]; - } - - /* Requantize and clamp output to provided range */ - acc_0 = arm_nn_requantize(acc_0, output_mult, output_shift); - acc_0 += output_offset; - acc_0 = MAX(acc_0, output_activation_min); - acc_0 = MIN(acc_0, output_activation_max); - - output[i_out++] 
= acc_0; - } - } - } - } -} - -/** - * @brief uint8 depthwise convolution function with asymmetric quantization - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_ch Channels in input tensor - * @param[in] kernel Pointer to kernel weights - * @param[in] kernel_x Width of kernel - * @param[in] kernel_y Height of kernel - * @param[in] ch_mult Number of channel multiplier - * @param[in] pad_x Padding sizes x - * @param[in] pad_y Padding sizes y - * @param[in] stride_x Convolution stride along the width - * @param[in] stride_y Convolution stride along the height - * @param[in] dilation_x Dilation along width. Not used and intended for future enhancement. - * @param[in] dilation_y Dilation along height. Not used and intended for future enhancement. - * @param[in] bias Pointer to optional bias values. If no bias is - * availble, NULL is expected - * @param[in] input_offset Input tensor zero offset - * @param[in] filter_offset Kernel tensor zero offset - * @param[in] output_offset Output tensor zero offset - * @param[in,out] output Pointer to output tensor - * @param[in] output_x Width of output tensor - * @param[in] output_y Height of output tensor - * @param[in] output_activation_min Minimum value to clamp the output to. Range : {0, 255} - * @param[in] output_activation_max Minimum value to clamp the output to. 
Range : {0, 255} - * @param[in] output_shift Amount of right-shift for output - * @param[in] output_mult Output multiplier for requantization - * @return The function returns one of the following - * ARM_MATH_SIZE_MISMATCH - Not supported dimension of tensors - * ARM_MATH_SUCCESS - Successful operation - * ARM_MATH_ARGUMENT_ERROR - Implementation not available - * - * - */ - -arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const uint8_t *kernel, - const uint16_t kernel_x, - const uint16_t kernel_y, - const int16_t ch_mult, - const int16_t pad_x, - const int16_t pad_y, - const int16_t stride_x, - const int16_t stride_y, - const int16_t dilation_x, - const int16_t dilation_y, - const int32_t *bias, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_offset, - uint8_t *output, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_activation_min, - const int32_t output_activation_max, - const int32_t output_shift, - const int32_t output_mult) -{ - (void)dilation_x; - (void)dilation_y; - - if (ch_mult % 4 == 0) - { - depthwise_conv_u8_mult_4(input, input_x, input_y, input_ch, kernel, ch_mult * input_ch, ch_mult, - kernel_x, kernel_y, pad_x, pad_y, stride_x, stride_y, bias, output, - output_shift, output_mult, output_x, output_y, output_offset, input_offset, - filter_offset, output_activation_min, output_activation_max); - } - else - { - depthwise_conv_u8_generic(input, input_x, input_y, input_ch, kernel, ch_mult * input_ch, ch_mult, - kernel_x, kernel_y, pad_x, pad_y, stride_x, stride_y, bias, - output, output_shift, output_mult, output_x, output_y, output_offset, - input_offset, filter_offset, output_activation_min, output_activation_max); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c deleted file mode 100644 index eed2036fd..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_wrapper_s8.c - * Description: Wrapper API to select appropriate depthwise conv API based - * on dimensions. 
- * - * $Date: May 29, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * s8 Depthwise conv wrapper function - * - * Refer header file for details. - * - */ -arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *filter, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - arm_status status = ARM_MATH_SUCCESS; - if (1 == dw_conv_params->ch_mult) - { -#if !defined(ARM_MATH_MVEI) - if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1)) - { - status = arm_depthwise_conv_3x3_s8(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - filter, - bias_dims, - bias, - output_dims, - output); - } - else -#endif - { - status = arm_depthwise_conv_s8_opt(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - filter, - bias_dims, - bias, - output_dims, - output); - } - } - else - { - status = arm_depthwise_conv_s8(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - filter, - bias_dims, - bias, - output_dims, - output); - } - - /* Return to application */ - return status; -} - -int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims) -{ - (void)dw_conv_params; - int32_t size = 0; - - if (input_dims->c == output_dims->c) - { - size = 
arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims); - } - - return size; -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c deleted file mode 100644 index a7eaf93f2..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_separable_conv_HWC_q7.c - * Description: Q7 depthwise separable convolution function - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Q7 depthwise separable convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
- * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in equals ch_im_out - * - * Implementation: - * There are 3 nested loop here: - * Inner loop: calculate each output value with MAC instruction over an accumulator - * Mid loop: loop over different output channel - * Outer loop: loop over different output (x, y) - */ - -arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, - q7_t * bufferB) -{ - (void)bufferB; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x; - int16_t i_ker_y, i_ker_x; - q7_t *colBuffer = (q7_t *) bufferA; - q7_t *pBuffer = colBuffer; - const q7_t *pBias = bias; - q7_t *pOut = Im_out; - uint16_t rowCnt; - uint16_t row_shift; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - /* we first do im2col here */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q7(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, ch_im_in); - } else - { - /* arm_copy_q7((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); */ - memcpy(pBuffer, (q7_t *) Im_in + (i_ker_y * 
dim_im_in + i_ker_x) * ch_im_in, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* we will do the computation here for each channel */ - rowCnt = ch_im_out >> 2; - row_shift = 0; - pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = (dim_kernel * dim_kernel) >> 1; - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - row_shift += 4; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHTB(opB, inB1, 16); - inB1 = __PKHBT(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHTB(opB, inA1, 16); - inA1 = __PKHBT(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum3 = __SMLAD(opA, opB, sum3); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum4 = __SMLAD(opA, opB, sum4); - colCnt--; - } -#else - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHBT(opB, inB1, 16); - inB1 = __PKHTB(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHBT(opB, inA1, 16); - inA1 = __PKHTB(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(__ROR(inA1, 8)); - 
opB = __SXTB16(__ROR(inB1, 8)); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum4 = __SMLAD(opA, opB, sum4); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum3 = __SMLAD(opA, opB, sum3); - colCnt--; - } - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - -#ifndef ARM_MATH_BIG_ENDIAN - /* - * r0 r1 r2 r3 r4 r5 - * inA1, inA2, inB1, inB2, opA, opB - */ - - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhtb r3, r5, r2, ASR #16\n" - "pkhbt r2, r2, r5, LSL #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhtb r1, r5, r0, ASR #16\n" - "pkhbt r0, r0, r5, LSL #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum], r4, r5, %[sum]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] - "+r"(sum),[sum2] "+r"(sum2), - [sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB), - [pA] "+r"(pA):[colCnt] - "r"(colCnt),[ch_im_in] "r"(ch_im_in):"r0", "r1", "r2", "r3", "r4", "r5"); -#else - /* - * r0 r1 r2 r3 r4 r5 - * inA1, inA2, inB1, inB2, opA, opB - */ - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhbt r3, r5, r2, LSL #16\n" - "pkhtb r2, r2, r5, ASR #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhbt r1, r5, r0, LSL #16\n" - "pkhtb r0, r0, r5, ASR #16\n" - 
"sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum], r4, r5, %[sum]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] - "+r"(sum),[sum2] "+r"(sum2), - [sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB), - [pA] "+r"(pA):[colCnt] - "r"(colCnt),[ch_im_in] "r"(ch_im_in):"r0", "r1", "r2", "r3", "r4", "r5"); - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = (dim_kernel * dim_kernel) & 0x1; - while (colCnt) - { - union arm_nnword inA, inB; - inA.word = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inB.word = arm_nn_read_q7x4(pB); - pB += ch_im_in; - sum += inA.bytes[0] * inB.bytes[0]; - sum2 += inA.bytes[1] * inB.bytes[1]; - sum3 += inA.bytes[2] * inB.bytes[2]; - sum4 += inA.bytes[3] * inB.bytes[3]; - colCnt--; - } - - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum2 >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum4 >> out_shift), 8); - - rowCnt--; - } - - rowCnt = ch_im_out & 0x3; - while (rowCnt) - { - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = (dim_kernel * dim_kernel); - - row_shift += 1; - - while (colCnt) - { - q7_t A1 = *pA; - q7_t B1 = *pB; - pA += ch_im_in; - pB += ch_im_in; - sum += A1 * B1; - - colCnt--; - } - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - rowCnt--; - } - - /* clear counter and pointers */ - pBuffer = colBuffer; - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i_out_y, i_out_x, i_ch_out, i_ker_x, i_ker_y; - int 
conv_out; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) - { - // for each output - conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); - for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++) - { - for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++) - { - int in_row = stride * i_out_y + i_ker_y - padding; - int in_col = stride * i_out_x + i_ker_x - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - conv_out += - Im_in[(in_row * - dim_im_in + - in_col) * - ch_im_in + - i_ch_out] * wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out]; - } - } - } - Im_out[(i_out_y * dim_im_out + - i_out_x) * ch_im_out + i_ch_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; - -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c deleted file mode 100644 index 53143da3e..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_separable_conv_HWC_q7_nonsquare.c - * Description: Q7 depthwise separable convolution function (non-square shape) - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Q7 depthwise separable convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding sizes x - * @param[in] padding_y padding sizes y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] 
Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is equal to ch_im_out - * - */ - -arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, - q7_t * bufferB) -{ - - (void)bufferB; - -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - -/* - * Implementation: - * There are 3 nested loop here: - * Inner loop: calculate each output value with MAC instruction over an accumulator - * Mid loop: loop over different output channel - * Outer loop: loop over different output (x, y) - * - */ - - int16_t i_out_y, i_out_x; - int16_t i_ker_y, i_ker_x; - q7_t *colBuffer = (q7_t *) bufferA; - q7_t *pBuffer = colBuffer; - const q7_t *pBias = bias; - q7_t *pOut = Im_out; - uint16_t rowCnt; - uint16_t row_shift; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* we first do im2col here */ - for 
(i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q7(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, ch_im_in); - } else - { - /* arm_copy_q7((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); */ - memcpy(pBuffer, (q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* we will do the computation here for each channel */ - rowCnt = ch_im_out >> 2; - row_shift = 0; - pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = (dim_kernel_x * dim_kernel_y) >> 1; - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - row_shift += 4; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHTB(opB, inB1, 16); - inB1 = __PKHBT(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHTB(opB, inA1, 16); - inA1 = __PKHBT(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum3 = __SMLAD(opA, opB, sum3); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 
8)); - sum4 = __SMLAD(opA, opB, sum4); - colCnt--; - } -#else - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHBT(opB, inB1, 16); - inB1 = __PKHTB(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHBT(opB, inA1, 16); - inA1 = __PKHTB(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum4 = __SMLAD(opA, opB, sum4); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum3 = __SMLAD(opA, opB, sum3); - colCnt--; - } - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - -#ifndef ARM_MATH_BIG_ENDIAN - // r0 r1 r2 r3 r4 r5 - // inA1, inA2, inB1, inB2, opA, opB - asm volatile ("COL_LOOP:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhtb r3, r5, r2, ASR #16\n" - "pkhbt r2, r2, r5, LSL #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhtb r1, r5, r0, ASR #16\n" - "pkhbt r0, r0, r5, LSL #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum], r4, r5, %[sum]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP\n":[sum] "+r"(sum),[sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt), - [ch_im_in] 
"r"(ch_im_in):"r0", "r1", "r2", "r3", "r4", "r5"); -#else - // r0 r1 r2 r3 r4 r5 - // inA1, inA2, inB1, inB2, opA, opB - asm volatile ("COL_LOOP:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhbt r3, r5, r2, LSL #16\n" - "pkhtb r2, r2, r5, ASR #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhbt r1, r5, r0, LSL #16\n" - "pkhtb r0, r0, r5, ASR #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum], r4, r5, %[sum]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP\n":[sum] "+r"(sum),[sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt), - [ch_im_in] "r"(ch_im_in):"r0", "r1", "r2", "r3", "r4", "r5"); -#endif /*ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = (dim_kernel_x * dim_kernel_y) & 0x1; - while (colCnt) - { - union arm_nnword inA, inB; - inA.word = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inB.word = arm_nn_read_q7x4(pB); - pB += ch_im_in; - sum += inA.bytes[0] * inB.bytes[0]; - sum2 += inA.bytes[1] * inB.bytes[1]; - sum3 += inA.bytes[2] * inB.bytes[2]; - sum4 += inA.bytes[3] * inB.bytes[3]; - colCnt--; - } - - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum2 >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum4 >> out_shift), 8); - - rowCnt--; - } - - rowCnt = ch_im_out & 0x3; - while (rowCnt) - { - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - q31_t sum = 
((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = (dim_kernel_x * dim_kernel_y); - - row_shift += 1; - - while (colCnt) - { - q7_t A1 = *pA; - q7_t B1 = *pB; - pA += ch_im_in; - pB += ch_im_in; - sum += A1 * B1; - - colCnt--; - } - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - rowCnt--; - } - - // clear counter and pointers - pBuffer = colBuffer; - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i_out_y, i_out_x, i_ch_out; - int i_ker_y, i_ker_x; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) - { - // for each output - int conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); - for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++) - { - for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++) - { - int in_row = stride_y * i_out_y + i_ker_y - padding_y; - int in_col = stride_x * i_out_x + i_ker_x - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] * - wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out]; - } - } - } - Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] = - (q7_t) __SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - - /* Return to application */ - return ARM_MATH_SUCCESS; - -} - -/** - * @} end of NNConv group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c deleted file mode 100644 index 97511ff37..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_depthwise_conv_s8_core.c - * Description: Depthwise convolution on im2col buffers. - * - * $Date: May 29, 2020 - * $Revision: V.1.0.3 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/* - * Depthwise conv on an im2col buffer where the input channel equals - * output channel. - * - * Refer header file for details. 
- * - */ - -q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row, - const q15_t *col, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t kernel_size, - const int32_t *const output_bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - int32_t ch_per_loop = num_ch / 4; - - const int32_t *bias = output_bias; - int8_t *out_tmp = out; - - int32_t idx = 0; - - while (ch_per_loop > 0) - { - int32x4_t ip_0; - int32x4_t ip_1; - int32_t ker_loop = kernel_size / 3; - int32x4_t out_0 = vldrwq_s32(bias); - int32x4_t out_1 = out_0; - bias += 4; - - const int32_t offset = idx * 4; - const int8_t *row_0 = row + offset; - const int16_t *col_0 = col + offset; - const int16_t *col_1 = col + kernel_size * num_ch + offset; - - int32x4_t ker_0 = vldrbq_s32(row_0); - - while (ker_loop > 0) - { - const int8_t *row_1 = row_0 + num_ch; - const int8_t *row_2 = row_0 + 2 * num_ch; - const int32x4_t ker_1 = vldrbq_s32(row_1); - const int32x4_t ker_2 = vldrbq_s32(row_2); - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - col_0 += num_ch; - col_1 += num_ch; - - out_0 += vmulq_s32(ip_0, ker_0); - out_1 += vmulq_s32(ip_1, ker_0); - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - col_0 += num_ch; - col_1 += num_ch; - - out_0 += vmulq_s32(ip_0, ker_1); - out_1 += vmulq_s32(ip_1, ker_1); - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - col_0 += num_ch; - col_1 += num_ch; - - out_0 += vmulq_s32(ip_0, ker_2); - out_1 += vmulq_s32(ip_1, ker_2); - row_0 += 3 * num_ch; - - ker_0 = vldrbq_s32(row_0); - ker_loop--; - } - - idx++; - /* Handle tail kernel elements */ - ker_loop = kernel_size - ((kernel_size / 3) * 3); - while (ker_loop > 0) - { - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - - out_0 += vmulq_s32(ip_0, ker_0); - out_1 += vmulq_s32(ip_1, ker_0); - - col_0 += num_ch; - col_1 += num_ch; - - ip_0 = vldrhq_s32(col_0); - ip_1 = 
vldrhq_s32(col_1); - - row_0 += num_ch; - ker_0 = vldrbq_s32(row_0); - ker_loop--; - } - const int32x4_t mult = vldrwq_s32(out_mult); - const int32x4_t shift = vldrwq_s32(out_shift); - out_mult += 4; - out_shift += 4; - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_1 = arm_requantize_mve_32x4(out_1, mult, shift); - - out_0 = vaddq_n_s32(out_0, out_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); - vstrbq_s32(out_tmp, out_0); - - out_1 = vaddq_n_s32(out_1, out_offset); - out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min)); - out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max)); - vstrbq_s32(out_tmp + num_ch, out_1); - - out_tmp += 4; - ch_per_loop--; - } - - int32_t tail_ch = num_ch & 3; - if (tail_ch != 0) - { - int32_t ch_idx = (num_ch & ~3); - int32x4_t col_0_sum; - int32x4_t col_1_sum; - - const int32_t single_buffer_size = kernel_size * num_ch; - for (int i = 0; i < tail_ch; i++) - { - const int16_t *col_pos_0 = col + ch_idx; - const int16_t *col_pos_1 = col_pos_0 + single_buffer_size; - - const int8_t *row_pos = row + ch_idx; - int32_t sum_0 = bias[i]; - int32_t sum_1 = bias[i]; - - for (int j = 0; j < kernel_size; j++) - { - const int8_t row_val = row_pos[j * num_ch]; - sum_0 += row_val * col_pos_0[j * num_ch]; - sum_1 += row_val * col_pos_1[j * num_ch]; - } - col_0_sum[i] = sum_0; - col_1_sum[i] = sum_1; - - ch_idx++; - } - const mve_pred16_t p = vctp32q((uint32_t)tail_ch); - const int32x4_t mult = vldrwq_z_s32(out_mult, p); - const int32x4_t shift = vldrwq_z_s32(out_shift, p); - - col_0_sum = arm_requantize_mve_32x4(col_0_sum, mult, shift); - col_1_sum = arm_requantize_mve_32x4(col_1_sum, mult, shift); - - col_0_sum = vaddq_n_s32(col_0_sum, out_offset); - col_0_sum = vmaxq_s32(col_0_sum, vdupq_n_s32(activation_min)); - col_0_sum = vminq_s32(col_0_sum, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out_tmp, col_0_sum, p); - - col_1_sum = vaddq_n_s32(col_1_sum, 
out_offset); - col_1_sum = vmaxq_s32(col_1_sum, vdupq_n_s32(activation_min)); - col_1_sum = vminq_s32(col_1_sum, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out_tmp + num_ch, col_1_sum, p); - - out_tmp += tail_ch; - } - - return out_tmp + num_ch; -#else - (void)row; - (void)col; - (void)num_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)kernel_size; - (void)output_bias; - (void)out; - return NULL; -#endif -} diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c deleted file mode 100644 index f03e5ff35..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_q7_q15.c - * Description: Matrix-multiplication function for convolution - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - - /** - * @brief Matrix-multiplication function for convolution. - * - * @details Refer to header file for details. - * - */ - -q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut) -{ -#if defined (ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *pOut2 = pOut + ch_im_out; - const q7_t *pBias = bias; - - uint16_t rowCnt = ch_im_out >> 1; - /* this loop over rows in A */ - while (rowCnt) - { - /* setup pointers for B */ - const q15_t *pB = pInBuffer; - const q15_t *pB2 = pB + numCol_A; - - /* align the second pointer for A */ - const q7_t *pA2 = pA + numCol_A; - - /* init the sum with bias */ - q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = numCol_A >> 2; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA11, inA12, inA21, inA22; - - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - pA = read_and_pad(pA, &inA11, &inA12); - pA2 = read_and_pad(pA2, &inA21, &inA22); - - sum = __SMLAD(inA11, inB1, sum); - sum2 = __SMLAD(inA11, inB2, sum2); - sum3 = __SMLAD(inA21, inB1, sum3); - sum4 = __SMLAD(inA21, inB2, sum4); - - inB1 = 
arm_nn_read_q15x2_ia(&pB); - inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA12, inB1, sum); - sum2 = __SMLAD(inA12, inB2, sum2); - sum3 = __SMLAD(inA22, inB1, sum3); - sum4 = __SMLAD(inA22, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = numCol_A & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - q7_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8); - *pOut2++ = (q7_t) __SSAT((sum2 >> out_shift), 8); - *pOut2++ = (q7_t) __SSAT((sum4 >> out_shift), 8); - - /* skip the row computed with A2 */ - pA += numCol_A; - rowCnt--; - } /* for over ch_im_out */ - - /* compute left-over row if any */ - if (ch_im_out & 0x1) - { - /* setup pointers for B */ - const q15_t *pB = pInBuffer; - const q15_t *pB2 = pB + numCol_A; - - /* load the bias */ - q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = numCol_A >> 2; - while (colCnt) - { - q31_t inA11, inA12; - - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - pA = read_and_pad(pA, &inA11, &inA12); - - sum = __SMLAD(inA11, inB1, sum); - sum2 = __SMLAD(inA11, inB2, sum2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA12, inB1, sum); - sum2 = __SMLAD(inA12, inB2, sum2); - - colCnt--; - } - colCnt = numCol_A & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - colCnt--; - } - - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - *pOut2++ = (q7_t) __SSAT((sum2 >> out_shift), 8); - } - - pOut += ch_im_out; - - /* return the new output pointer with offset */ - return pOut; -#else - /* To be 
completed */ - return NULL; -#endif /* ARM_MATH_DSP */ - -} diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c deleted file mode 100644 index 31aa334c8..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_q7_q15_reordered.c - * Description: Matrix-multiplication function for convolution with reordered columns - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_math.h" - - /** - * @brief Matrix-multiplication function for convolution with re-ordered input. - * - * @details Refer to header file for details. - * - */ - -q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut) -{ - -#if defined (ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *pOut2 = pOut + ch_im_out; - int i; - - /* this loop over rows in A */ - for (i = 0; i < ch_im_out; i += 2) - { - /* setup pointers for B */ - const q15_t *pB = pInBuffer; - const q15_t *pB2 = pB + numCol_A; - - /* align the second pointer for A */ - const q7_t *pA2 = pA + numCol_A; - - /* init the sum with bias */ - q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = numCol_A >> 2; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA11, inA12, inA21, inA22; - - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - pA = read_and_pad_reordered(pA, &inA11, &inA12); - pA2 = read_and_pad_reordered(pA2, &inA21, &inA22); - - sum = __SMLAD(inA11, inB1, sum); - sum2 = __SMLAD(inA11, inB2, sum2); - sum3 = __SMLAD(inA21, inB1, sum3); - sum4 = __SMLAD(inA21, inB2, sum4); - - inB1 = arm_nn_read_q15x2_ia(&pB); - inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA12, inB1, sum); - sum2 = __SMLAD(inA12, inB2, sum2); - sum3 = __SMLAD(inA22, inB1, sum3); - sum4 = __SMLAD(inA22, inB2, sum4); - - colCnt--; - } 
/* while over colCnt */ - colCnt = numCol_A & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - q7_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8); - *pOut2++ = (q7_t) __SSAT((sum2 >> out_shift), 8); - *pOut2++ = (q7_t) __SSAT((sum4 >> out_shift), 8); - - /* skip the row computed with A2 */ - pA += numCol_A; - } /* for over ch_im_out */ - - pOut += ch_im_out; - - /* return the new output pointer with offset */ - return pOut; -#else - /* To be completed */ - return NULL; -#endif /* ARM_MATH_DSP */ -} diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c deleted file mode 100644 index ebdfaaa7e..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_s8_s16.c - * Description: Matrix-multiplication function for convolution - * - * $Date: May 29, 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/* - * Matrix-multiplication function for convolution with per-channel requantization. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0) -{ -#if defined(ARM_MATH_MVEI) -#define ROW_PER_LOOP (4) -#define COL_PER_LOOP (8) - - const q7_t *ip_a0_s8 = input_a; - q7_t *out_1 = out_0 + output_ch; - - const int32_t *bias = output_bias; - - int32_t row_count = output_ch / ROW_PER_LOOP; - - while (row_count) - { - const q15_t *ip_b0_s16 = input_b; - const q15_t *ip_b1_s16 = input_b + num_col_a; - - const q7_t *ip_a1_s8 = ip_a0_s8 + num_col_a; - const q7_t *ip_a2_s8 = ip_a0_s8 + num_col_a * 2; - const q7_t *ip_a3_s8 = ip_a0_s8 + num_col_a * 3; - - q31_t ch_0_out_n = bias[0]; - q31_t ch_1_out_n = bias[1]; - q31_t ch_2_out_n = bias[2]; - q31_t ch_3_out_n = bias[3]; - - q31_t ch_0_out_n1 = ch_0_out_n; - 
q31_t ch_1_out_n1 = ch_1_out_n; - q31_t ch_2_out_n1 = ch_2_out_n; - q31_t ch_3_out_n1 = ch_3_out_n; - bias += 4; - - int32_t col_count = num_col_a / COL_PER_LOOP; - - while (col_count) - { - // Load inputs - const int16x8_t ip_b0 = vld1q_s16(ip_b0_s16); - ip_b0_s16 += COL_PER_LOOP; - const int16x8_t ip_b1 = vld1q_s16(ip_b1_s16); - ip_b1_s16 += COL_PER_LOOP; - - // Load filters - const int16x8_t ip_a0 = vldrbq_s16(ip_a0_s8); - ip_a0_s8 += COL_PER_LOOP; - const int16x8_t ip_a1 = vldrbq_s16(ip_a1_s8); - ip_a1_s8 += COL_PER_LOOP; - const int16x8_t ip_a2 = vldrbq_s16(ip_a2_s8); - ip_a2_s8 += COL_PER_LOOP; - const int16x8_t ip_a3 = vldrbq_s16(ip_a3_s8); - ip_a3_s8 += COL_PER_LOOP; - - // MAC - ch_0_out_n += vmladavq_s16(ip_b0, ip_a0); - ch_1_out_n += vmladavq_s16(ip_b0, ip_a1); - ch_2_out_n += vmladavq_s16(ip_b0, ip_a2); - ch_3_out_n += vmladavq_s16(ip_b0, ip_a3); - ch_0_out_n1 += vmladavq_s16(ip_b1, ip_a0); - ch_1_out_n1 += vmladavq_s16(ip_b1, ip_a1); - ch_2_out_n1 += vmladavq_s16(ip_b1, ip_a2); - ch_3_out_n1 += vmladavq_s16(ip_b1, ip_a3); - - col_count--; - } - - /* Handle tail */ - col_count = (num_col_a & (COL_PER_LOOP - 1)) - 1; - while (col_count >= 0) - { - const int32_t b0 = ip_b0_s16[col_count]; - const int32_t b1 = ip_b1_s16[col_count]; - - ch_0_out_n += b0 * ip_a0_s8[col_count]; - ch_1_out_n += b0 * ip_a1_s8[col_count]; - ch_2_out_n += b0 * ip_a2_s8[col_count]; - ch_3_out_n += b0 * ip_a3_s8[col_count]; - - ch_0_out_n1 += b1 * ip_a0_s8[col_count]; - ch_1_out_n1 += b1 * ip_a1_s8[col_count]; - ch_2_out_n1 += b1 * ip_a2_s8[col_count]; - ch_3_out_n1 += b1 * ip_a3_s8[col_count]; - col_count--; - } - ip_a0_s8 += (num_col_a & (COL_PER_LOOP - 1)); - - int32x4_t out_vec_0; - int32x4_t out_vec_1; - out_vec_0[0] = ch_0_out_n; - out_vec_0[1] = ch_1_out_n; - out_vec_0[2] = ch_2_out_n; - out_vec_0[3] = ch_3_out_n; - - out_vec_1[0] = ch_0_out_n1; - out_vec_1[1] = ch_1_out_n1; - out_vec_1[2] = ch_2_out_n1; - out_vec_1[3] = ch_3_out_n1; - - int32x4_t mult = 
vldrwq_s32(out_mult); - int32x4_t shift = vldrwq_s32(out_shift); - out_mult += ROW_PER_LOOP; - out_shift += ROW_PER_LOOP; - - out_vec_0 = arm_requantize_mve_32x4(out_vec_0, mult, shift); - out_vec_1 = arm_requantize_mve_32x4(out_vec_1, mult, shift); - - out_vec_0 = vaddq_n_s32(out_vec_0, out_offset); - out_vec_0 = vmaxq_s32(out_vec_0, vdupq_n_s32(activation_min)); - out_vec_0 = vminq_s32(out_vec_0, vdupq_n_s32(activation_max)); - vstrbq_s32(out_0, out_vec_0); - out_0 += ROW_PER_LOOP; - - out_vec_1 = vaddq_n_s32(out_vec_1, out_offset); - out_vec_1 = vmaxq_s32(out_vec_1, vdupq_n_s32(activation_min)); - out_vec_1 = vminq_s32(out_vec_1, vdupq_n_s32(activation_max)); - vstrbq_s32(out_1, out_vec_1); - out_1 += ROW_PER_LOOP; - row_count--; - ip_a0_s8 += (num_col_a * 3); - } - - row_count = output_ch & (ROW_PER_LOOP - 1); - - if (row_count) - { - ip_a0_s8 = input_a + num_col_a * (output_ch & ~3); - const mve_pred16_t p = vctp32q((uint32_t)row_count); - int32x4_t out_vec_0 = vdupq_n_s32(0); - int32x4_t out_vec_1 = vdupq_n_s32(0); - int32x4_t mult_tail; - int32x4_t shift_tail; - - for (int i_ch = 0; i_ch < row_count; i_ch++) - { - int32_t output_0 = bias[i_ch]; - int32_t output_1 = bias[i_ch]; - const q15_t *ip_b0_s16 = input_b; - const q15_t *ip_b1_s16 = input_b + num_col_a; - - for (int i_idx = 0; i_idx < num_col_a; i_idx++) - { - output_0 += ip_b0_s16[i_idx] * ip_a0_s8[i_idx]; - output_1 += ip_b1_s16[i_idx] * ip_a0_s8[i_idx]; - } - - ip_a0_s8 += num_col_a; - out_vec_0[i_ch] = output_0; - out_vec_1[i_ch] = output_1; - mult_tail[i_ch] = out_mult[i_ch]; - shift_tail[i_ch] = out_shift[i_ch]; - } - out_vec_0 = arm_requantize_mve_32x4(out_vec_0, mult_tail, shift_tail); - out_vec_1 = arm_requantize_mve_32x4(out_vec_1, mult_tail, shift_tail); - - out_vec_0 = vaddq_n_s32(out_vec_0, out_offset); - out_vec_0 = vmaxq_s32(out_vec_0, vdupq_n_s32(activation_min)); - out_vec_0 = vminq_s32(out_vec_0, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out_0, out_vec_0, p); - - out_vec_1 = 
vaddq_n_s32(out_vec_1, out_offset); - out_vec_1 = vmaxq_s32(out_vec_1, vdupq_n_s32(activation_min)); - out_vec_1 = vminq_s32(out_vec_1, vdupq_n_s32(activation_max)); - - vstrbq_p_s32(out_1, out_vec_1, p); - out_1 += row_count; - } - - return out_1; - -#elif defined(ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *out_1 = out_0 + output_ch; - const int32_t *bias = output_bias; - - uint16_t row_count = output_ch / 2; - const q7_t *ip_a0 = input_a; - /* this loop over rows in A */ - while (row_count) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* align the second pointer for A */ - const q7_t *ip_a1 = ip_a0 + num_col_a; - - /* Init accumulator with bias for channel N and N + 1 */ - q31_t ch_0_out_0 = *bias; - q31_t ch_0_out_1 = *bias++; - q31_t ch_1_out_0 = *bias; - q31_t ch_1_out_1 = *bias++; - - uint16_t col_count = num_col_a / 4; - /* accumulate over the vector */ - while (col_count) - { - q31_t a01, a02, a11, a12; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad(ip_a0, &a01, &a02); - ip_a1 = read_and_pad(ip_a1, &a11, &a12); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1); - - col_count--; - } /* while over col_count */ - col_count = num_col_a & 0x3; - while (col_count) - { - q7_t a0 = *ip_a0++; - q15_t b0 = *ip_b0++; - q7_t a1 = *ip_a1++; - q15_t b1 = *ip_b1++; - - ch_0_out_0 += a0 * b0; - ch_0_out_1 += a0 * b1; - ch_1_out_0 += a1 * b0; - ch_1_out_1 += a1 * b1; - col_count--; - } /* while over col_count */ - - ch_0_out_0 = 
arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - out_mult++; - out_shift++; - - ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); - ch_1_out_0 += out_offset; - ch_1_out_0 = MAX(ch_1_out_0, activation_min); - ch_1_out_0 = MIN(ch_1_out_0, activation_max); - *out_0++ = (q7_t)ch_1_out_0; - - ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); - ch_1_out_1 += out_offset; - ch_1_out_1 = MAX(ch_1_out_1, activation_min); - ch_1_out_1 = MIN(ch_1_out_1, activation_max); - *out_1++ = (q7_t)ch_1_out_1; - out_mult++; - out_shift++; - - /* skip row */ - ip_a0 += num_col_a; - row_count--; - } - - /* compute the last odd numbered row if any */ - if (output_ch & 0x1) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* load the bias */ - q31_t ch_0_out_0 = *bias; - q31_t ch_0_out_1 = *bias++; - - uint16_t col_count = num_col_a >> 2; - while (col_count) - { - q31_t a01, a02; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad(ip_a0, &a01, &a02); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - - col_count--; - } - col_count = num_col_a & 0x3; - while (col_count) - { - q7_t a0 = *ip_a0++; - q15_t b0 = *ip_b0++; - q15_t b1 = *ip_b1++; - - ch_0_out_0 += a0 * b0; - ch_0_out_1 += a0 * b1; - col_count--; - } - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, 
*out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - out_mult++; - out_shift++; - } - - out_0 += output_ch; - - /* return the new output pointer with offset */ - return out_0; -#else - (void)input_a; - (void)input_b; - (void)output_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)num_col_a; - (void)output_bias; - (void)out_0; - /* To be completed */ - return NULL; -#endif -} diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c deleted file mode 100644 index a25b1ffe4..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_s8_s16_reordered.c - * Description: Matrix-multiplication function for convolution with reordered columns - * - * $Date: February 27, 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_math.h" - -/* - * Matrix-multiplication with re-ordered input and bias inputs for convolution with per-channel - * requantization. The re-ordering is a consequence of sign extension is done by the SXTB16 command. - * - * Refer header file for details. This function differs from arm_nn_mat_mult_kernel_s8_s16(), in that it uses - * read_and_pad_reordered() instead of arm_nn_mat_mult_kernel_s8_s16(). Investigating the cycles impact and - * unifying these two functions is a potential future improvement. 
- * - */ - -q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0) -{ -#if defined(ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *out_1 = out_0 + output_ch; - const int32_t *bias = output_bias; - - uint16_t row_count = output_ch / 2; - const q7_t *ip_a0 = input_a; - /* this loop over rows in A */ - while (row_count) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* align the second pointer for A */ - const q7_t *ip_a1 = ip_a0 + num_col_a; - - /* Init accumulator with bias for channel N and N + 1 */ - q31_t ch_0_out_0 = *bias; - q31_t ch_0_out_1 = *bias++; - q31_t ch_1_out_0 = *bias; - q31_t ch_1_out_1 = *bias++; - - uint16_t col_count = num_col_a / 4; - /* accumulate over the vector */ - while (col_count) - { - q31_t a01, a02, a11, a12; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02); - ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1); - - col_count--; - } /* while over col_count */ - - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - 
*out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - out_mult++; - out_shift++; - - ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); - ch_1_out_0 += out_offset; - ch_1_out_0 = MAX(ch_1_out_0, activation_min); - ch_1_out_0 = MIN(ch_1_out_0, activation_max); - *out_0++ = (q7_t)ch_1_out_0; - - ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); - ch_1_out_1 += out_offset; - ch_1_out_1 = MAX(ch_1_out_1, activation_min); - ch_1_out_1 = MIN(ch_1_out_1, activation_max); - *out_1++ = (q7_t)ch_1_out_1; - out_mult++; - out_shift++; - - /* skip row */ - ip_a0 += num_col_a; - row_count--; - } - - if (output_ch & 1) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* Init accumulator with bias for channel N + 1 */ - q31_t ch_0_out_0 = *bias; - q31_t ch_0_out_1 = ch_0_out_0; - - int32_t col_count = num_col_a / 4; - while (col_count) - { - q31_t a01, a02; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - - col_count--; - } /* while over col_count */ - - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = 
MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - } - - out_0 += output_ch; - - /* return the new output pointer with offset */ - return out_0; -#else - (void)input_a; - (void)input_b; - (void)output_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)num_col_a; - (void)output_bias; - (void)out_0; - /* To be completed */ - return NULL; -#endif -} diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c deleted file mode 100644 index 6af509f36..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_s8.c - * Description: General Matrix-multiplication function - * - * $Date: July 27, 2020 - * $Revision: V.2.0.4 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/* - * s8 General matrix multiplication function with per-channel requantization for upto 4 column batches. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_mat_mult_s8(const q7_t *input_row, - const q7_t *input_col, - const uint16_t output_ch, - const uint16_t col_batches, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t out_offset, - const int32_t col_offset, - const int32_t row_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t row_len, - const int32_t *const bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - (void)row_offset; - if (col_batches == 4) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t row_len_tmp = row_len; - const int8_t *ip_r0 = input_row + (i_out_ch * row_len); - const int8_t *ip_c0 = input_col; - const int8_t *ip_c1 = input_col + row_len; - const int8_t *ip_c2 = input_col + (2 * row_len); - const int8_t *ip_c3 = input_col + (3 * row_len); - - int32_t acc_0 = 0; - int32_t acc_1 = 0; - int32_t acc_2 = 0; - int32_t acc_3 = 0; - const int32_t row_loop_cnt = (row_len + 7) / 8; - - for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++) - { - mve_pred16_t p = vctp16q((uint32_t)row_len_tmp); - const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p); - row_len_tmp -= 8; - - int16x8_t r0 = vldrbq_z_s16(ip_r0, p); - ip_r0 += 8; - - int16x8_t c0 = vldrbq_z_s16(ip_c0, p); - ip_c0 += 8; - c0 = vaddq_m_s16(vuninitializedq_s16(), c0, offset, p); - - int16x8_t c1 = vldrbq_z_s16(ip_c1, p); - ip_c1 += 8; 
- c1 = vaddq_m_s16(vuninitializedq_s16(), c1, offset, p); - - int16x8_t c2 = vldrbq_z_s16(ip_c2, p); - ip_c2 += 8; - c2 = vaddq_m_s16(vuninitializedq_s16(), c2, offset, p); - - int16x8_t c3 = vldrbq_z_s16(ip_c3, p); - ip_c3 += 8; - c3 = vaddq_m_s16(vuninitializedq_s16(), c3, offset, p); - - acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p); - acc_1 = vmladavaq_p_s16(acc_1, r0, c1, p); - acc_2 = vmladavaq_p_s16(acc_2, r0, c2, p); - acc_3 = vmladavaq_p_s16(acc_3, r0, c3, p); - } - - int32x4_t res = {acc_0, acc_1, acc_2, acc_3}; - if (bias) - { - res = vaddq_n_s32(res, bias[i_out_ch]); - } - res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); - res = vaddq_n_s32(res, out_offset); - - res = vmaxq_s32(res, vdupq_n_s32(activation_min)); - res = vminq_s32(res, vdupq_n_s32(activation_max)); - - const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3}; - vstrbq_scatter_offset_s32(&out[i_out_ch], scatter_offset, res); - } - out += 4 * output_ch; - } - else - { - for (int i_col_batch = (col_batches & ~0x3); i_col_batch < (col_batches & 0x3); i_col_batch++) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t row_len_tmp = row_len; - - const int8_t *ip_r0 = input_row + (i_out_ch * row_len); - const int8_t *ip_c0 = input_col + (i_col_batch * row_len); - int32_t acc_0 = 0; - const int32_t row_loop_cnt = (row_len + 7) / 8; - - for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++) - { - const mve_pred16_t p = vctp16q((uint32_t)row_len_tmp); - const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p); - row_len_tmp -= 8; - - int16x8_t r0 = vldrbq_z_s16(ip_r0, p); - ip_r0 += 8; - int16x8_t c0 = vldrbq_z_s16(ip_c0, p); - ip_c0 += 8; - - c0 = vaddq_m_s16(vuninitializedq_s16(), c0, offset, p); - acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p); - } - - if (bias) - { - acc_0 += bias[i_out_ch]; - } - acc_0 = arm_nn_requantize(acc_0, output_mult[i_out_ch], output_shift[i_out_ch]); - acc_0 += 
out_offset; - acc_0 = MAX(acc_0, activation_min); - acc_0 = MIN(acc_0, activation_max); - out[i_out_ch] = (q7_t)acc_0; - } - out += output_ch; - } - } - return out; - -#else - (void)input_row; - (void)input_col; - (void)output_ch; - (void)col_batches; - (void)output_shift; - (void)output_mult; - (void)out_offset; - (void)col_offset; - (void)row_offset; - (void)activation_min; - (void)activation_max; - (void)row_len; - (void)bias; - (void)out; - return NULL; -#endif -} diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c deleted file mode 100644 index ae2287c8d..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_mat_q7_vec_q15.c - * Description: Mixed Q15-Q7 fully-connected layer function - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - - /** - * @brief Mixed Q15-Q7 fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - * Q7_Q15 version of the fully connected layer - * - * Weights are in q7_t and Activations are in q15_t - * - */ - -arm_status -arm_fully_connected_mat_q7_vec_q15(const q15_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q15_t * pOut, - q15_t * vec_buffer) -{ - (void)vec_buffer; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - const q7_t *pB2; - q15_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA = pV; - - uint16_t rowCnt = num_of_rows >> 1; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - 
pA = pV; - pB2 = pB + dim_vec; - - while (colCnt) - { - q31_t inV, inM11, inM12, inM21, inM22; - pB = read_and_pad(pB, &inM11, &inM12); - pB2 = read_and_pad(pB2, &inM21, &inM22); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM11, sum); - sum2 = __SMLAD(inV, inM21, sum2); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM12, sum); - sum2 = __SMLAD(inV, inM22, sum2); - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - q7_t inM2 = *pB2++; - - sum += inV * inM; - sum2 += inV * inM2; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum2 >> out_shift), 16)); - - /*adjust the pointers and counters */ - pB += dim_vec; - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x1; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - int i, j; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (i = 0; i < num_of_rows; i++) - { - int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (j = 0; j < dim_vec; j++) - { - ip_out += pV[j] * pM[i * dim_vec + j]; - } - pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16); - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); - -} - -/** - * @} end of FC group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c deleted file mode 100644 index b01f9e379..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_mat_q7_vec_q15_opt.c - * Description: Mixed Q15-Q7 opt fully-connected layer function - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - - /** - * @brief Mixed Q15-Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - * Q7_Q15 version of the fully connected layer - * - * Weights are in q7_t and Activations are in q15_t - * - * Limitation: x4 version requires weight reordering to work - * - * Here we use only one pointer to read 4 rows in the weight - * matrix. So if the original q7_t matrix looks like this: - * - * | a11 | a12 | a13 | a14 | a15 | a16 | a17 | - * - * | a21 | a22 | a23 | a24 | a25 | a26 | a27 | - * - * | a31 | a32 | a33 | a34 | a35 | a36 | a37 | - * - * | a41 | a42 | a43 | a44 | a45 | a46 | a47 | - * - * | a51 | a52 | a53 | a54 | a55 | a56 | a57 | - * - * | a61 | a62 | a63 | a64 | a65 | a66 | a67 | - * - * We operates on multiple-of-4 rows, so the first four rows becomes - * - * | a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 | - * - * | a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 | - * - * | a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 | - * - * The column left over will be in-order. - * which is: - * | a17 | a27 | a37 | a47 | - * - * For the left-over rows, we do 1x1 computation, so the data remains - * as its original order. 
- * - * So the stored weight matrix looks like this: - * - * | a11 | a21 | a12 | a22 | a31 | a41 | - * - * | a32 | a42 | a13 | a23 | a14 | a24 | - * - * | a33 | a43 | a34 | a44 | a15 | a25 | - * - * | a16 | a26 | a35 | a45 | a36 | a46 | - * - * | a17 | a27 | a37 | a47 | a51 | a52 | - * - * | a53 | a54 | a55 | a56 | a57 | a61 | - * - * | a62 | a63 | a64 | a65 | a66 | a67 | - * - */ - -arm_status -arm_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, const q7_t * bias, q15_t * pOut, q15_t * vec_buffer) -{ - - (void)vec_buffer; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - q15_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA = pV; - - uint16_t rowCnt = num_of_rows >> 2; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM11, inV, sum); - sum2 = __SMLAD(inM12, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM13, inV, sum3); - sum4 = __SMLAD(inM14, inV, sum4); - colCnt--; - } - -#else - - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = *__SIMD32(pA)++; - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = 
__SMLAD(inM12, inV, sum); - sum2 = __SMLAD(inM11, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM14, inV, sum3); - sum4 = __SMLAD(inM13, inV, sum4); - colCnt--; - } - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - - /* - * register needed: - * loop counter: colCnt - * accumulators: sum, sum2, sum3, sum4 - * pointers: pB, pA - * weight data: inM11, inM12, inM13, inM14 - * activation data: inV - */ - -#ifndef ARM_MATH_BIG_ENDIAN - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #4\n" - "ldr.w r1, [%[pB]], #8\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r1, %[sum]\n" - "smlad %[sum2], r4, r0, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r3, %[sum3]\n" - "smlad %[sum4], r4, r2, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] "+r"(sum), - [sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt):"r0", "r1", "r2", "r3", "r4"); -#else - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #4\n" - "ldr.w r1, [%[pB]], #8\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r0, %[sum]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] "+r"(sum), - [sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt):"r0", "r1", "r2", "r3", "r4"); -#endif /* ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = dim_vec & 0x1; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - q7_t inM2 = *pB++; - q7_t inM3 = *pB++; - q7_t inM4 = *pB++; - - sum += inV * inM; - sum2 += inV * inM2; - sum3 += inV * 
inM3; - sum4 += inV * inM4; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum2 >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum3 >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum4 >> out_shift), 16)); - - /* adjust the pointers and counters */ - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - uint16_t rowCnt = num_of_rows >> 2; - const q7_t *pB = pM; - const q15_t *pA; - q15_t *pO = pOut; - const q7_t *pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inA2 = *pA++; - - q7_t inB1 = *pB++; - q7_t inB3 = *pB++; - q7_t inB2 = *pB++; - q7_t inB4 = *pB++; - - sum += inA1 * inB1 + inA2 * inB2; - sum2 += inA1 * inB3 + inA2 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum3 += inA1 * inB1 + inA2 * inB2; - sum4 += inA1 * inB3 + inA2 * inB4; - - 
colCnt--; - } - - colCnt = dim_vec & 0x1; - while (colCnt) - { - q15_t inA = *pA++; - q7_t inB = *pB++; - sum += inA * inB; - inB = *pB++; - sum2 += inA * inB; - inB = *pB++; - sum3 += inA * inB; - inB = *pB++; - sum4 += inA * inB; - - colCnt--; - } - *pO++ = (q15_t) __SSAT((sum >> out_shift), 16); - *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16); - *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16); - *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16); - - rowCnt--; - } - - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - int ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - int j; - - pA = pV; - for (j = 0; j < dim_vec; j++) - { - q15_t inA = *pA++; - q7_t inB = *pB++; - ip_out += inA * inB; - } - *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16); - - rowCnt--; - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); - -} - -/** - * @} end of FC group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c deleted file mode 100644 index 9a69a961d..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q15.c - * Description: Q15 basic fully-connected layer function - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - - /** - * @brief Q15 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - */ - -arm_status -arm_fully_connected_q15(const q15_t * pV, - const q15_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut, - q15_t * vec_buffer) -{ - (void)vec_buffer; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q15_t *pB = pM; - const q15_t *pB2 = pB + dim_vec; - q15_t *pO = 
pOut; - const q15_t *pA; - const q15_t *pBias = bias; - uint16_t rowCnt = num_of_rows >> 1; - - /* this loop loops over different output */ - while (rowCnt) { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - pB2 = pB + dim_vec; - - while (colCnt) - { - q31_t inV1, inM1, inM2; - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - inM2 = arm_nn_read_q15x2_ia(&pB2); - sum2 = __SMLAD(inV1, inM2, sum2); - - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - inM2 = arm_nn_read_q15x2_ia(&pB2); - sum2 = __SMLAD(inV1, inM2, sum2); - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q15_t inM = *pB++; - q15_t inM2 = *pB2++; - - sum += inV * inM; - sum2 += inV * inM2; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum2>> out_shift), 16)); - - /* adjust the pointers and counters */ - pB = pB + dim_vec; - rowCnt --; - } - - rowCnt = num_of_rows & 0x1; - - while (rowCnt) { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) { - q31_t inV1, inM1; - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while(colCnt) { - q15_t inV = *pA++; - q15_t inM = *pB++; - - sum += inV * inM; - - colCnt--; - } - - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - - rowCnt --; - } - -#else - int i, j; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (i = 0; i < num_of_rows; 
i++) - { - int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (j = 0; j < dim_vec; j++) - { - ip_out += pV[j] * pM[i * dim_vec + j]; - } - pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16); - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return (ARM_MATH_SUCCESS); - -} - -/** - * @} end of FC group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c deleted file mode 100644 index 72a39dd96..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q15_opt.c - * Description: Q15 opt fully-connected layer function - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - - /** - * @brief Q15 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - * Here we use only one pointer to read 4 rows in the weight - * matrix. So if the original matrix looks like this: - * - * | a11 | a12 | a13 | - * - * | a21 | a22 | a23 | - * - * | a31 | a32 | a33 | - * - * | a41 | a42 | a43 | - * - * | a51 | a52 | a53 | - * - * | a61 | a62 | a63 | - * - * We operates on multiple-of-4 rows, so the first four rows becomes - * - * | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 | - * - * | a13 | a23 | a33 | a43 | - * - * Remaining rows are kept the same original order. 
- * - * So the stored weight matrix looks like this: - * - * - * | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 | - * - * | a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 | - * - * | a62 | a63 | - */ - -arm_status -arm_fully_connected_q15_opt(const q15_t * pV, - const q15_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut, - q15_t * vec_buffer) -{ - (void)vec_buffer; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q15_t *pB = pM; - q15_t *pO = pOut; - const q15_t *pBias = bias; - const q15_t *pA = pV; - - uint16_t rowCnt = num_of_rows >> 2; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - -#ifdef USE_INTRINSIC - - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV, inM11, sum); - inM12 = arm_nn_read_q15x2_ia(&pB); - sum2 = __SMLAD(inV, inM12, sum2); - inM13 = arm_nn_read_q15x2_ia(&pB); - sum3 = __SMLAD(inV, inM13, sum3); - inM14 = arm_nn_read_q15x2_ia(&pB); - sum4 = __SMLAD(inV, inM14, sum4); - colCnt--; - } - -#else - - /* - * register needed: - * loop counter: colCnt - * accumulators: sum, sum2, sum3, sum4 - * pointers: pB, pA - * weight data: inM11, inM12, inM13, inM14 - * activation data: inV - */ - - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #4\n" - "ldr.w r0, [%[pB]], #16\n" - "smlad %[sum], r4, r0, %[sum]\n" - "ldr.w r1, [%[pB] , #-12]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r2, [%[pB] , #-8]\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "ldr.w r3, [%[pB] , #-4]\n" - "smlad 
%[sum4], r4, r3, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] "+r"(sum), - [sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt):"r0", "r1", "r2", "r3", "r4"); - -#endif /* USE_INTRINSIC */ - - colCnt = dim_vec & 0x1; - while (colCnt) - { - - q15_t inV = *pA++; - q15_t inM = *pB++; - q15_t inM2 = *pB++; - q15_t inM3 = *pB++; - q15_t inM4 = *pB++; - - sum += inV * inM; - sum2 += inV * inM2; - sum3 += inV * inM3; - sum4 += inV * inM4; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum2 >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum3 >> out_shift), 16)); - *pO++ = (q15_t) (__SSAT((sum4 >> out_shift), 16)); - - /* adjust the pointers and counters */ - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inV2, inM1, inM2; - - inM1 = arm_nn_read_q15x2_ia(&pB); - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM1, sum); - - inM2 = arm_nn_read_q15x2_ia(&pB); - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM2, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q15_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - uint16_t rowCnt = num_of_rows >> 2; - const q15_t *pB = pM; - const q15_t *pA; - q15_t *pO = pOut; - const q15_t *pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + 
NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inA2 = *pA++; - - q15_t inB1 = *pB++; - q15_t inB2 = *pB++; - sum += inA1 * inB1 + inA2 * inB2; - - inB1 = *pB++; - inB2 = *pB++; - sum2 += inA1 * inB1 + inA2 * inB2; - - inB1 = *pB++; - inB2 = *pB++; - sum3 += inA1 * inB1 + inA2 * inB2; - - inB1 = *pB++; - inB2 = *pB++; - sum4 += inA1 * inB1 + inA2 * inB2; - - colCnt--; - } - colCnt = dim_vec & 0x1; - while (colCnt) - { - q15_t inA = *pA++; - q15_t inB = *pB++; - sum += inA * inB; - inB = *pB++; - sum2 += inA * inB; - inB = *pB++; - sum3 += inA * inB; - inB = *pB++; - sum4 += inA * inB; - colCnt--; - } - *pO++ = (q15_t) __SSAT((sum >> out_shift), 16); - *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16); - *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16); - *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16); - - rowCnt--; - } - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - int ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - int j; - - pA = pV; - for (j = 0; j < dim_vec; j++) - { - q15_t inA = *pA++; - q15_t inB = *pB++; - ip_out += inA * inB; - } - *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16); - - rowCnt--; - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); - -} - -/** - * @} end of FC group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c deleted file mode 100644 index 8e499bca2..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q7.c - * Description: Q7 basic fully-connected layer function - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - - /** - * @brief Q7 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: dim_vec - * - * This basic function is designed to work with regular weight - * matrix without interleaving. 
- * - */ - -arm_status -arm_fully_connected_q7(const q7_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, const q7_t * bias, q7_t * pOut, q15_t * vec_buffer) -{ - -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - const q7_t *pB2; - q7_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA; - uint16_t rowCnt = num_of_rows >> 1; - - /* expand the vector into the buffer */ - arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec); - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = vec_buffer; - pB2 = pB + dim_vec; - - while (colCnt) - { - q31_t inV, inM11, inM12, inM21, inM22; - pB = read_and_pad_reordered(pB, &inM11, &inM12); - pB2 = read_and_pad_reordered(pB2, &inM21, &inM22); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM11, sum); - sum2 = __SMLAD(inV, inM21, sum2); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM12, sum); - sum2 = __SMLAD(inV, inM22, sum2); - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q7_t inV = *pA++; - q15_t inM = *pB++; - q15_t inM2 = *pB2++; - - sum += inV * inM; - sum2 += inV * inM2; - colCnt--; - } /* while over colCnt */ - *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8)); - *pO++ = (q7_t) (__SSAT((sum2 >> out_shift), 8)); - - /* adjust the pointers and counters */ - pB += dim_vec; - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x1; - - while (rowCnt) - { - uint16_t colCnt = dim_vec >> 2; - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - pA = vec_buffer; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad_reordered(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, 
inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q7_t inV = *pA++; - q15_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8)); - - rowCnt--; - } - -#else - int i, j; - - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (i = 0; i < num_of_rows; i++) - { - int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (j = 0; j < dim_vec; j++) - { - ip_out += pV[j] * pM[i * dim_vec + j]; - } - pOut[i] = (q7_t) __SSAT((ip_out >> out_shift), 8); - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); - -} - -/** - * @} end of FC group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c deleted file mode 100644 index 5ddf7caf3..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q7_opt.c - * Description: Q7 basic fully-connected layer function - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - - /** - * @brief Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: dim_vec - * - * This opt function is designed to work with interleaved weight - * matrix. The vector input is assumed in q7_t format, we call - * arm_q7_to_q15_no_shift_shuffle function to expand into - * q15_t format with certain weight re-ordering, refer to the function - * comments for more details. - * Here we use only one pointer to read 4 rows in the weight - * matrix. 
So if the original q7_t matrix looks like this: - * - * | a11 | a12 | a13 | a14 | a15 | a16 | a17 | - * - * | a21 | a22 | a23 | a24 | a25 | a26 | a27 | - * - * | a31 | a32 | a33 | a34 | a35 | a36 | a37 | - * - * | a41 | a42 | a43 | a44 | a45 | a46 | a47 | - * - * | a51 | a52 | a53 | a54 | a55 | a56 | a57 | - * - * | a61 | a62 | a63 | a64 | a65 | a66 | a67 | - * - * - * We operates on multiple-of-4 rows, so the first four rows becomes - * - * | a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 | - * - * | a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 | - * - * | a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 | - * - * So within the kernel, we first read the re-ordered vector in as: - * - * | b1 | b3 | and | b2 | b4 | - * - * the four q31_t weights will look like - * - * | a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 | - * - * | a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 | - * - * The column left over will be in-order. - * which is: - * - * | a17 | a27 | a37 | a47 | - * - * For the left-over rows, we do 1x1 computation, so the data remains - * as its original order. 
- * - * So the stored weight matrix looks like this: - * - * | a11 | a21 | a13 | a23 | a31 | a41 | - * - * | a33 | a43 | a12 | a22 | a14 | a24 | - * - * | a32 | a42 | a34 | a44 | a15 | a25 | - * - * | a35 | a45 | a16 | a26 | a36 | a46 | - * - * | a17 | a27 | a37 | a47 | a51 | a52 | - * - * | a53 | a54 | a55 | a56 | a57 | a61 | - * - * | a62 | a63 | a64 | a65 | a66 | a67 | - * - * - */ - -arm_status -arm_fully_connected_q7_opt(const q7_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut, - q15_t * vec_buffer) -{ - -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - q7_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA; - uint16_t rowCnt = num_of_rows >> 2; - - arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec); - - while (rowCnt) - { - - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = vec_buffer; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM11, inV, sum); - sum2 = __SMLAD(inM12, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM13, inV, sum3); - sum4 = __SMLAD(inM14, inV, sum4); - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM11, inV, sum); - sum2 = __SMLAD(inM12, inV, 
sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM13, inV, sum3); - sum4 = __SMLAD(inM14, inV, sum4); - colCnt--; - } -#else - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM12, inV, sum); - sum2 = __SMLAD(inM11, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM14, inV, sum3); - sum4 = __SMLAD(inM13, inV, sum4); - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM12, inV, sum); - sum2 = __SMLAD(inM11, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM14, inV, sum3); - sum4 = __SMLAD(inM13, inV, sum4); - colCnt--; - } -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - - /* - * register needed: - * loop counter: colCnt - * accumulators: sum, sum2, sum3, sum4 - * pointers: pB, pA - * weight data: inM11, inM12, inM13, inM14 - * activation data: inV - */ - -#ifndef ARM_MATH_BIG_ENDIAN - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #8\n" - "ldr.w r1, [%[pB]], #16\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r1, %[sum]\n" - "smlad %[sum2], r4, r0, %[sum2]\n" - "ldr.w r3, [%[pB], #-12]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r3, %[sum3]\n" - "smlad %[sum4], r4, r2, %[sum4]\n" - "ldr.w r4, [%[pA], #-4]\n" - "ldr.w r1, [%[pB], #-8]\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r1, %[sum]\n" - "smlad %[sum2], r4, r0, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, 
r3\n" - "smlad %[sum3], r4, r3, %[sum3]\n" - "smlad %[sum4], r4, r2, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] "+r"(sum), - [sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt):"r0", "r1", "r2", "r3", "r4"); -#else - asm volatile ("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #8\n" - "ldr.w r1, [%[pB]], #16\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r0, %[sum]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r3, [%[pB], #-12]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "ldr.w r4, [%[pA], #-4]\n" - "ldr.w r1, [%[pB], #-8]\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r0, %[sum]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n":[sum] "+r"(sum), - [sum2] "+r"(sum2),[sum3] "+r"(sum3), - [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA):[colCnt] "r"(colCnt):"r0", "r1", "r2", "r3", "r4"); -#endif /* ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - q7_t inM2 = *pB++; - q7_t inM3 = *pB++; - q7_t inM4 = *pB++; - - sum += inV * inM; - sum2 += inV * inM2; - sum3 += inV * inM3; - sum4 += inV * inM4; - colCnt--; - } /* while over colCnt */ - *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8)); - *pO++ = (q7_t) (__SSAT((sum2 >> out_shift), 8)); - *pO++ = (q7_t) (__SSAT((sum3 >> out_shift), 8)); - *pO++ = (q7_t) (__SSAT((sum4 >> out_shift), 8)); - - /* adjust the pointers and counters */ - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << 
bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = vec_buffer; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad_reordered(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8)); - - rowCnt--; - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - uint16_t rowCnt = num_of_rows >> 2; - const q7_t *pB = pM; - const q7_t *pA; - q7_t *pO = pOut; - const q7_t *pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q7_t inA1 = *pA++; - q7_t inA3 = *pA++; - q7_t inA2 = *pA++; - q7_t inA4 = *pA++; - - q7_t inB1 = *pB++; - q7_t inB3 = *pB++; - q7_t inB2 = *pB++; - q7_t inB4 = *pB++; - - sum += inA1 * inB1 + inA2 * inB2; - sum2 += inA1 * inB3 + inA2 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum3 += inA1 * inB1 + inA2 * inB2; - sum4 += inA1 * inB3 + inA2 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum += inA3 * inB1 + inA4 * inB2; - sum2 += inA3 * inB3 + inA4 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum3 += inA3 * inB1 + inA4 * inB2; - sum4 += inA3 * inB3 + inA4 * inB4; - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q7_t inA = *pA++; - q7_t inB = *pB++; - sum += inA * inB; - inB = 
*pB++; - sum2 += inA * inB; - inB = *pB++; - sum3 += inA * inB; - inB = *pB++; - sum4 += inA * inB; - - colCnt--; - } - *pO++ = (q7_t) __SSAT((sum >> out_shift), 8); - *pO++ = (q7_t) __SSAT((sum2 >> out_shift), 8); - *pO++ = (q7_t) __SSAT((sum3 >> out_shift), 8); - *pO++ = (q7_t) __SSAT((sum4 >> out_shift), 8); - - rowCnt--; - } - - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - int ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - int j; - - pA = pV; - for (j = 0; j < dim_vec; j++) - { - q7_t inA = *pA++; - q7_t inB = *pB++; - ip_out += inA * inB; - } - *pO++ = (q7_t) __SSAT((ip_out >> out_shift), 8); - - rowCnt--; - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); - -} - -/** - * @} end of FC group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c deleted file mode 100644 index eace95ffa..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_s8 - * Description: Fully connected function compatible with TF Lite. - * - * $Date: May 2, 2020 - * $Revision: V.2.0.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/* - * S8 basic fully-connected and matrix multiplication layer function for TensorFlow Lite - * - * Refer header file for details. 
- * - */ - -arm_status -arm_fully_connected_s8(const cmsis_nn_context *ctx, - const cmsis_nn_fc_params *fc_params, - const cmsis_nn_per_tensor_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - (void)bias_dims; - (void)ctx; - int32_t batch_cnt = input_dims->n; - - while (batch_cnt) - { - arm_nn_vec_mat_mult_t_s8(input, - kernel, - bias, - output, - fc_params->input_offset, - fc_params->filter_offset, - fc_params->output_offset, - quant_params->multiplier, - quant_params->shift, - filter_dims->n, /* col_dim or accum_depth */ - output_dims->c, /* row_dim or output_depth */ - fc_params->activation.min, - fc_params->activation.max); - input += filter_dims->n; - output += output_dims->c; - batch_cnt--; - } - return (ARM_MATH_SUCCESS); -} - -int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims) -{ - (void)filter_dims; - return 0; -} - -/** - * @} end of FC group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c deleted file mode 100644 index c302bd7f5..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_accumulate_q7_to_q15.c - * Description: Accumulate q7 vector into q15 one. - * - * $Date: May 29, 2020 - * $Revision: V.1.0.1 - * - * pSrc Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -void arm_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length) -{ - q15_t *pCnt = pDst; - const q7_t *pV = pSrc; - q31_t v1, v2, vo1, vo2; - int32_t cnt = length >> 2; - q31_t in; - - while (cnt > 0l) - { - q31_t value = arm_nn_read_q7x4_ia(&pV); - v1 = __SXTB16(__ROR((uint32_t)value, 8)); - v2 = __SXTB16(value); -#ifndef ARM_MATH_BIG_ENDIAN - vo2 = (q31_t)__PKHTB(v1, v2, 16); - vo1 = (q31_t)__PKHBT(v2, v1, 16); -#else - vo1 = (q31_t)__PKHTB(v1, v2, 16); - vo2 = (q31_t)__PKHBT(v2, v1, 16); -#endif - - in = arm_nn_read_q15x2(pCnt); - write_q15x2_ia(&pCnt, __QADD16(vo1, in)); - - in = arm_nn_read_q15x2(pCnt); - write_q15x2_ia(&pCnt, __QADD16(vo2, in)); - - cnt--; - } - cnt = length & 0x3; - while (cnt > 0l) - { - *pCnt++ += *pV++; - cnt--; - } -} - -/** - * @} end of NNBasicMath group - */ \ No newline at end of file diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c deleted file mode 100644 index 331a2d7f3..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_add_q7.c - * Description: Non saturating addition of elements of a q7 vector. 
- * - * $Date: July 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size) -{ - uint32_t block_count; - q31_t result = 0; -#if defined(ARM_MATH_DSP) - /* Loop unrolling: Compute 4 outputs at a time */ - block_count = block_size >> 2U; - - while (block_count > 0U) - { - const int32_t mult_q15x2 = (1UL << 16) | 1UL; - q31_t in_q7x4 = arm_nn_read_q7x4_ia(&input); - q31_t temp_q15x2 = __SXTAB16(__SXTB16(in_q7x4), - __ROR((uint32_t)in_q7x4, 8)); - - result = __SMLAD(temp_q15x2, mult_q15x2, result); - - /* Decrement loop counter */ - block_count--; - } - - /* Loop unrolling: Compute remaining outputs */ - block_count = block_size & 0x3; -#else - block_count = block_size; -#endif - while (block_count > 0U) - { - /* Add and store result in destination buffer. 
*/ - result += *input++; - - /* Decrement loop counter */ - block_count--; - } - - *output = result; -} - -/** - * @} end of NNBasicMath group - */ \ No newline at end of file diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c deleted file mode 100644 index 3ecd8e526..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_depthwise_conv_nt_t_padded_s8.c - * Description: Depthwise convolution with padded matrices. 
- * - * $Date: March 17, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M processors with MVE extension - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * Depthwise convolution of transposed rhs matrix with 4 lhs matrices. One or more of the rhs matrices are padded. - * Dimensions are the same for lhs and rhs. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t input_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - int32_t loop_count = (num_ch + 3) / 4; - const int32_t *bias = output_bias; - uint32_t num_ch_to_process = num_ch; - - for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; - num_ch_to_process -= 4, out += 4, offset += 4, i_loop_cnt++) - { - int32x4_t out_0 = vldrwq_s32(bias); - int32x4_t out_1 = out_0; - int32x4_t out_2 = out_0; - int32x4_t out_3 = out_0; - bias += 4; - - const int8_t *rhs_0 = rhs + offset; - const int8_t *lhs_0 = lhs + offset; - const int8_t *lhs_1 = lhs + row_x_col * num_ch + offset; - const int8_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset; - const int8_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset; - - for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++) - { - const int32x4_t ker_0 = vldrbq_s32(rhs_0); - - int32x4_t ip_0 = vldrbq_s32(lhs_0); - ip_0 = vaddq_n_s32(ip_0, input_offset); - out_0 += vmulq_s32(ip_0, ker_0); - - int32x4_t ip_1 = vldrbq_s32(lhs_1); - ip_1 = vaddq_n_s32(ip_1, input_offset); - out_1 += vmulq_s32(ip_1, ker_0); - - int32x4_t ip_2 = vldrbq_s32(lhs_2); - ip_2 = 
vaddq_n_s32(ip_2, input_offset); - out_2 += vmulq_s32(ip_2, ker_0); - - int32x4_t ip_3 = vldrbq_s32(lhs_3); - ip_3 = vaddq_n_s32(ip_3, input_offset); - - out_3 += vmulq_s32(ip_3, ker_0); - - lhs_0 += num_ch; - lhs_1 += num_ch; - lhs_2 += num_ch; - lhs_3 += num_ch; - - rhs_0 += num_ch; - } - - const int32x4_t mult = vldrwq_s32(out_mult); - const int32x4_t shift = vldrwq_s32(out_shift); - out_mult += 4; - out_shift += 4; - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_0 = vaddq_n_s32(out_0, out_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); - mve_pred16_t p = vctp32q(num_ch_to_process); - vstrbq_p_s32(out, out_0, p); - - out_1 = arm_requantize_mve_32x4(out_1, mult, shift); - out_1 = vaddq_n_s32(out_1, out_offset); - out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min)); - out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + num_ch, out_1, p); - - out_2 = arm_requantize_mve_32x4(out_2, mult, shift); - out_2 = vaddq_n_s32(out_2, out_offset); - out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min)); - out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 2 * num_ch, out_2, p); - - out_3 = arm_requantize_mve_32x4(out_3, mult, shift); - out_3 = vaddq_n_s32(out_3, out_offset); - out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min)); - out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 3 * num_ch, out_3, p); - } - - const int tail_ch = num_ch & 0x3; - if (tail_ch != 0) - { - out -= (4 - tail_ch); - } - return out + (3 * num_ch); - -#else - (void)lhs; - (void)rhs; - (void)input_offset; - (void)num_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)row_x_col; - (void)output_bias; - (void)out; - return NULL; -#endif -} - -/** - * @} end of NNBasicMath group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c deleted file mode 100644 index 1bff9c4ea..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_depthwise_conv_nt_t_s8.c - * Description: Depthwise convolution on matrices with no padding. - * - * $Date: March 17, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M processors with MVE extension. 
- * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * Depthwise convolution of rhs matrix with 4 lhs matrices with no padding. Dimensions are the same for lhs and rhs. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t input_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - const int32_t *bias = output_bias; - int32_t loop_count = (num_ch + 3) / 4; - uint32_t num_ch_to_process = num_ch; - - for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; - num_ch_to_process -= 4, offset += 4, out += 4, i_loop_cnt++) - { - int32x4_t out_0 = vldrwq_s32(bias); - int32x4_t out_1 = out_0; - int32x4_t out_2 = out_0; - int32x4_t out_3 = out_0; - bias += 4; - - const int8_t *rhs_0 = rhs + offset; - const int8_t *lhs_0 = lhs + offset; - const int8_t *lhs_1 = lhs + row_x_col * num_ch + offset; - const int8_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset; - const int8_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset; - int32x4_t ker_sum = vdupq_n_s32(0); - - for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++) - { - const int32x4_t ker_0 = vldrbq_s32(rhs_0); - ker_sum = vaddq_s32(ker_sum, ker_0); - - int32x4_t ip_0 = vldrbq_s32(lhs_0); - out_0 += vmulq_s32(ip_0, ker_0); - - int32x4_t ip_1 = vldrbq_s32(lhs_1); - out_1 += vmulq_s32(ip_1, ker_0); - - int32x4_t ip_2 = vldrbq_s32(lhs_2); - out_2 += vmulq_s32(ip_2, ker_0); - - int32x4_t ip_3 = vldrbq_s32(lhs_3); - out_3 += vmulq_s32(ip_3, ker_0); - - lhs_0 += num_ch; - lhs_1 += num_ch; - lhs_2 += num_ch; - lhs_3 += num_ch; - - 
rhs_0 += num_ch; - } - - ker_sum = vmulq_n_s32(ker_sum, input_offset); - out_0 = ker_sum + out_0; - out_1 = ker_sum + out_1; - out_2 = ker_sum + out_2; - out_3 = ker_sum + out_3; - - const int32x4_t mult = vldrwq_s32(out_mult); - const int32x4_t shift = vldrwq_s32(out_shift); - out_mult += 4; - out_shift += 4; - mve_pred16_t p = vctp32q(num_ch_to_process); - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_0 = vaddq_n_s32(out_0, out_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out, out_0, p); - - out_1 = arm_requantize_mve_32x4(out_1, mult, shift); - out_1 = vaddq_n_s32(out_1, out_offset); - out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min)); - out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + num_ch, out_1, p); - - out_2 = arm_requantize_mve_32x4(out_2, mult, shift); - out_2 = vaddq_n_s32(out_2, out_offset); - out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min)); - out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 2 * num_ch, out_2, p); - - out_3 = arm_requantize_mve_32x4(out_3, mult, shift); - out_3 = vaddq_n_s32(out_3, out_offset); - out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min)); - out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 3 * num_ch, out_3, p); - } - - const int tail_ch = num_ch & 0x3; - if (tail_ch != 0) - { - out -= (4 - tail_ch); - } - - return out + (3 * num_ch); -#else - (void)lhs; - (void)rhs; - (void)input_offset; - (void)num_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)row_x_col; - (void)output_bias; - (void)out; - return NULL; -#endif -} - -/** - * @} end of NNBasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c deleted file mode 100644 index 607765869..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mul_core_1x_s8.c - * Description: General Matrix-multiplication function - * - * $Date: January 20, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 matrix multiplication to process 1 row - * - * Refer header file for details. 
- * - */ - -arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements, - const int8_t *row_base, - const int8_t *col_base, - int32_t *const sum_col, - int32_t *const output) -{ - int32_t acc_n0 = 0; - int32_t sum_tmp = 0; - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - - __asm volatile ( - " vldrb.8 q0, [%[col]], 16 \n" - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vaddva.s8 %[sum], q0 \n" - " vldrb.8 q1, [%[row0]], 16 \n" - " vmladava.s8 %[out0], q0, q1 \n" - " vldrb.8 q0, [%[col]], 16 \n" - " letp lr, 2b \n" - "1: \n" - :[col] "+r"(col_base) - ,[sum] "+Te"(sum_tmp) - ,[row0] "+r"(row_base) - ,[out0] "+Te"(acc_n0) - :[cnt] "r"(row_elements) - :"q0","q1", "memory", "r14"); -#else - for (int i = 0; i < row_elements; i++) - { - sum_tmp += col_base[i]; - acc_n0 += row_base[i] * col_base[i]; - } -#endif - - *sum_col = sum_tmp; - *output = acc_n0; - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c deleted file mode 100644 index 39ad52913..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mul_core_4x_s8.c - * Description: General matrix multiplication function for MVE extension - * - * $Date: January 20, 2020 - * $Revision: V.2.0.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 matrix multiplication to process 4 rows and one column - * - * Refer header file for details. 
- * - */ -arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements, - const int32_t offset, - const int8_t *row_base, - const int8_t *col_base, - int32_t *const sum_col, - int32_t *const output) -{ - int32_t acc_n0 = 0; - int32_t acc_n1 = 0; - int32_t acc_n2 = 0; - int32_t acc_n3 = 0; - - const int8_t *ip_row_0 = row_base; - const int8_t *ip_row_1 = row_base + offset; - const int8_t *ip_row_2 = row_base + (2 * offset); - const int8_t *ip_row_3 = row_base + (3 * offset); - int32_t sum_tmp = 0; - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - __asm volatile( - " vldrb.8 q0, [%[col]], 16 \n" - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vaddva.s8 %[sum], q0 \n" - " vldrb.8 q1, [%[row0]], 16 \n" - " vmladava.s8 %[out0], q0, q1 \n" - " vldrb.8 q2, [%[row1]], 16 \n" - " vmladava.s8 %[out1], q0, q2 \n" - " vldrb.8 q3, [%[row2]], 16 \n" - " vmladava.s8 %[out2], q0, q3 \n" - " vldrb.8 q4, [%[row3]], 16 \n" - " vmladava.s8 %[out3], q0, q4 \n" - " vldrb.8 q0, [%[col]], 16 \n" - " letp lr, 2b \n" - "1: \n" - :[col] "+r"(col_base) - ,[sum] "+Te"(sum_tmp) - ,[row0] "+r"(ip_row_0) - ,[row1] "+r"(ip_row_1) - ,[row2] "+r"(ip_row_2) - ,[row3] "+r"(ip_row_3) - ,[out0] "+Te"(acc_n0) - ,[out1] "+Te"(acc_n1) - ,[out2] "+Te"(acc_n2) - ,[out3] "+Te"(acc_n3) - : [cnt] "r"(row_elements) - : "q0", "q1", "q2", "q3", "q4", "memory", "r14"); -#else - for (int i = 0; i < row_elements; i++) - { - int32_t col = col_base[i]; - sum_tmp += col; - acc_n0 += ip_row_0[i] * col; - acc_n1 += ip_row_1[i] * col; - acc_n2 += ip_row_2[i] * col; - acc_n3 += ip_row_3[i] * col; - } -#endif - output[0] = acc_n0; - output[1] = acc_n1; - output[2] = acc_n2; - output[3] = acc_n3; - - *sum_col = sum_tmp; - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c 
b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c deleted file mode 100644 index 6ad8999f0..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_s8_nt_t_s8 - * Description: Matrix multiplication support function with the right-hand-side (rhs) matrix transposed - * - * $Date: July 27 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 matrix multiplication with the right-hand-side matrix transposed - * - * Refer header file for details. 
- * - */ -arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t *dst_multipliers, - const int32_t *dst_shifts, - const int32_t lhs_rows, - const int32_t rhs_rows, - const int32_t rhs_cols, - const int32_t lhs_offset, - const int32_t dst_offset, - const int32_t activation_min, - const int32_t activation_max) -{ -#if defined(ARM_MATH_DSP) - const int32_t off0 = rhs_cols - 4; - - for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - q31_t lhs_offset_contribution0 = 0; - q31_t lhs_offset_contribution1 = 0; - - for (int32_t x = 0; x < rhs_cols; ++x) - { - lhs_offset_contribution0 += rhs[x]; - lhs_offset_contribution1 += rhs[x + rhs_cols]; - } - - lhs_offset_contribution0 *= lhs_offset; - lhs_offset_contribution1 *= lhs_offset; - if (bias) - { - lhs_offset_contribution0 += bias[rhs_rows_idx]; - lhs_offset_contribution1 += bias[rhs_rows_idx + 1]; - } - - int32_t lhs_rows_idx = lhs_rows >> 1; - - while (lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - q31_t res10 = lhs_offset_contribution0; - q31_t res11 = lhs_offset_contribution1; - - int32_t rhs_cols_idx = 0; - - q31_t val0, val1, val2, val3, val4, val5; - - for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) - { - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = 
arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res11 = __SMLAD(val0, val4, res11); - - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res11 = __SMLAD(val0, val4, res11); - - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res11 = __SMLAD(val0, val4, res11); - - val4 = arm_nn_read_q7x4((const q7_t 
*)&rhs_ptr[off0]); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - res11 = __SMLAD(val0, val4, res11); - } - - for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - lhs_value = lhs_ptr[rhs_cols]; - res10 += lhs_value * rhs_value0; - res11 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - res10 = arm_nn_requantize(res10, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res11 = arm_nn_requantize(res11, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - res10 += dst_offset; - res11 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - res10 = MAX(res10, activation_min); - res10 = MIN(res10, activation_max); - res11 = MAX(res11, activation_min); - res11 = MIN(res11, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; 
- dst_ptr += rhs_rows; - dst_ptr[0] = (q7_t)res10; - dst_ptr[1] = (q7_t)res11; - dst_ptr += rhs_rows; - - lhs_ptr += rhs_cols; - - lhs_rows_idx--; - } - - // Left-over rows - if (lhs_rows % 2) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - - int32_t rhs_cols_idx = 0; - - q31_t val0, val1, val2, val3, val4, val5; - for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) - { - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = 
__SMLAD(val2, val1, res01); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - } - - // Left-over accumulations - for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - } - - rhs += 2 * rhs_cols; - dst += 2; - } - - if (rhs_rows % 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - for (int32_t lhs_rows_idx = 0; lhs_rows_idx < lhs_rows; ++lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - q31_t res00 = 0; - if (bias) - { - res00 = bias[rhs_rows - 1]; - } - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value = rhs_ptr[0]; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows - 1], 
dst_shifts[rhs_rows - 1]); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr += rhs_rows; - } - } -#else - for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - q31_t lhs_offset_contribution0 = 0; - q31_t lhs_offset_contribution1 = 0; - - for (int32_t x = 0; x < rhs_cols; ++x) - { - lhs_offset_contribution0 += rhs[x]; - lhs_offset_contribution1 += rhs[x + rhs_cols]; - } - - lhs_offset_contribution0 *= lhs_offset; - lhs_offset_contribution1 *= lhs_offset; - if (bias) - { - lhs_offset_contribution0 += bias[rhs_rows_idx]; - lhs_offset_contribution1 += bias[rhs_rows_idx + 1]; - } - - int32_t lhs_rows_idx = lhs_rows >> 1; - - while (lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - q31_t res10 = lhs_offset_contribution0; - q31_t res11 = lhs_offset_contribution1; - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - lhs_value = lhs_ptr[rhs_cols]; - res10 += lhs_value * rhs_value0; - res11 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - res10 = arm_nn_requantize(res10, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res11 = arm_nn_requantize(res11, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - res10 += dst_offset; - res11 += dst_offset; - - 
// Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - res10 = MAX(res10, activation_min); - res10 = MIN(res10, activation_max); - res11 = MAX(res11, activation_min); - res11 = MIN(res11, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - dst_ptr += rhs_rows; - dst_ptr[0] = (q7_t)res10; - dst_ptr[1] = (q7_t)res11; - dst_ptr += rhs_rows; - - lhs_ptr += rhs_cols; - - lhs_rows_idx--; - } - - // Left-over rows - if (lhs_rows % 2) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - } - - rhs += 2 * rhs_cols; - dst += 2; - } - - if (rhs_rows % 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - for (int32_t lhs_rows_idx = 0; lhs_rows_idx < lhs_rows; ++lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - q31_t res00 = 0; - if (bias) - { - res00 = bias[rhs_rows - 1]; - } - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value = rhs_ptr[0]; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * 
rhs_value; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows - 1], dst_shifts[rhs_rows - 1]); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr += rhs_rows; - } - } -#endif - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c deleted file mode 100644 index 8373660ff..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mult_q15.c - * Description: Q15 vector multiplication with variable output shifts - * - * $Date: 29. April 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - - -/** - * @brief Q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated. - */ - -void arm_nn_mult_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - const uint16_t out_shift, - uint32_t blockSize) -{ - uint32_t blkCnt; /* loop counters */ - -#if defined (ARM_MATH_DSP) - -/* Run the below code for Cortex-M4 and Cortex-M3 */ - q31_t inA1, inA2, inB1, inB2; /* temporary input variables */ - q15_t out1, out2, out3, out4; /* temporary output variables */ - q31_t mul1, mul2, mul3, mul4; /* temporary variables */ - - /* loop Unrolling */ - blkCnt = blockSize >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. 
*/ - while (blkCnt > 0U) - { - /* read two samples at a time from sourceA */ - inA1 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcA); - /* read two samples at a time from sourceB */ - inB1 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcB); - /* read two samples at a time from sourceA */ - inA2 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcA); - /* read two samples at a time from sourceB */ - inB2 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcB); - - /* multiply mul = sourceA * sourceB */ - mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16)); - mul2 = (q31_t) ((q15_t) inA1 * (q15_t) inB1); - mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16)); - mul4 = (q31_t) ((q15_t) inA2 * (q15_t) inB2); - - /* saturate result to 16 bit */ - out1 = (q15_t) __SSAT((q31_t) (mul1 + NN_ROUND(out_shift)) >> out_shift, 16); - out2 = (q15_t) __SSAT((q31_t) (mul2 + NN_ROUND(out_shift)) >> out_shift, 16); - out3 = (q15_t) __SSAT((q31_t) (mul3 + NN_ROUND(out_shift)) >> out_shift, 16); - out4 = (q15_t) __SSAT((q31_t) (mul4 + NN_ROUND(out_shift)) >> out_shift, 16); - - /* store the result */ -#ifndef ARM_MATH_BIG_ENDIAN - - *__SIMD32(pDst)++ = __PKHBT(out2, out1, 16); - *__SIMD32(pDst)++ = __PKHBT(out4, out3, 16); - -#else - - *__SIMD32(pDst)++ = __PKHBT(out2, out1, 16); - *__SIMD32(pDst)++ = __PKHBT(out4, out3, 16); - -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* Decrement the blockSize loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. 
*/ - blkCnt = blockSize % 0x4U; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_DSP) */ - - - while (blkCnt > 0U) - { - /* C = A * B */ - /* Multiply the inputs and store the result in the destination buffer */ - *pDst++ = (q15_t) __SSAT(((q31_t) ((q31_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 16); - - /* Decrement the blockSize loop counter */ - blkCnt--; - } -} - -/** - * @} end of NNBasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c deleted file mode 100644 index cdb5385d6..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mult_q7.c - * Description: Q7 vector multiplication with variable output shifts - * - * $Date: 29. April 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/** - * @brief Q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated. - */ - -void arm_nn_mult_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - const uint16_t out_shift, - uint32_t blockSize) -{ - uint32_t blkCnt; /* loop counters */ - -#if defined (ARM_MATH_DSP) - -/* Run the below code for Cortex-M4 and Cortex-M3 */ - q7_t out1, out2, out3, out4; /* Temporary variables to store the product */ - - /* loop Unrolling */ - blkCnt = blockSize >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. 
*/ - while (blkCnt > 0U) - { - /* C = A * B */ - /* Multiply the inputs and store the results in temporary variables */ - out1 = (q7_t) __SSAT(((q15_t) ((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - out2 = (q7_t) __SSAT(((q15_t) ((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - out3 = (q7_t) __SSAT(((q15_t) ((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - out4 = (q7_t) __SSAT(((q15_t) ((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - - /* Store the results of 4 inputs in the destination buffer in single cycle by packing */ - *__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4); - - /* Decrement the blockSize loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4U; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_DSP) */ - - - while (blkCnt > 0U) - { - /* C = A * B */ - /* Multiply the inputs and store the result in the destination buffer */ - *pDst++ = (q7_t) __SSAT(((q15_t) ((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - - /* Decrement the blockSize loop counter */ - blkCnt--; - } -} - -/** - * @} end of NNBasicMath group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c deleted file mode 100644 index b23578734..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_vec_mat_mult_t_s8 - * Description: s8 vector by matrix (transposed) multiplication - * - * $Date: April 2, 2020 - * $Revision: V.1.5.0 - * - * Target Processor: Cortex-M - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 vector(lhs) by matrix (transposed) multiplication - * - * Refer header file for details. 
- * - */ -arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t lhs_offset, - const int32_t rhs_offset, - const int32_t dst_offset, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max) -{ -#if defined(ARM_MATH_MVEI) - - const int16x8_t rhs_offset_vec = vdupq_n_s16((int16_t)rhs_offset); - const int16x8_t lhs_offset_vec = vdupq_n_s16((int16_t)lhs_offset); - - int32_t row_loop_cnt = rhs_rows / 4; - - for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) - { - int32_t acc1 = bias[0]; - int32_t acc2 = bias[1]; - int32_t acc3 = bias[2]; - int32_t acc4 = bias[3]; - bias += 4; - - int32x4_t acc; - const int32_t col_loop_cnt = (rhs_cols + 7) / 8; - - const int8_t *vec = lhs; - const int8_t *rhs_0 = rhs; - const int8_t *rhs_1 = rhs + rhs_cols; - const int8_t *rhs_2 = rhs + 2 * rhs_cols; - const int8_t *rhs_3 = rhs + 3 * rhs_cols; - - uint32_t col_cnt = (uint32_t)rhs_cols; - - for (int i = 0; i < col_loop_cnt; i++) - { - mve_pred16_t p = vctp16q(col_cnt); - col_cnt -= 8; - const int16x8_t tmp_b = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(vec, p), lhs_offset_vec, p); - - const int16x8_t tmp_a0 = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(rhs_0, p), rhs_offset_vec, p); - acc1 = vmladavaq_p_s16(acc1, tmp_a0, tmp_b, p); - - const int16x8_t tmp_a1 = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(rhs_1, p), rhs_offset_vec, p); - acc2 = vmladavaq_p_s16(acc2, tmp_a1, tmp_b, p); - - const int16x8_t tmp_a2 = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(rhs_2, p), rhs_offset_vec, p); - acc3 = vmladavaq_p_s16(acc3, tmp_a2, tmp_b, p); - - const int16x8_t tmp_a3 = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(rhs_3, p), rhs_offset_vec, p); - acc4 = vmladavaq_p_s16(acc4, tmp_a3, tmp_b, p); - - vec += 8; - rhs_0 += 8; - rhs_1 += 8; - rhs_2 += 8; - rhs_3 
+= 8; - } - rhs += 4 * rhs_cols; - - acc[0] = acc1; - acc[1] = acc2; - acc[2] = acc3; - acc[3] = acc4; - - acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); - acc = vaddq_s32(acc, vdupq_n_s32(dst_offset)); - acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); - acc = vminq_s32(acc, vdupq_n_s32(activation_max)); - - vstrbq_s32(dst, acc); - dst += 4; - } - - row_loop_cnt = rhs_rows & 3; - - for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; - i_row_loop_cnt++) - { - int32_t acc = *bias++; - const int32_t col_loop_cnt = (rhs_cols + 7) / 8; - const int8_t *vec = lhs; - const int8_t *kernel_cur = rhs; - - uint32_t col_cnt = (uint32_t)rhs_cols; - - for (int i = 0; i < col_loop_cnt; i++) - { - mve_pred16_t p = vctp16q(col_cnt); - col_cnt -= 8; - const int16x8_t tmp_b = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(vec, p), lhs_offset_vec, p); - - const int16x8_t tmp_a = vaddq_m_s16(vuninitializedq_s16(), - vldrbq_z_s16(kernel_cur, p), rhs_offset_vec, p); - acc = vmladavaq_p_s16(acc, tmp_a, tmp_b, p); - vec += 8; - kernel_cur += 8; - } - rhs += rhs_cols; - - acc = arm_nn_requantize(acc, dst_multiplier, dst_shift); - acc += dst_offset; - - acc = MAX(acc, activation_min); - acc = MIN(acc, activation_max); - *dst++ = (int8_t)(acc); - } - -#elif defined(ARM_MATH_DSP) - const int32_t off0 = rhs_cols - 4; - const int16_t lhs_offset_s16 = lhs_offset; - const int16_t rhs_offset_s16 = rhs_offset; - - const uint32_t lhs_offset_s16x2 = __PKHBT(lhs_offset_s16, lhs_offset_s16, 16); - const uint32_t rhs_offset_s16x2 = __PKHBT(rhs_offset_s16, rhs_offset_s16, 16); - - for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) - { - const q7_t *lhs_ptr = &lhs[0]; - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = *bias++; - q31_t res01 = *bias++; - - int32_t rhs_cols_idx = 0; - - q31_t val0, val1, val2, val3, val4, val5; - for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) - { - // Read 4 x int8 values from the RHS matrix - val0 = 
arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val2 = __SXTAB16(rhs_offset_s16x2, val0); - // Read 4 x int8 values from the LHS vector - val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val1); - // Read 4 x int8 values from the RHS matrix - val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0); - val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8)); - - // Perform the accumulations - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTAB16(rhs_offset_s16x2, val4); - res00 = __SMLAD(val1, val0, res00); - val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8)); - // Read 4 x int8 values from the RHS matrix - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val1, val4, res01); - - val2 = __SXTAB16(rhs_offset_s16x2, val0); - // Read 4 x int8 values from the LHS vector - val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val1); - // Read 4 x int8 values from the RHS matrix - val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0); - val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8)); - - // Perform the accumulations - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTAB16(rhs_offset_s16x2, val4); - res00 = __SMLAD(val1, val0, res00); - val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8)); - // Read 4 x int8 values from the RHS matrix - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val1, val4, res01); - - val2 = __SXTAB16(rhs_offset_s16x2, val0); - // Read 4 x int8 values from the LHS vector - val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val1); - // Read 4 x int8 values from the RHS matrix - val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0); - val1 = 
__SXTAB16(lhs_offset_s16x2, __ROR(val1, 8)); - - // Perform the accumulations - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTAB16(rhs_offset_s16x2, val4); - res00 = __SMLAD(val1, val0, res00); - val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8)); - // Read 4 x int8 values from the RHS matrix - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val1, val4, res01); - - val2 = __SXTAB16(rhs_offset_s16x2, val0); - // Read 4 x int8 values from the LHS vector - val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val1); - // Read 4 x int8 values from the RHS matrix - val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0); - val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8)); - - // Perform the accumulations - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTAB16(rhs_offset_s16x2, val4); - res00 = __SMLAD(val1, val0, res00); - val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8)); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val1, val4, res01); - } - - for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value0 = rhs_ptr[0] + rhs_offset; - q31_t rhs_value1 = rhs_ptr[rhs_cols] + rhs_offset; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - - *dst++ = (q7_t)res00; - *dst++ = (q7_t)res01; - - rhs += 2 * rhs_cols; - } - - if (rhs_rows % 2) - { - const q7_t *lhs_ptr = &lhs[0]; - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = 
*bias++; - - int32_t rhs_cols_idx = 0; - - q31_t val0, val1, val2, val3; - for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) - { - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = __SXTAB16(rhs_offset_s16x2, val0); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val2); - val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8)); - - // Partial accumulations - res00 = __SMLAD(val3, val1, res00); - res00 = __SMLAD(val2, val0, res00); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = __SXTAB16(rhs_offset_s16x2, val0); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val2); - val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8)); - - // Partial accumulations - res00 = __SMLAD(val3, val1, res00); - res00 = __SMLAD(val2, val0, res00); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = __SXTAB16(rhs_offset_s16x2, val0); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val2); - val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8)); - - // Partial accumulations - res00 = __SMLAD(val3, val1, res00); - res00 = __SMLAD(val2, val0, res00); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = __SXTAB16(rhs_offset_s16x2, val0); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8)); - val3 = __SXTAB16(lhs_offset_s16x2, val2); - val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8)); - - // Partial accumulations - res00 = __SMLAD(val3, val1, res00); - res00 = __SMLAD(val2, val0, res00); - } - - for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value0 = rhs_ptr[0] + rhs_offset; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value0; - - ++rhs_ptr; - 
++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - *dst = (q7_t)res00; - } - -#else - - for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) - { - const q7_t *lhs_ptr = &lhs[0]; - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = *bias++; - q31_t res01 = *bias++; - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value0 = rhs_ptr[0] + rhs_offset; - q31_t rhs_value1 = rhs_ptr[rhs_cols] + rhs_offset; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - - *dst++ = (q7_t)res00; - *dst++ = (q7_t)res01; - - rhs += 2 * rhs_cols; - } - - if (rhs_rows % 2) - { - const q7_t *lhs_ptr = &lhs[0]; - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = *bias++; - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value0 = rhs_ptr[0] + rhs_offset; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value0; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - *dst = (q7_t)res00; - } -#endif - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c deleted file mode 100644 index c28f1a612..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nntables.c - * Description: Converts the elements of the Q7 vector to Q15 vector without left-shift - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @brief tables for various activation functions - * - * This file include the declaration of common tables. 
- * Most of them are used for activation functions - * - * Assumption: - * Unified table: input is 3.x format, i.e, range of [-8, 8) - * sigmoid(8) = 0.9996646498695336 - * tanh(8) = 0.9999997749296758 - * The accuracy here should be good enough - * - * 2-stage HL table: - * - * The entire input range is divided into two parts: - * - * Low range table: 0x000x xxxx or 0x111x xxxx - * table entry will be the binary number excluding the first - * two digits, i.e., 0x0x xxxx or 0x1x xxxx - * - * - * - * High range table 0x0010 0000 -- 0x0111 1111 - * 0x1000 0000 -- 0x1101 1111 - * - * For positive numbers, table entry will be - * 0x0010 0000 -- 0x0111 1111 minus 0x0010 0000 - * i.e., 0x0000 0000 - 0x0101 11111 - * - * same thing for the negative numbers, table entry will be - * 0x1000 0000 -- 0x1101 1111 minux 0x0010 0000 - * i.e., 0x0110 0000 - 0x1011 1111 - */ - -const q7_t sigmoidTable_q7[256] = { - 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, - 0x50, 0x52, 0x53, 0x55, 0x57, 0x59, 0x5a, 0x5c, - 0x5e, 0x5f, 0x61, 0x62, 0x63, 0x65, 0x66, 0x67, - 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, - 0x71, 0x72, 0x72, 0x73, 0x74, 0x74, 0x75, 0x76, - 0x76, 0x77, 0x77, 0x78, 0x78, 0x79, 0x79, 0x7a, - 0x7a, 0x7a, 0x7b, 0x7b, 0x7b, 0x7c, 0x7c, 0x7c, - 0x7c, 0x7c, 0x7d, 0x7d, 0x7d, 0x7d, 0x7d, 0x7e, - 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, - 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, - 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, - 0x0a, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0e, - 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, - 0x17, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, 0x21, - 0x22, 0x24, 0x26, 0x27, 0x29, 0x2b, 0x2d, 0x2e, - 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, -}; - -const q15_t sigmoidTable_q15[256] = { - 0x4000, 0x4200, 0x43ff, 0x45fc, 0x47f5, 0x49eb, 0x4bdc, 0x4dc8, - 0x4fad, 0x518a, 0x5360, 0x552c, 0x56ef, 0x58a8, 0x5a57, 0x5bfb, - 0x5d93, 0x5f20, 0x60a1, 0x6216, 0x637f, 0x64db, 0x662b, 0x676f, - 0x68a6, 0x69d2, 0x6af1, 0x6c05, 0x6d0d, 0x6e09, 0x6efb, 0x6fe2, - 0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7, - 0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f, - 0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03, - 0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d, - 0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81, - 0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17, - 0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72, - 0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa, - 0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc, - 0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0, - 0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed, - 0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4, - 0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, - 0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c, - 0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e, - 0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c, - 0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d, - 0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 
0x00c2, 0x00ce, - 0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152, - 0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a, - 0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388, - 0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8, - 0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a, - 0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70, - 0x0f42, 0x101e, 0x1105, 0x11f7, 0x12f3, 0x13fb, 0x150f, 0x162e, - 0x175a, 0x1891, 0x19d5, 0x1b25, 0x1c81, 0x1dea, 0x1f5f, 0x20e0, - 0x226d, 0x2405, 0x25a9, 0x2758, 0x2911, 0x2ad4, 0x2ca0, 0x2e76, - 0x3053, 0x3238, 0x3424, 0x3615, 0x380b, 0x3a04, 0x3c01, 0x3e00, -}; - -const q15_t sigmoidLTable_q15[128] = { - 0x4000, 0x4100, 0x4200, 0x42ff, 0x43ff, 0x44fd, 0x45fc, 0x46f9, - 0x47f5, 0x48f1, 0x49eb, 0x4ae5, 0x4bdc, 0x4cd3, 0x4dc8, 0x4ebb, - 0x4fad, 0x509c, 0x518a, 0x5276, 0x5360, 0x5447, 0x552c, 0x560f, - 0x56ef, 0x57cd, 0x58a8, 0x5981, 0x5a57, 0x5b2a, 0x5bfb, 0x5cc9, - 0x5d93, 0x5e5b, 0x5f20, 0x5fe2, 0x60a1, 0x615d, 0x6216, 0x62cc, - 0x637f, 0x642e, 0x64db, 0x6584, 0x662b, 0x66ce, 0x676f, 0x680c, - 0x68a6, 0x693d, 0x69d2, 0x6a63, 0x6af1, 0x6b7c, 0x6c05, 0x6c8a, - 0x6d0d, 0x6d8d, 0x6e09, 0x6e84, 0x6efb, 0x6f70, 0x6fe2, 0x7051, - 0x0f42, 0x0faf, 0x101e, 0x1090, 0x1105, 0x117c, 0x11f7, 0x1273, - 0x12f3, 0x1376, 0x13fb, 0x1484, 0x150f, 0x159d, 0x162e, 0x16c3, - 0x175a, 0x17f4, 0x1891, 0x1932, 0x19d5, 0x1a7c, 0x1b25, 0x1bd2, - 0x1c81, 0x1d34, 0x1dea, 0x1ea3, 0x1f5f, 0x201e, 0x20e0, 0x21a5, - 0x226d, 0x2337, 0x2405, 0x24d6, 0x25a9, 0x267f, 0x2758, 0x2833, - 0x2911, 0x29f1, 0x2ad4, 0x2bb9, 0x2ca0, 0x2d8a, 0x2e76, 0x2f64, - 0x3053, 0x3145, 0x3238, 0x332d, 0x3424, 0x351b, 0x3615, 0x370f, - 0x380b, 0x3907, 0x3a04, 0x3b03, 0x3c01, 0x3d01, 0x3e00, 0x3f00, -}; - -const q15_t sigmoidHTable_q15[192] = { - 0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7, - 0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f, - 0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 
0x7b80, 0x7bc4, 0x7c03, - 0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d, - 0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81, - 0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17, - 0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72, - 0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa, - 0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc, - 0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0, - 0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed, - 0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4, - 0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, - 0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c, - 0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e, - 0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c, - 0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d, - 0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce, - 0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152, - 0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a, - 0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388, - 0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8, - 0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a, - 0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70, -}; - -const q7_t tanhTable_q7[256] = { - 0x00, 0x08, 0x10, 0x18, 0x1f, 0x27, 0x2e, 0x35, - 0x3b, 0x41, 0x47, 0x4c, 0x51, 0x56, 0x5a, 0x5e, - 0x61, 0x65, 0x68, 0x6a, 0x6d, 0x6f, 0x71, 0x72, - 0x74, 0x75, 0x76, 0x78, 0x78, 0x79, 0x7a, 0x7b, - 0x7b, 0x7c, 0x7c, 0x7d, 0x7d, 0x7e, 0x7e, 0x7e, - 0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 
- 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81, - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x82, - 0x82, 0x82, 0x82, 0x82, 0x83, 0x83, 0x84, 0x84, - 0x85, 0x85, 0x86, 0x87, 0x88, 0x88, 0x8a, 0x8b, - 0x8c, 0x8e, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9b, - 0x9f, 0xa2, 0xa6, 0xaa, 0xaf, 0xb4, 0xb9, 0xbf, - 0xc5, 0xcb, 0xd2, 0xd9, 0xe1, 0xe8, 0xf0, 0xf8, -}; - -const q15_t tanhTable_q15[256] = { - 0x0000, 0x07fd, 0x0feb, 0x17b9, 0x1f59, 0x26bf, 0x2ddf, 0x34ae, - 0x3b27, 0x4142, 0x46fd, 0x4c56, 0x514d, 0x55e2, 0x5a1a, 0x5df6, - 0x617c, 0x64b0, 0x6797, 0x6a37, 0x6c95, 0x6eb5, 0x709e, 0x7254, - 0x73dc, 0x753a, 0x7672, 0x7788, 0x787f, 0x795b, 0x7a1e, 0x7acb, - 0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f, - 0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48, - 0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc, - 0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7, - 0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7, - 0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd, - 0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 
0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, - 0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003, - 0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007, - 0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013, - 0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035, - 0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f, - 0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183, - 0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412, - 0x849b, 0x8535, 0x85e2, 0x86a5, 0x8781, 0x8878, 0x898e, 0x8ac6, - 0x8c24, 0x8dac, 0x8f62, 0x914b, 0x936b, 0x95c9, 0x9869, 0x9b50, - 0x9e84, 0xa20a, 0xa5e6, 0xaa1e, 0xaeb3, 0xb3aa, 0xb903, 0xbebe, - 0xc4d9, 0xcb52, 0xd221, 0xd941, 0xe0a7, 0xe847, 0xf015, 0xf803, -}; - -const q15_t tanhLTable_q15[128] = { - 0x0000, 0x0400, 0x07fd, 0x0bf7, 0x0feb, 0x13d7, 0x17b9, 0x1b90, - 0x1f59, 0x2314, 0x26bf, 0x2a58, 0x2ddf, 0x3151, 0x34ae, 0x37f6, - 0x3b27, 0x3e40, 0x4142, 0x442c, 0x46fd, 0x49b6, 0x4c56, 0x4edd, - 0x514d, 0x53a3, 0x55e2, 0x580a, 0x5a1a, 0x5c13, 0x5df6, 0x5fc4, - 0x617c, 0x6320, 0x64b0, 0x662d, 0x6797, 0x68f0, 0x6a37, 0x6b6e, - 0x6c95, 0x6dac, 0x6eb5, 0x6fb0, 0x709e, 0x717f, 0x7254, 0x731e, - 0x73dc, 0x7490, 0x753a, 0x75da, 0x7672, 0x7701, 0x7788, 0x7807, - 0x787f, 0x78f0, 0x795b, 0x79bf, 0x7a1e, 0x7a77, 0x7acb, 0x7b1b, - 0x849b, 0x84e5, 0x8535, 0x8589, 0x85e2, 0x8641, 0x86a5, 0x8710, - 0x8781, 0x87f9, 0x8878, 0x88ff, 0x898e, 0x8a26, 0x8ac6, 0x8b70, - 0x8c24, 0x8ce2, 0x8dac, 0x8e81, 0x8f62, 0x9050, 0x914b, 0x9254, - 0x936b, 0x9492, 0x95c9, 
0x9710, 0x9869, 0x99d3, 0x9b50, 0x9ce0, - 0x9e84, 0xa03c, 0xa20a, 0xa3ed, 0xa5e6, 0xa7f6, 0xaa1e, 0xac5d, - 0xaeb3, 0xb123, 0xb3aa, 0xb64a, 0xb903, 0xbbd4, 0xbebe, 0xc1c0, - 0xc4d9, 0xc80a, 0xcb52, 0xceaf, 0xd221, 0xd5a8, 0xd941, 0xdcec, - 0xe0a7, 0xe470, 0xe847, 0xec29, 0xf015, 0xf409, 0xf803, 0xfc00, -}; - -const q15_t tanhHTable_q15[192] = { - 0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f, - 0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48, - 0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc, - 0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7, - 0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7, - 0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd, - 0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, - 0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003, - 0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007, - 0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013, - 0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035, - 0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f, - 0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183, - 0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412, -}; diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c deleted file mode 100644 index 56e89e85c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_no_shift.c - * Description: Converts the elements of the Q7 vector to Q15 vector without left-shift - * - * $Date: May 29, 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -/** - * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * - * \par Description: - * - * The equation used for the conversion process is: - * - *
- * 	pDst[n] = (q15_t) pSrc[n];   0 <= n < blockSize.
- * 
- * - */ - -void arm_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize) -{ - const q7_t *pIn = pSrc; - uint32_t blkCnt; - -#if defined(ARM_MATH_DSP) - q31_t in; - q31_t in1, in2; - q31_t out1, out2; - - /*loop Unrolling */ - blkCnt = blockSize >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */ - while (blkCnt > 0u) - { - in = arm_nn_read_q7x4_ia(&pIn); - - /* rotatate in by 8 and extend two q7_t values to q15_t values */ - in1 = __SXTB16(__ROR((uint32_t)in, 8)); - - /* extend remaining two q7_t values to q15_t values */ - in2 = __SXTB16(in); - -#ifndef ARM_MATH_BIG_ENDIAN - out2 = (int32_t)__PKHTB(in1, in2, 16); - out1 = (int32_t)__PKHBT(in2, in1, 16); -#else - out1 = (int32_t)__PKHTB(in1, in2, 16); - out2 = (int32_t)__PKHBT(in2, in1, 16); -#endif - write_q15x2_ia(&pDst, out1); - write_q15x2_ia(&pDst, out2); - - /* Decrement the loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. 
*/ - blkCnt = blockSize % 0x4u; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Loop over blockSize number of values */ - blkCnt = blockSize; - -#endif /* #ifndef ARM_MATH_CM0_FAMILY */ - - while (blkCnt > 0u) - { - /* convert from q7 to q15 and then store the results in the destination buffer */ - *pDst++ = (q15_t)*pIn++; - - /* Decrement the loop counter */ - blkCnt--; - } - -} - -/** - * @} end of nndata_convert group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c deleted file mode 100644 index 41f2cd478..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_reordered_no_shift.c - * Description: Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * - * $Date: May 29, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -/** - * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * - * @details - * - * This function does the q7 to q15 expansion with re-ordering - * - *
- *                          |   A1   |   A2   |   A3   |   A4   |
- *
- *                           0      7 8     15 16    23 24    31
- * 
- * - * is converted into: - * - *
- *  |       A1       |       A3       |   and  |       A2       |       A4       |
- *
- *   0             15 16            31          0             15 16            31
- * 
- * - * - * This looks strange but is natural considering how sign-extension is done at - * assembly level. - * - * The expansion of other other oprand will follow the same rule so that the end - * results are the same. - * - * The tail (i.e., last (N % 4) elements) will still be in original order. - * - */ - -void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize) -{ - const q7_t *pIn = pSrc; /* Src pointer */ - uint32_t blkCnt; /* loop counter */ - -#ifndef ARM_MATH_CM0_FAMILY - q31_t in; - q31_t in1, in2; - - /* Run the below code for Cortex-M4 and Cortex-M3 */ - - /*loop Unrolling */ - blkCnt = blockSize >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while (blkCnt > 0u) - { - /* C = (q15_t) A << 8 */ - /* convert from q7 to q15 and then store the results in the destination buffer */ - in = arm_nn_read_q7x4_ia(&pIn); - - /* rotatate in by 8 and extend two q7_t values to q15_t values */ - in1 = __SXTB16(__ROR((uint32_t)in, 8)); - - /* extend remainig two q7_t values to q15_t values */ - in2 = __SXTB16(in); - -#ifndef ARM_MATH_BIG_ENDIAN - *__SIMD32(pDst)++ = in2; - *__SIMD32(pDst)++ = in1; -#else - *__SIMD32(pDst)++ = in1; - *__SIMD32(pDst)++ = in2; -#endif - - /* Decrement the loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. 
*/ - blkCnt = blockSize % 0x4u; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Loop over blockSize number of values */ - blkCnt = blockSize; - -#endif /* #ifndef ARM_MATH_CM0_FAMILY */ - - while (blkCnt > 0u) - { - /* C = (q15_t) A << 8 */ - /* convert from q7 to q15 and then store the results in the destination buffer */ - *pDst++ = (q15_t) * pIn++; - - /* Decrement the loop counter */ - blkCnt--; - } - -} - -/** - * @} end of q7_to_x group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c deleted file mode 100644 index d2c8dfc61..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_reordered_with_offset.c - * Description: Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. The re-ordering - * is a signature of sign extension intrinsic(DSP extension). - * - * $Date: May 29, 2020 - * $Revision: V.2.0.3 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -/** - * @brief Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. - * - * @note Refer header file for details. - * - */ - -void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset) -{ - -#if defined(ARM_MATH_DSP) - uint32_t block_cnt; - /* Run the below code for cores that support SIMD instructions */ - q31_t in_q7x4; - q31_t out_q15x2_1; - q31_t out_q15x2_2; - - /*loop unrolling */ - block_cnt = block_size >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 
*/ - const q31_t offset_q15x2 = (q31_t)__PKHBT(offset, offset, 16); - while (block_cnt > 0u) - { - /* convert from q7 to q15 and then store the results in the destination buffer */ - in_q7x4 = arm_nn_read_q7x4_ia(&src); - - /* Extract and sign extend each of the four q7 values to q15 */ - out_q15x2_1 = __SXTAB16(offset_q15x2, __ROR((uint32_t)in_q7x4, 8)); - out_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4); - - write_q15x2_ia(&dst, out_q15x2_2); - write_q15x2_ia(&dst, out_q15x2_1); - - block_cnt--; - } - /* Handle left over samples */ - block_cnt = block_size % 0x4u; - - while (block_cnt > 0u) - { - *dst++ = (q15_t)*src++ + offset; - - /* Decrement the loop counter */ - block_cnt--; - } -#else - (void)src; - (void)dst; - (void)block_size; - (void)offset; - /* Not available */ -#endif -} - -/** - * @} end of nndata_convert group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c deleted file mode 100644 index 69c70e381..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in_q7x4 compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in_q7x4 writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_with_offset.c - * Description: Converts the elements of the Q7 vector to Q15 vector with an added offset - * - * $Date: March 3, 2020 - * $Revision: V.2.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -void arm_q7_to_q15_with_offset(const q7_t *src, - q15_t *dst, - uint32_t block_size, - q15_t offset) -{ - int block_cnt; - -#if defined(ARM_MATH_MVEI) - - int16x8_t source; - const int16x8_t source_offset = vdupq_n_s16(offset); - block_cnt = block_size / 8; - - while (block_cnt > 0) - { - source = vldrbq_s16(src); - source = vaddq_s16(source, source_offset); - vstrhq_s16(dst, source); - dst += 8; - src += 8; - block_cnt--; - } - - block_cnt = block_size & 0x7; - -#elif defined(ARM_MATH_DSP) - /* Run the below code for cores that support SIMD instructions */ - q31_t in_q7x4; - q31_t in_q15x2_1; - q31_t in_q15x2_2; - q31_t out_q15x2_1; - q31_t out_q15x2_2; - - /*loop unrolling */ - block_cnt = block_size >> 2; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 
*/ - const q31_t offset_q15x2 = __PKHBT(offset, offset, 16); - while (block_cnt > 0) - { - /* convert from q7 to q15 and then store the results in the destination buffer */ - in_q7x4 = arm_nn_read_q7x4_ia(&src); - - /* Extract and sign extend each of the four q7 values to q15 */ - in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8)); - in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4); - - out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16); - out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16); - - write_q15x2_ia(&dst, out_q15x2_1); - write_q15x2_ia(&dst, out_q15x2_2); - - block_cnt--; - } - /* Handle left over samples */ - block_cnt = block_size % 0x4; - -#else - /* Run the below code for Cortex-M0 */ - /* Loop over block_size number of values */ - block_cnt = block_size; -#endif - - while (block_cnt > 0) - { - *dst++ = (q15_t)*src++ + offset; - - /* Decrement the loop counter */ - block_cnt--; - } -} - -/** - * @} end of nndata_convert group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c deleted file mode 100644 index 048d32bc5..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_avgpool_s8.c - * Description: Pooling function implementations - * - * $Date: September 3,2020 - * $Revision: V.2.0.2 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - -static void scale_q31_to_q7_and_clamp(const q31_t *buffer, - q7_t *target, - int32_t length, - const int32_t count, - const int act_min, - const int act_max) -{ - const int half_count = count / 2; - for (int i = 0; i < length; i++) - { - int32_t sum = buffer[i] > 0 ? (buffer[i] + half_count) : (buffer[i] - half_count); - sum = sum / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - target[i] = (q7_t)sum; - } -} -#endif - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/* - * s8 average pooling function - * - * Refer to header file for details. 
- * - */ - -#if defined(ARM_MATH_MVEI) - -arm_status arm_avgpool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *dst) -{ - (void)ctx; - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t ch_src = input_dims->c; - - int32_t i_x, i_y; - int32_t k_x, k_y; - - for (i_y = 0; i_y < output_y; i_y++) - { - for (i_x = 0; i_x < output_x; i_x++) - { - - int32_t k_y_start, k_y_end; - int32_t k_x_start, k_x_end; - int32_t chCnt; - const int8_t *pTmp, *pTmpInner; - int8_t *pDst; - - k_y_start = MAX(0, i_y * stride_y - pad_y); - k_y_end = MIN(i_y * stride_y - pad_y + kernel_y, input_y); - - k_x_start = MAX(0, i_x * stride_x - pad_x); - k_x_end = MIN(i_x * stride_x - pad_x + kernel_x, input_x); - - pTmp = src; - pDst = &dst[ch_src * (i_x + i_y * output_x)]; - - chCnt = ch_src >> 4; - while (chCnt > 0) - { - int32x4_t sumV1, sumV2, sumV3, sumV4; - - int8x16_t tempV; - int16x8_t tempVLO, tempVHI; - int32x4_t tempVLOLO, tempVLOHI, tempVHILO, tempVHIHI; - int32_t count = 0; - - sumV1 = vdupq_n_s32(0); - sumV2 = vdupq_n_s32(0); - sumV3 = vdupq_n_s32(0); - sumV4 = vdupq_n_s32(0); - - for (k_y = k_y_start; k_y < k_y_end; k_y++) - { - for (k_x = k_x_start; k_x < k_x_end; k_x++) - { - pTmpInner = pTmp + (ch_src * (k_x + k_y * input_x)); - tempV = vldrbq_s8(pTmpInner); - - tempVLO = vmovlbq_s8(tempV); - tempVHI = 
vmovltq_s8(tempV); - - tempVLOLO = vmovlbq_s16(tempVLO); - tempVLOHI = vmovltq_s16(tempVLO); - - tempVHILO = vmovlbq_s16(tempVHI); - tempVHIHI = vmovltq_s16(tempVHI); - - sumV1 = vaddq_s32(sumV1, tempVLOLO); - sumV2 = vaddq_s32(sumV2, tempVLOHI); - sumV3 = vaddq_s32(sumV3, tempVHILO); - sumV4 = vaddq_s32(sumV4, tempVHIHI); - - count++; - } - } - - sumV1[0] = sumV1[0] > 0 ? (sumV1[0] + count / 2) / count : (sumV1[0] - count / 2) / count; - sumV1[1] = sumV1[1] > 0 ? (sumV1[1] + count / 2) / count : (sumV1[1] - count / 2) / count; - sumV1[2] = sumV1[2] > 0 ? (sumV1[2] + count / 2) / count : (sumV1[2] - count / 2) / count; - sumV1[3] = sumV1[3] > 0 ? (sumV1[3] + count / 2) / count : (sumV1[3] - count / 2) / count; - - sumV2[0] = sumV2[0] > 0 ? (sumV2[0] + count / 2) / count : (sumV2[0] - count / 2) / count; - sumV2[1] = sumV2[1] > 0 ? (sumV2[1] + count / 2) / count : (sumV2[1] - count / 2) / count; - sumV2[2] = sumV2[2] > 0 ? (sumV2[2] + count / 2) / count : (sumV2[2] - count / 2) / count; - sumV2[3] = sumV2[3] > 0 ? (sumV2[3] + count / 2) / count : (sumV2[3] - count / 2) / count; - - sumV3[0] = sumV3[0] > 0 ? (sumV3[0] + count / 2) / count : (sumV3[0] - count / 2) / count; - sumV3[1] = sumV3[1] > 0 ? (sumV3[1] + count / 2) / count : (sumV3[1] - count / 2) / count; - sumV3[2] = sumV3[2] > 0 ? (sumV3[2] + count / 2) / count : (sumV3[2] - count / 2) / count; - sumV3[3] = sumV3[3] > 0 ? (sumV3[3] + count / 2) / count : (sumV3[3] - count / 2) / count; - - sumV4[0] = sumV4[0] > 0 ? (sumV4[0] + count / 2) / count : (sumV4[0] - count / 2) / count; - sumV4[1] = sumV4[1] > 0 ? (sumV4[1] + count / 2) / count : (sumV4[1] - count / 2) / count; - sumV4[2] = sumV4[2] > 0 ? (sumV4[2] + count / 2) / count : (sumV4[2] - count / 2) / count; - sumV4[3] = sumV4[3] > 0 ? 
(sumV4[3] + count / 2) / count : (sumV4[3] - count / 2) / count; - - sumV1 = vmaxq_s32(sumV1, vdupq_n_s32(act_min)); - sumV1 = vminq_s32(sumV1, vdupq_n_s32(act_max)); - - sumV2 = vmaxq_s32(sumV2, vdupq_n_s32(act_min)); - sumV2 = vminq_s32(sumV2, vdupq_n_s32(act_max)); - - sumV3 = vmaxq_s32(sumV3, vdupq_n_s32(act_min)); - sumV3 = vminq_s32(sumV3, vdupq_n_s32(act_max)); - - sumV4 = vmaxq_s32(sumV4, vdupq_n_s32(act_min)); - sumV4 = vminq_s32(sumV4, vdupq_n_s32(act_max)); - - tempVLO = vmovnbq_s32(tempVLO, sumV1); - tempVLO = vmovntq_s32(tempVLO, sumV2); - - tempVHI = vmovnbq_s32(tempVHI, sumV3); - tempVHI = vmovntq_s32(tempVHI, sumV4); - - tempV = vmovnbq_s16(tempV, tempVLO); - tempV = vmovntq_s16(tempV, tempVHI); - - vstrbq_s8(pDst, tempV); - pDst += 16; - - chCnt--; - pTmp += 16; - } - - chCnt = ch_src & 0xF; - while (chCnt > 0) - { - int32_t sum = 0; - int32_t count = 0; - - for (k_y = k_y_start; k_y < k_y_end; k_y++) - { - for (k_x = k_x_start; k_x < k_x_end; k_x++) - { - sum += pTmp[ch_src * (k_x + k_y * input_x)]; - count++; - } - } - sum = sum > 0 ? 
(sum + count / 2) / count : (sum - count / 2) / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - *pDst++ = sum; - - chCnt--; - pTmp++; - } - } - } - return ARM_MATH_SUCCESS; -} - -#else -arm_status arm_avgpool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *dst) -{ - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t ch_src = input_dims->c; - q31_t *buffer = (q31_t *)ctx->buf; - -#if defined(ARM_MATH_DSP) - - /* Run the following code for CPU's with DSP extension - */ - for (int i_y = 0, idx_y = -pad_y; i_y < output_y; idx_y += stride_y, i_y++) - { - for (int i_x = 0, idx_x = -pad_x; i_x < output_x; idx_x += stride_x, i_x++) - { - /* Condition for kernel start dimension: - (base_idx_ + kernel__start) >= 0 */ - const int32_t kernel_y_start = MAX(0, -idx_y); - const int32_t kernel_x_start = MAX(0, -idx_x); - - /* Condition for kernel end dimension: - (base_idx_ + kernel__end) < dim_src_ */ - const int32_t kernel_y_end = MIN(kernel_y, input_y - idx_y); - const int32_t kernel_x_end = MIN(kernel_x, input_x - idx_x); - - int count = 0; - - for (int k_y = kernel_y_start; k_y < kernel_y_end; k_y++) - { - for (int k_x = kernel_x_start; k_x < kernel_x_end; k_x++) - { - const q7_t *start = src + ch_src * (k_x + idx_x + (k_y + idx_y) * input_x); - - if (count == 0) - { - for (int i = 0; i < 
ch_src; i++) - { - buffer[i] = start[i]; - } - } - else - { - for (int i = 0; i < ch_src; i++) - { - buffer[i] = __QADD(start[i], buffer[i]); - } - } - count++; - } - } - scale_q31_to_q7_and_clamp(buffer, dst, ch_src, count, act_min, act_max); - dst += ch_src; - } - } -#else - - /* Reference C code adapted from CMSIS-NN arm_avepool_q7_HWC. - */ - (void)buffer; - int16_t i_ch_in, i_x, i_y; - int16_t k_x, k_y; - - for (i_y = 0; i_y < output_y; i_y++) - { - for (i_x = 0; i_x < output_x; i_x++) - { - for (i_ch_in = 0; i_ch_in < ch_src; i_ch_in++) - { - int sum = 0; - int count = 0; - for (k_y = i_y * stride_y - pad_y; k_y < i_y * stride_y - pad_y + kernel_y; k_y++) - { - for (k_x = i_x * stride_x - pad_x; k_x < i_x * stride_x - pad_x + kernel_x; k_x++) - { - if (k_y >= 0 && k_x >= 0 && k_y < input_y && k_x < input_x) - { - sum += src[i_ch_in + ch_src * (k_x + k_y * input_x)]; - count++; - } - } - } - sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - dst[i_ch_in + ch_src * (i_x + i_y * output_x)] = sum; - } - } - } - -#endif - return ARM_MATH_SUCCESS; -} - -#endif /* ARM_MATH_MVEI */ - -int32_t arm_avgpool_s8_get_buffer_size(const int output_x, - const int ch_src) -{ - (void)output_x; - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - return (ch_src * sizeof(int32_t)); -#else - (void)ch_src; - return 0; -#endif -} -/** - * @} end of Pooling group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c deleted file mode 100644 index 33a44362f..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_max_pool_s8.c - * Description: Pooling function implementations - * - * $Date: June 11, 2020 - * $Revision: V.2.0.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -static void compare_and_replace_if_larger_q7(q7_t *base, - const q7_t *target, - int32_t length) -{ -#if defined(ARM_MATH_MVEI) - int32_t loop_count = (length + 15) / 16; - for (int i = 0; i < loop_count; i++) - { - mve_pred16_t p = vctp16q((uint32_t)length); - const int8x16_t op_1 = vldrbq_z_s8(base, p); - const int8x16_t op_2 = vldrbq_z_s8(target, p); - const int8x16_t max = vmaxq_m_s8(vuninitializedq_s8(), op_1, op_2, p); - vstrbq_p_s8(base, max, p); - base += 16; - target += 16; - length -= 16; - } -#else - q7_t *dst = base; - const q7_t *src = target; - union arm_nnword ref_max; - union arm_nnword comp_max; - int32_t cnt = length >> 2; - - while (cnt > 0l) - { - 
ref_max.word = arm_nn_read_q7x4(dst); - comp_max.word = arm_nn_read_q7x4_ia(&src); - - if (comp_max.bytes[0] > ref_max.bytes[0]) - { - ref_max.bytes[0] = comp_max.bytes[0]; - } - if (comp_max.bytes[1] > ref_max.bytes[1]) - { - ref_max.bytes[1] = comp_max.bytes[1]; - } - if (comp_max.bytes[2] > ref_max.bytes[2]) - { - ref_max.bytes[2] = comp_max.bytes[2]; - } - if (comp_max.bytes[3] > ref_max.bytes[3]) - { - ref_max.bytes[3] = comp_max.bytes[3]; - } - - write_q7x4_ia(&dst, ref_max.word); - - cnt--; - } - - cnt = length & 0x3; - while (cnt > 0l) - { - if (*src > *dst) - { - *dst = *src; - } - dst++; - src++; - cnt--; - } -#endif -} - -static void -clamp_output(q7_t *source, int32_t length, const int32_t act_min, const int32_t act_max) -{ -#if defined(ARM_MATH_MVEI) - int32_t - loop_count = (length + 15) / 16; - for (int i = 0; i < loop_count; i++) - { - mve_pred16_t p = vctp16q((uint32_t)length); - length -= 16; - const int8x16_t src = vldrbq_z_s8(source, p); - const int8x16_t predicated_min = vdupq_m_n_s8(vuninitializedq_s8(), (int8_t)act_min, p); - const int8x16_t predicated_max = vdupq_m_n_s8(vuninitializedq_s8(), (int8_t)act_max, p); - int8x16_t - res = vmaxq_m_s8(vuninitializedq_s8(), src, predicated_min, p); - res = vminq_m_s8(vuninitializedq_s8(), src, predicated_max, p); - vstrbq_p_s8(source, res, p); - source += 16; - } -#else - union arm_nnword in; - int32_t cnt = length >> 2; - - while (cnt > 0l) - { - in.word = arm_nn_read_q7x4(source); - - in.bytes[0] = MAX(in.bytes[0], act_min); - in.bytes[0] = MIN(in.bytes[0], act_max); - in.bytes[1] = MAX(in.bytes[1], act_min); - in.bytes[1] = MIN(in.bytes[1], act_max); - in.bytes[2] = MAX(in.bytes[2], act_min); - in.bytes[2] = MIN(in.bytes[2], act_max); - in.bytes[3] = MAX(in.bytes[3], act_min); - in.bytes[3] = MIN(in.bytes[3], act_max); - - write_q7x4_ia(&source, in.word); - cnt--; - } - - cnt = length & 0x3; - while (cnt > 0l) - { - int32_t comp = *source; - comp = MAX(comp, act_min); - comp = MIN(comp, act_max); - 
*source++ = (int8_t)comp; - cnt--; - } -#endif -} - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/* - * Optimized s8 max pooling function - * - * Refer to header file for details. - * - */ - -arm_status -arm_max_pool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *dst) -{ - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t channel_in = input_dims->c; - (void)ctx; - q7_t *dst_base = dst; - - for (int i_y = 0, base_idx_y = -pad_y; i_y < output_y; base_idx_y += stride_y, i_y++) - { - for (int i_x = 0, base_idx_x = -pad_x; i_x < output_x; base_idx_x += stride_x, i_x++) - { - /* Condition for kernel start dimension: (base_idx_ + kernel__start) >= 0 */ - const int32_t ker_y_start = MAX(0, -base_idx_y); - const int32_t ker_x_start = MAX(0, -base_idx_x); - - /* Condition for kernel end dimension: (base_idx_ + kernel__end) < dim_src_ */ - const int32_t kernel_y_end = MIN(kernel_y, input_y - base_idx_y); - const int32_t kernel_x_end = MIN(kernel_x, input_x - base_idx_x); - - int count = 0; - - for (int k_y = ker_y_start; k_y < kernel_y_end; k_y++) - { - for (int k_x = ker_x_start; k_x < kernel_x_end; k_x++) - { - const q7_t *start = src + channel_in * (k_x + base_idx_x + (k_y + base_idx_y) * input_x); - - if (count == 0) - { - memcpy(dst, start, channel_in); - count++; - 
} - else - { - compare_and_replace_if_larger_q7(dst, start, channel_in); - } - } - } - /* 'count' is expected to be non-zero here. */ - dst += channel_in; - } - } - - clamp_output(dst_base, output_x * output_y * channel_in, act_min, act_max); - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of Pooling group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c deleted file mode 100644 index 14e0ef7a2..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_pool_q7_HWC.c - * Description: Pooling function implementations - * - * $Date: 17. 
January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -#if defined (ARM_MATH_DSP) - -/** - * @brief A few utility functions used by pooling functions - * - * - */ - -static void buffer_scale_back_q15_to_q7(q15_t * buffer, q7_t * target, uint16_t length, uint16_t scale) -{ - int i; - - for (i = 0; i < length; i++) - { - target[i] = (q7_t) (buffer[i] / scale); - } -} - -static void compare_and_replace_if_larger_q7(q7_t * base, // base data - const q7_t * target, // compare target - const uint16_t length // data length - ) -{ - q7_t *pIn = base; - const q7_t *pCom = target; - union arm_nnword in; - union arm_nnword com; - uint16_t cnt = length >> 2; - - while (cnt > 0u) - { - in.word = arm_nn_read_q7x4((const q7_t*)pIn); - com.word = arm_nn_read_q7x4_ia((const q7_t**)&pCom); - - // if version - if (com.bytes[0] > in.bytes[0]) - in.bytes[0] = com.bytes[0]; - if (com.bytes[1] > in.bytes[1]) - in.bytes[1] = com.bytes[1]; - if (com.bytes[2] > in.bytes[2]) - in.bytes[2] = com.bytes[2]; - if (com.bytes[3] > in.bytes[3]) - in.bytes[3] = com.bytes[3]; - - *__SIMD32(pIn)++ = in.word; - - cnt--; - } - - cnt = length & 0x3; - while (cnt > 0u) - { - if (*pCom > *pIn) - { - *pIn = *pCom; - } - pIn++; - pCom++; - cnt--; - } -} - -static void accumulate_q7_to_q15(q15_t * base, q7_t * target, const uint16_t length) -{ - q15_t *pCnt = base; - q7_t *pV = target; - q31_t v1, v2, vo1, vo2; - uint16_t cnt = length >> 2; - q31_t in; - - while (cnt > 0u) - { - q31_t value = arm_nn_read_q7x4_ia((const q7_t**)&pV); - v1 = __SXTB16(__ROR(value, 8)); - v2 = __SXTB16(value); -#ifndef ARM_MATH_BIG_ENDIAN - - vo2 = __PKHTB(v1, v2, 16); - vo1 = __PKHBT(v2, v1, 16); - -#else - - vo1 = __PKHTB(v1, v2, 16); - vo2 = __PKHBT(v2, v1, 16); - -#endif - - in = arm_nn_read_q15x2(pCnt); - *__SIMD32(pCnt)++ = __QADD16(vo1, in); - - in = 
arm_nn_read_q15x2(pCnt); - *__SIMD32(pCnt)++ = __QADD16(vo2, in); - - cnt--; - } - cnt = length & 0x3; - while (cnt > 0u) - { - *pCnt++ += *pV++; - cnt--; - } -} - -#endif // ARM_MATH_DSP - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - - /** - * @brief Q7 max pooling function - * @param[in, out] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA Not used - * @param[in,out] Im_out pointer to output tensor - * - * @details - * - * The pooling function is implemented as split x-pooling then - * y-pooling. - * - * This pooling function is input-destructive. Input data is undefined - * after calling this function. - * - */ - -void -arm_maxpool_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, const uint16_t dim_im_out, q7_t * bufferA, q7_t * Im_out) -{ - (void)bufferA; -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_x, i_y; - - /* first does the pooling along x axis */ - for (i_y = 0; i_y < dim_im_in; i_y++) - { - - for (i_x = 0; i_x < dim_im_out; i_x++) - { - /* for each output pixel */ - q7_t *target = Im_in + (i_y * dim_im_in + i_x) * ch_im_in; - q7_t *win_start; - q7_t *win_stop; - if (i_x * stride - padding < 0) - { - win_start = target; - } else - { - win_start = Im_in + (i_y * dim_im_in + i_x * stride - padding) * ch_im_in; - } - - if (i_x * stride - padding + dim_kernel >= dim_im_in) - { - win_stop = Im_in + (i_y * dim_im_in + dim_im_in) * ch_im_in; - } else - { - win_stop = Im_in + (i_y * dim_im_in + i_x * stride - padding + dim_kernel) * ch_im_in; - } - - /* first step is to copy over initial data 
*/ - /* arm_copy_q7(win_start, target, ch_im_in); */ - memmove(target, win_start, ch_im_in); - - /* start the max operation from the second part */ - win_start += ch_im_in; - for (; win_start < win_stop; win_start += ch_im_in) - { - compare_and_replace_if_larger_q7(target, win_start, ch_im_in); - } - } - } - - /* then does the pooling along y axis */ - for (i_y = 0; i_y < dim_im_out; i_y++) - { - - /* for each output row */ - q7_t *target = Im_out + i_y * dim_im_out * ch_im_in; - q7_t *row_start; - q7_t *row_end; - /* setting the starting row */ - if (i_y * stride - padding < 0) - { - row_start = Im_in; - } else - { - row_start = Im_in + (i_y * stride - padding) * dim_im_in * ch_im_in; - } - /* setting the stopping row */ - if (i_y * stride - padding + dim_kernel >= dim_im_in) - { - row_end = Im_in + dim_im_in * dim_im_in * ch_im_in; - } else - { - row_end = Im_in + (i_y * stride - padding + dim_kernel) * dim_im_in * ch_im_in; - } - - /* copy over the first row */ - /* arm_copy_q7(row_start, target, dim_im_out * ch_im_in); */ - memmove(target, row_start, dim_im_out * ch_im_in); - - /* move over to next row */ - row_start += ch_im_in * dim_im_in; - - for (; row_start < row_end; row_start += dim_im_in * ch_im_in) - { - compare_and_replace_if_larger_q7(target, row_start, dim_im_out * ch_im_in); - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int16_t i_ch_in, i_x, i_y; - int16_t k_x, k_y; - - for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++) - { - for (i_y = 0; i_y < dim_im_out; i_y++) - { - for (i_x = 0; i_x < dim_im_out; i_x++) - { - int max = -129; - for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++) - { - for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++) - { - if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in) - { - if (Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)] > max) - { - max = Im_in[i_ch_in + ch_im_in * (k_x + k_y * 
dim_im_in)]; - } - } - } - } - Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = max; - } - } - } - -#endif /* ARM_MATH_DSP */ - -} - - /** - * @brief Q7 average pooling function - * @param[in,out] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * - * @details - * - * Buffer size: - * - * bufferA size: 2*dim_im_out*ch_im_in - * - * The pooling function is implemented as split x-pooling then - * y-pooling. - * - * This pooling function is input-destructive. Input data is undefined - * after calling this function. - * - */ - -void -arm_avepool_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, const uint16_t dim_im_out, q7_t * bufferA, q7_t * Im_out) -{ - -#if defined (ARM_MATH_DSP) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - q15_t *buffer = (q15_t *) bufferA; - int16_t i_x, i_y; - int16_t count = 0; - - /* first does the pooling along x axis */ - for (i_y = 0; i_y < dim_im_in; i_y++) - { - - for (i_x = 0; i_x < dim_im_out; i_x++) - { - /* for each output pixel */ - q7_t *target = Im_in + (i_y * dim_im_in + i_x) * ch_im_in; - q7_t *win_start; - q7_t *win_stop; - if (i_x * stride - padding < 0) - { - win_start = target; - } else - { - win_start = Im_in + (i_y * dim_im_in + i_x * stride - padding) * ch_im_in; - } - - if (i_x * stride - padding + dim_kernel >= dim_im_in) - { - win_stop = Im_in + (i_y * dim_im_in + dim_im_in) * ch_im_in; - } else - { - win_stop = Im_in + (i_y * dim_im_in + i_x * stride - padding + dim_kernel) * ch_im_in; - } - - /* first step is to copy over 
initial data */ - arm_q7_to_q15_no_shift(win_start, buffer, ch_im_in); - count = 1; - - /* start the max operation from the second part */ - win_start += ch_im_in; - for (; win_start < win_stop; win_start += ch_im_in) - { - accumulate_q7_to_q15(buffer, win_start, ch_im_in); - count++; - } - buffer_scale_back_q15_to_q7(buffer, target, ch_im_in, count); - } - } - - /* then does the pooling along y axis */ - for (i_y = 0; i_y < dim_im_out; i_y++) - { - /* for each output row */ - q7_t *target = Im_out + i_y * dim_im_out * ch_im_in; - q7_t *row_start; - q7_t *row_end; - /* setting the starting row */ - if (i_y * stride - padding < 0) - { - row_start = Im_in; - } else - { - row_start = Im_in + (i_y * stride - padding) * dim_im_in * ch_im_in; - } - /* setting the stopping row */ - if (i_y * stride - padding + dim_kernel >= dim_im_in) - { - row_end = Im_in + dim_im_in * dim_im_in * ch_im_in; - } else - { - row_end = Im_in + (i_y * stride - padding + dim_kernel) * dim_im_in * ch_im_in; - } - - /* copy over the first row */ - arm_q7_to_q15_no_shift(row_start, buffer, dim_im_out * ch_im_in); - count = 1; - - /* move over to next row */ - row_start += ch_im_in * dim_im_in; - - for (; row_start < row_end; row_start += dim_im_in * ch_im_in) - { - accumulate_q7_to_q15(buffer, row_start, dim_im_out * ch_im_in); - count++; - } - buffer_scale_back_q15_to_q7(buffer, target, dim_im_out * ch_im_in, count); - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - (void)bufferA; - int16_t i_ch_in, i_x, i_y; - int16_t k_x, k_y; - - for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++) - { - for (i_y = 0; i_y < dim_im_out; i_y++) - { - for (i_x = 0; i_x < dim_im_out; i_x++) - { - int sum = 0; - int count = 0; - for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++) - { - for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++) - { - if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < 
dim_im_in) - { - sum += Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)]; - count++; - } - } - } - Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = sum / count; - } - } - } - -#endif /* ARM_MATH_DSP */ - -} - -/** - * @} end of Pooling group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c deleted file mode 100644 index 066e0fffd..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_reshape_s8.c - * Description: Reshape a s8 vector - * - * $Date: September 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Reshape - * @{ - */ - -/** - * Basic s8 reshape function. - * - * Refer header file for details. - * - */ - -void arm_reshape_s8(const int8_t *input, - int8_t *output, - const uint32_t total_size) -{ - memcpy(output, input, total_size); -} - -/** - * @} end of Reshape group - */ \ No newline at end of file diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c deleted file mode 100644 index a03ed9b5e..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_svdf_s8.c - * Description: S8 basic SVDF layer function - * - * $Date: 17. August 2020 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M processors - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nn_types.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup SVDF - * @{ - */ - -/* - * S8 SVDF layer function for TensorFlow Lite - * - * Refer to header file for details. - * - */ - -arm_status -arm_svdf_s8(const cmsis_nn_context *input_ctx, - const cmsis_nn_context *output_ctx, - const cmsis_nn_svdf_params *svdf_params, - const cmsis_nn_per_tensor_quant_params *input_quant_params, - const cmsis_nn_per_tensor_quant_params *output_quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *state_dims, - q15_t *state_data, - const cmsis_nn_dims *weights_feature_dims, - const q7_t *weights_feature_data, - const cmsis_nn_dims *weights_time_dims, - const q15_t *weights_time_data, - const cmsis_nn_dims *bias_dims, - const q31_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - (void)bias_dims; - (void)state_dims; - (void)output_dims; - - const q31_t multiplier_in = input_quant_params->multiplier; - const q31_t shift_in = input_quant_params->shift; - const q31_t multiplier_out = output_quant_params->multiplier; - const q31_t shift_2 = output_quant_params->shift; - const int32_t zp_in = svdf_params->input_offset; - const int32_t zp_out = svdf_params->output_offset; - const int32_t in_activation_min = svdf_params->input_activation.min; - const int32_t in_activation_max = svdf_params->input_activation.max; - const int32_t out_activation_min = 
svdf_params->output_activation.min; - const int32_t out_activation_max = svdf_params->output_activation.max; - const int16_t rank = svdf_params->rank; - - int32_t zp_32 = (-zp_in & 0xffff) | - ((-zp_in & 0xffff) << 16); - - const int32_t input_batches = input_dims->n; - const int32_t input_height = input_dims->h; - const int32_t feature_batches = weights_feature_dims->n; - const int32_t time_batches = weights_time_dims->h; - const int32_t unit_count = feature_batches / rank; - - q31_t *buffer_a = (q31_t *)input_ctx->buf; - q31_t *buffer_b = (q31_t *)output_ctx->buf; - - memmove((q15_t *)state_data, (q15_t *)state_data + 1, - (size_t)(input_batches * feature_batches * time_batches * (int32_t)sizeof(int16_t))); - - q15_t *res_ptr = state_data + (time_batches - 1); - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - const q7_t *buffer_1 = weights_feature_data; - for (int r = 0; r < feature_batches; r++) - { - q31_t dot_prod = 0; - - const q7_t *buffer_2 = input_data + i_batch * input_height; - -#if defined(ARM_MATH_DSP) - int c = 0; - int32_t block_count = input_height >> 2; - for (int i = 0; i < block_count; i++) - { - c += 4; - - q31_t r1 = arm_nn_read_q7x4_ia(&buffer_1); - q31_t r1_a = __SXTB16(r1); - q31_t r1_b = __SXTB16(__ROR((uint32_t)r1, 8)); - - q31_t r2 = arm_nn_read_q7x4_ia(&buffer_2); - q31_t r2_a = __SXTAB16(zp_32, r2); - q31_t r2_b = __SXTAB16(zp_32, __ROR((uint32_t)r2, 8)); - - dot_prod = __SMLAD(r1_a, r2_a, dot_prod); - dot_prod = __SMLAD(r1_b, r2_b, dot_prod); - } - - for (; c < input_height; c++) - { - dot_prod += *buffer_1 * (*buffer_2 - zp_in); - buffer_1++; - buffer_2++; - } -#else - for (int c = 0; c < input_height; c++) - { - dot_prod += *buffer_1 * (*buffer_2 - zp_in); - buffer_1++; - buffer_2++; - } -#endif - - dot_prod = arm_nn_requantize(dot_prod, - multiplier_in, - shift_in); - dot_prod = CLAMP(dot_prod, in_activation_max, in_activation_min); - *res_ptr = dot_prod; - res_ptr += time_batches; - } - } - - for (int i_batch = 0; 
i_batch < input_batches; i_batch++) - { - q31_t *ptr_a = buffer_a + i_batch * feature_batches; - - const q15_t *v1 = weights_time_data; - const q15_t *v2 = state_data + i_batch * time_batches * feature_batches; - for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++) - { - *ptr_a = 0; - - int32_t sum = 0; -#if defined(ARM_MATH_DSP) - int j = 0; - int32_t block_count = time_batches >> 1; - for (int i = 0; i < block_count; i++) - { - j += 2; - q31_t r1 = arm_nn_read_q15x2_ia(&v1); - q31_t r2 = arm_nn_read_q15x2_ia(&v2); - - sum = __SMLAD(r1, r2, sum); - } - - // Process the remaining data - for (; j < time_batches; j++) - { - sum += *v1 * *v2; - v1++; - v2++; - } -#else - for (int j = 0; j < time_batches; j++) - { - sum += *v1 * *v2; - v1++; - v2++; - } -#endif - - *ptr_a = sum; - ptr_a++; - } - } - - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q31_t *output_data_temp = buffer_b + i_batch * unit_count; - q31_t *ptr_a = buffer_a + i_batch * feature_batches; - - for (int i = 0; i < unit_count; i++) - { - output_data_temp[i] = bias_data[i]; - for (int j = 0; j < rank; j++) - { - output_data_temp[i] += *ptr_a; - ptr_a++; - } - } - } - - for (int i = 0; i < input_batches * unit_count; i++) - { - output_data[i] = (q7_t)CLAMP(arm_nn_requantize(buffer_b[i], multiplier_out, shift_2) + zp_out, - out_activation_max, out_activation_min); - } - - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of SVDF group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c deleted file mode 100644 index e852eec50..000000000 --- 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_q15.c - * Description: Q15 softmax function - * - * $Date: 20. February 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - - /** - * @brief Q15 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * - * @details - * - * Here, instead of typical e based softmax, we use - * 2-based softmax, i.e.,: - * - * y_i = 2^(x_i) / sum(2^x_j) - * - * The relative output will be different here. - * But mathematically, the gradient will be the same - * with a log(2) scaling factor. 
- * - */ - -void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out) -{ - q31_t sum; - int16_t i; - uint8_t shift; - q31_t base; - base = -1 * 0x100000; - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - base = vec_in[i]; - } - } - - /* we ignore really small values - * anyway, they will be 0 after shrinking - * to q15_t - */ - base = base - 16; - - sum = 0; - - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - shift = (uint8_t)__USAT(vec_in[i] - base, 5); - sum += 0x1 << shift; - } - } - - /* This is effectively (0x1 << 32) / sum */ - int64_t div_base = 0x100000000LL; - int output_base = (int32_t)(div_base / sum); - - /* Final confidence will be output_base >> ( 17 - (vec_in[i] - base) ) - * so 32768 (0x1<<15) -> 100% confidence when sum = 0x1 << 16, output_base = 0x1 << 16 - * and vec_in[i]-base = 16 - */ - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - /* Here minimum value of 17+base-vec[i] will be 1 */ - shift = (uint8_t)__USAT(17+base-vec_in[i], 5); - p_out[i] = (q15_t) __SSAT((output_base >> shift), 16); - } else - { - p_out[i] = 0; - } - } - -} - -/** - * @} end of Softmax group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c deleted file mode 100644 index a0ef85e01..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_q7.c - * Description: Q7 softmax function - * - * $Date: June 8, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - - /** - * @brief Q7 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * - * @details - * - * Here, instead of typical natural logarithm e based softmax, we use - * 2-based softmax here, i.e.,: - * - * y_i = 2^(x_i) / sum(2^x_j) - * - * The relative output will be different here. - * But mathematically, the gradient will be the same - * with a log(2) scaling factor. - * - */ - -void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out ) -{ - q31_t sum; - int16_t i; - uint8_t shift; - q15_t base; - base = -128; - - /* We first search for the maximum */ - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - base = vec_in[i]; - } - } - - /* - * So the base is set to max-8, meaning - * that we ignore really small values. - * anyway, they will be 0 after shrinking to q7_t. 
- */ - base = base - (1 << 3); - - sum = 0; - - for (i = 0; i < dim_vec; i++) - { - shift = (uint8_t)__USAT(vec_in[i] - base, 3); - sum += 0x1 << shift; - } - - /* This is effectively (0x1 << 20) / sum */ - int output_base = (1 << 20) / sum; - - for (i = 0; i < dim_vec; i++) - { - - /* Here minimum value of 13+base-vec_in[i] will be 5 */ - shift = (uint8_t)__USAT(13 + base - vec_in[i], 5); - p_out[i] = (q7_t)__SSAT((output_base >> shift), 8); - } -} - -/** - * @} end of Softmax group - */ diff --git a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c deleted file mode 100644 index 1d5dcf677..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_s8.c - * Description: S8 softmax function - * - * $Date: April 6, 2020 - * $Revision: V.2.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -#define ACCUM_BITS 12 - -#ifdef ARM_MATH_MVEI -static int32x4_t arm_exp_on_negative_values_mve_32x4(int32x4_t val) -{ -#define SHIFT_START (24) - int32_t shift = SHIFT_START; - int32x4_t mask; - - const int32x4_t val_mod_minus_quarter = vandq_s32(val, vdupq_n_s32((1 << SHIFT_START) - 1)) - vdupq_n_s32(1 << SHIFT_START); - const int32x4_t remainder = vsubq_s32(val_mod_minus_quarter, val); - const int32x4_t x = vaddq_n_s32(val_mod_minus_quarter << 5, 1 << 28); - const int32x4_t x2 = MUL_SAT_MVE(x, x); - const int32x4_t op_1 = DIV_POW2_MVE(MUL_SAT_MVE(x2, x2), 2) + MUL_SAT_MVE(x2, x); - const int32x4_t op_2 = x + DIV_POW2_MVE(MUL_SAT_MVE(op_1, vdupq_n_s32(715827883)) + x2, 1); - int32x4_t result = vdupq_n_s32(1895147668) + MUL_SAT_MVE(vdupq_n_s32(1895147668), op_2); - -#define SELECT_IF_NON_ZERO(x) \ - { \ - mve_pred16_t p = vcmpneq_n_s32(remainder & vdupq_n_s32(1 << shift++), 0); \ - mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p); \ - result = SELECT_USING_MASK(mask, MUL_SAT_MVE(result, vdupq_n_s32(x)), result); \ - } - - SELECT_IF_NON_ZERO(1672461947) - SELECT_IF_NON_ZERO(1302514674) - SELECT_IF_NON_ZERO(790015084) - SELECT_IF_NON_ZERO(290630308) - SELECT_IF_NON_ZERO(39332535) - SELECT_IF_NON_ZERO(720401) - SELECT_IF_NON_ZERO(242) - -#undef SELECT_IF_NON_ZERO - - mve_pred16_t p = vcmpeqq_n_s32(val, 0); - mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p); - - result = SELECT_USING_MASK(mask, vdupq_n_s32(Q31_MAX), result); - return result; -} -#endif - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - -void arm_softmax_s8(const int8_t *input, - 
const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output) -{ -#ifdef ARM_MATH_MVEI - -#define ACT_MIN ((int8_t)Q7_MIN) -#define ACT_MAX ((int8_t)Q7_MAX) - - const int32_t mask = (1 << shift); - - for (int i_num_rows = 0; i_num_rows < num_rows; ++i_num_rows) - { - int8_t max = ACT_MIN; - - int32_t vec_count = (row_size + 15) / 16; - uint32_t r_count = (uint32_t)row_size; - for (int i = 0; i < vec_count; i++) - { - mve_pred16_t p = vctp8q(r_count); - const int8x16_t ip = vldrbq_z_s8(&input[i * 16], p); - max = vmaxvq_p_s8(max, ip, p); - r_count -= 16; - } - - vec_count = row_size / 4; - int32_t idx = 0; - int32_t sum = 0; - - while (vec_count) - { - int32x4_t ip = vldrbq_s32(&input[idx * 4]); - ip = vsubq_n_s32(ip, max); - mve_pred16_t p = vcmpgeq_n_s32(ip, diff_min); - if (p != 0) - { - ip = vmulq_n_s32(ip, mask); - - int32x4_t res = MUL_SAT_MVE(ip, vdupq_n_s32(mult)); - - res = arm_exp_on_negative_values_mve_32x4(res); - res = DIV_POW2_MVE(res, ACCUM_BITS); - res = vpselq_s32(res, vdupq_n_s32(0), p); - sum += vaddvq_s32(res); - } - - vec_count--; - idx++; - } - - const int32_t tail_idx = row_size & ~3; - for (int i = 0; i < (row_size & 3); i++) - { - const int32_t diff = input[tail_idx + i] - max; - if (diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ((uint32_t)sum); - const int32_t bits_over_unit = ACCUM_BITS - headroom + 23; - const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31)); - - vec_count = row_size / 4; - idx = 0; - - while (vec_count) - { - int32x4_t ip = vldrbq_s32(&input[idx]); - ip = vsubq_n_s32(ip, max); - - mve_pred16_t p = vcmpgeq_n_s32(ip, diff_min); - - int32x4_t tmp_res; - - if (p != 0) - { - ip = vmulq_n_s32(ip, mask); - - tmp_res = MUL_SAT_MVE(ip, vdupq_n_s32(mult)); - tmp_res = arm_exp_on_negative_values_mve_32x4(tmp_res); - tmp_res = 
MUL_SAT_MVE(vdupq_n_s32(shifted_scale), tmp_res); - tmp_res = DIV_POW2_MVE(tmp_res, bits_over_unit); - tmp_res += vdupq_n_s32(ACT_MIN); - - tmp_res = vmaxq_s32(tmp_res, vdupq_n_s32(ACT_MIN)); - tmp_res = vminq_s32(tmp_res, vdupq_n_s32(ACT_MAX)); - tmp_res = vpselq_s32(tmp_res, vdupq_n_s32(ACT_MIN), p); - } - else - { - tmp_res = vdupq_n_s32(ACT_MIN); - } - vstrbq_s32(&output[idx], tmp_res); - vec_count--; - idx += 4; - } - - for (int i = 0; i < (row_size & 3); i++) - { - int32_t diff = input[tail_idx + i] - max; - if (diff >= diff_min) - { - const int32_t res = DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128; - output[tail_idx + i] = (int8_t)CLAMP(res, (int32_t)ACT_MAX, (int32_t)ACT_MIN); - } - else - { - output[tail_idx + i] = ACT_MIN; - } - } - - input += row_size; - output += row_size; - } -#else - const int32_t mask = (1 << shift); - - int32_t col = 0; - int32_t row_idx; - - for (row_idx = 0; row_idx < num_rows; ++row_idx) - { - // Find the maximum value in order to ensure numerical stability - int8_t max = *input; - - for (col = 1; col < row_size; ++col) - { - max = MAX(max, input[col]); - } - - int32_t diff = 0; - int32_t sum = 0; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ(sum); - const int32_t bits_over_unit = ACCUM_BITS - headroom + 23; - const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31)); - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - const int32_t res = DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128; - output[col] = (int8_t)CLAMP(res, (int32_t)127, (int32_t)-128); - } - else - { - output[col] = -128; - } - } - input += row_size; - output += row_size; - } - -#endif -} -/** - * @} end of Softmax group - */ diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c deleted file mode 100644 index b2eebd22b..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_u8.c - * Description: U8 softmax function - * - * $Date: May 29, 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -#define ACCUM_BITS 12 - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ -void arm_softmax_u8(const uint8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - uint8_t *output) -{ - const int32_t mask = (1 << shift); - - int32_t col = 0; - int32_t row_idx; - - for(row_idx = 0; row_idx < num_rows; ++row_idx) - { - // Find the maximum value in order to ensure numerical stability - uint8_t max = *input; - - for (col = 1; col < row_size; ++col) - { - max = MAX(max, input[col]); - } - - int32_t diff = 0; - int32_t sum = 0; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if(diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ((uint32_t)sum); - const int32_t bits_over_unit = ACCUM_BITS - headroom + 23; - const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31)); - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - const int32_t res = DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit); - output[col] = (uint8_t) CLAMP(res, (int32_t)255, (int32_t)0); - } - else - { - output[col] = 0; - } - } - input += row_size; - output += row_size; - } -} -/** - * @} end of Softmax group - */ \ No newline at end of file diff --git 
a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c b/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c deleted file mode 100644 index 4c27e336c..000000000 --- a/APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_with_batch_q7.c - * Description: Q7 softmax function - * - * $Date: 05. 
August 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * - * -------------------------------------------------------------------- */ - -#include "arm_math.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - - /** - * @brief Q7 softmax function with batch parameter - * @param[in] vec_in pointer to input vector - * @param[in] nb_batches number of batches - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * - * @details - * - * Here, instead of typical natural logarithm e based softmax, we use - * 2-based softmax here, i.e.,: - * - * y_i = 2^(x_i) / sum(2^x_j) - * - * The relative output will be different here. - * But mathematically, the gradient will be the same - * with a log(2) scaling factor. - * - */ - -void arm_softmax_with_batch_q7(const q7_t * vec_in, const uint16_t nb_batches,const uint16_t dim_vec, q7_t * p_out ) -{ - for(int i=0; i +#ifdef OV2640_JPEG_MODE #define JPEG_BUF_SIZE (1024*200) +#else +#define JPEG_BUF_SIZE (2*OV2640_X_RESOLUTION_IMAGE_OUTSIZE*OV2640_Y_RESOLUTION_IMAGE_OUTSIZE) +#endif #define UART_NUMBER2 "uart2" - +void lcd_show_ov2640_thread(uint16_t* rgbbuffer); +static int fd = 0; +static _ioctl_shoot_para shoot_para_t = {0}; void ov2640_test(int argc, char **argv) { - + rt_thread_t tid; rt_err_t ret = 0; - int fd = 0; fd = open("/dev/ov2640",O_RDONLY); if(fd < 0) { @@ -26,7 +31,6 @@ void ov2640_test(int argc, char **argv) return; } rt_uint8_t* JpegBuffer = rt_malloc(JPEG_BUF_SIZE); - _ioctl_shoot_para shoot_para_t = {0}; if (RT_NULL == JpegBuffer) { printf("JpegBuffer senddata buf malloc error!\n"); @@ -34,27 +38,36 @@ void ov2640_test(int argc, char **argv) } printf("ov2640 test by printing the image value in memory \r\n"); shoot_para_t.pdata = (uint32_t)JpegBuffer; + #ifdef OV2640_RGB565_MODE + shoot_para_t.length = JPEG_BUF_SIZE/2; + #elif defined OV2640_JPEG_MODE shoot_para_t.length = JPEG_BUF_SIZE; 
+ #endif ret = ioctl(fd,IOCTRL_CAMERA_START_SHOT,&shoot_para_t); if(RT_ERROR == ret) { printf("ov2640 can't wait event flag"); return; } - printf("print the vaule:\r\n\r\n"); - ret = rt_ov2640_calculate_jpeg_len(JpegBuffer,JPEG_BUF_SIZE); - printf("photo leghth is %d :\r\n\r\n",ret); - #ifdef BSP_USING_UART2 - void img_output_uart2(rt_uint8_t* jpegbuf,rt_uint16_t len); - img_output_uart2(JpegBuffer,ret); + #ifdef OV2640_JPEG_MODE + printf("print the vaule:\r\n\r\n"); + ret = rt_ov2640_calculate_jpeg_len(JpegBuffer,JPEG_BUF_SIZE); + printf("photo leghth is %d :\r\n\r\n",ret); + #ifdef BSP_USING_UART2 + void img_output_uart2(rt_uint8_t* jpegbuf,rt_uint16_t len); + img_output_uart2(JpegBuffer,ret); + #endif + for(int i =0;i FSMC_NOE PD5 ------> FSMC_NWE PG10 ------> FSMC_NE3 + PG12 ------> FSMC_NE4 PE0 ------> FSMC_NBL0 PE1 ------> FSMC_NBL1 */ @@ -1085,8 +1086,8 @@ static void HAL_FSMC_MspInit(void){ GPIO_InitStruct.Alternate = GPIO_AF12_FSMC; HAL_GPIO_Init(GPIOF, &GPIO_InitStruct); - GPIO_InitStruct.Pin = GPIO_PIN_0|GPIO_PIN_1|GPIO_PIN_2|GPIO_PIN_3 - |GPIO_PIN_4|GPIO_PIN_5|GPIO_PIN_10; + GPIO_InitStruct.Pin = GPIO_PIN_0|GPIO_PIN_1|GPIO_PIN_2|GPIO_PIN_3 + |GPIO_PIN_4|GPIO_PIN_5|GPIO_PIN_10|GPIO_PIN_12; GPIO_InitStruct.Mode = GPIO_MODE_AF_PP; GPIO_InitStruct.Pull = GPIO_NOPULL; GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH; @@ -1178,6 +1179,7 @@ static void HAL_FSMC_MspDeInit(void){ PD4 ------> FSMC_NOE PD5 ------> FSMC_NWE PG10 ------> FSMC_NE3 + PG12 ------> FSMC_NE4 PE0 ------> FSMC_NBL0 PE1 ------> FSMC_NBL1 */ diff --git a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/Kconfig b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/Kconfig index 9977dcdc2..e2bc35409 100644 --- a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/Kconfig +++ b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/Kconfig @@ -32,6 +32,16 @@ menu "Onboard Peripheral Drivers" select BSP_USING_EXT_FMC_IO select BSP_USING_FMC default n + + config BSP_USING_MCU_LCD + bool 
"Enable ATK LCD" + select BSP_USING_SRAM + default n + if BSP_USING_MCU_LCD + config BSP_USING_MCU_LCD_TEST + bool "Enable lcd fill test" + default y + endif config BSP_USING_SPI_FLASH bool "Enable SPI FLASH (W25Q128 spi2)" diff --git a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/SConscript b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/SConscript index a34a90866..93e5a3f3d 100644 --- a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/SConscript +++ b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/SConscript @@ -23,7 +23,10 @@ if GetDepend(['BSP_USING_SDCARD']): if GetDepend(['BSP_USING_SRAM']): src += Glob('ports/drv_sram.c') - + +if GetDepend(['BSP_USING_MCU_LCD']): + src += Glob('ports/drv_lcd.c') + if GetDepend(['BSP_USING_DCMI']): src += ['drv_dcmi.c'] src += ['../../../../rt-thread/bsp/stm32/libraries/STM32F4xx_HAL/STM32F4xx_HAL_Driver/Src/stm32f4xx_hal_dcmi.c'] diff --git a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_lcd.c b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_lcd.c new file mode 100644 index 000000000..1813559b4 --- /dev/null +++ b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_lcd.c @@ -0,0 +1,2074 @@ +/* + * Copyright (c) 2006-2021, RT-Thread Development Team + * + * SPDX-License-Identifier: Apache-2.0 + * + * Change Logs: + * Date Author Notes + * 2021-12-28 unknow copy by STemwin + * 2021-12-29 xiangxistu port for lvgl + */ + +#include +#include "drv_lcd.h" +#include "string.h" +#include "font.h" +//#define DRV_DEBUG +#define LOG_TAG "drv.lcd" +#include + +_lcd_dev lcddev; +SRAM_HandleTypeDef hsram1; + +#define LCD_BL GET_PIN(B, 15) +#define LCD_BASE ((uint32_t)(0x6C000000 | 0x0000007E)) +#define LCD ((LCD_CONTROLLER_TypeDef *)LCD_BASE) + +#define LCD_DEVICE(dev) (struct drv_lcd_device *)(dev) + +struct drv_lcd_device +{ + struct rt_device parent; + + struct rt_device_graphic_info lcd_info; +}; + +static struct drv_lcd_device _lcd; + +//写寄存器函数 +//regval:寄存器值 +void 
LCD_WR_REG(uint16_t regval) +{ + LCD->REG = regval; //写入è¦å†™çš„寄存器åºå· +} +//写LCDæ•°æ® +//data:è¦å†™å…¥çš„值 +void LCD_WR_DATA(uint16_t data) +{ + LCD->RAM = data; +} +//读LCDæ•°æ® +//返回值:读到的值 +uint16_t LCD_RD_DATA(void) +{ + return LCD->RAM; +} +//写寄存器 +//LCD_Reg:å¯„å­˜å™¨åœ°å€ +//LCD_RegValue:è¦å†™å…¥çš„æ•°æ® +void LCD_WriteReg(uint16_t LCD_Reg, uint16_t LCD_RegValue) +{ + LCD->REG = LCD_Reg; //写入è¦å†™çš„寄存器åºå· + LCD->RAM = LCD_RegValue; //å†™å…¥æ•°æ® +} +//读寄存器 +//LCD_Reg:å¯„å­˜å™¨åœ°å€ +//返回值:è¯»åˆ°çš„æ•°æ® +uint16_t LCD_ReadReg(uint16_t LCD_Reg) +{ + LCD_WR_REG(LCD_Reg); //写入è¦è¯»çš„寄存器åºå· + return LCD_RD_DATA(); //返回读到的值 +} +//开始写GRAM +void LCD_WriteRAM_Prepare(void) +{ + LCD->REG = lcddev.wramcmd; +} +//LCD写GRAM +//RGB_Code:颜色值 +void LCD_WriteRAM(uint16_t RGB_Code) +{ + LCD->RAM = RGB_Code; //写åå…­ä½GRAM +} + +//从ILI93xx读出的数æ®ä¸ºGBRæ ¼å¼ï¼Œè€Œæˆ‘们写入的时候为RGBæ ¼å¼ã€‚ +//é€šè¿‡è¯¥å‡½æ•°è½¬æ¢ +//c:GBRæ ¼å¼çš„颜色值 +//返回值:RGBæ ¼å¼çš„颜色值 +uint16_t LCD_BGR2RGB(uint16_t c) +{ + uint16_t r, g, b, rgb; + b = (c >> 0) & 0x1f; + g = (c >> 5) & 0x3f; + r = (c >> 11) & 0x1f; + rgb = (b << 11) + (g << 5) + (r << 0); + return (rgb); +} + +//设置光标ä½ç½®(对RGB屿— æ•ˆ) +//Xpos:æ¨ªåæ ‡ +//Ypos:çºµåæ ‡ +void LCD_SetCursor(uint16_t Xpos, uint16_t Ypos) +{ + if (lcddev.id == 0X9341 || lcddev.id == 0X5310) + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(Xpos >> 8); + LCD_WR_DATA(Xpos & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(Ypos >> 8); + LCD_WR_DATA(Ypos & 0XFF); + } + else if (lcddev.id == 0X1963) + { + if (lcddev.dir == 0) //xåæ ‡éœ€è¦å˜æ¢ + { + Xpos = lcddev.width - 1 - Xpos; + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(0); + LCD_WR_DATA(0); + LCD_WR_DATA(Xpos >> 8); + LCD_WR_DATA(Xpos & 0XFF); + } + else + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(Xpos >> 8); + LCD_WR_DATA(Xpos & 0XFF); + LCD_WR_DATA((lcddev.width - 1) >> 8); + LCD_WR_DATA((lcddev.width - 1) & 0XFF); + } + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(Ypos >> 8); + LCD_WR_DATA(Ypos & 0XFF); + 
LCD_WR_DATA((lcddev.height - 1) >> 8); + LCD_WR_DATA((lcddev.height - 1) & 0XFF); + } + else if (lcddev.id == 0X5510) + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(Xpos >> 8); + LCD_WR_REG(lcddev.setxcmd + 1); + LCD_WR_DATA(Xpos & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(Ypos >> 8); + LCD_WR_REG(lcddev.setycmd + 1); + LCD_WR_DATA(Ypos & 0XFF); + } +} + +//读å–个æŸç‚¹çš„颜色值 +//x,y:åæ ‡ +//返回值:此点的颜色 +void LCD_ReadPoint(char *pixel, int x, int y) +{ + uint16_t *color = (uint16_t *)pixel; + uint16_t r = 0, g = 0, b = 0; + if (x >= lcddev.width || y >= lcddev.height) + { + *color = 0; //超过了范围,直接返回 + return; + } + LCD_SetCursor(x, y); + if (lcddev.id == 0X9341 || lcddev.id == 0X5310 || lcddev.id == 0X1963) + LCD_WR_REG(0X2E); //9341/3510/1963 å‘é€è¯»GRAM指令 + else if (lcddev.id == 0X5510) + LCD_WR_REG(0X2E00); //5510 å‘é€è¯»GRAM指令 + r = LCD_RD_DATA(); //dummy Read + if (lcddev.id == 0X1963) + { + *color = r; + return; //1963直接读就å¯ä»¥ + } + + r = LCD_RD_DATA(); //å®žé™…åæ ‡é¢œè‰² + //9341/NT35310/NT35510è¦åˆ†2次读出 + + b = LCD_RD_DATA(); + g = r & 0XFF; //对于9341/5310/5510,第一次读å–的是RG的值,R在å‰,G在åŽ,å„å 8ä½ + g <<= 8; + *color = (((r >> 11) << 11) | ((g >> 10) << 5) | (b >> 11)); //ILI9341/NT35310/NT35510需è¦å…¬å¼è½¬æ¢ä¸€ä¸‹ +} +//LCD开坿˜¾ç¤º +void LCD_DisplayOn(void) +{ + if (lcddev.id == 0X9341 || lcddev.id == 0X5310 || lcddev.id == 0X1963) + LCD_WR_REG(0X29); //开坿˜¾ç¤º + else if (lcddev.id == 0X5510) + LCD_WR_REG(0X2900); //开坿˜¾ç¤º +} +//LCD关闭显示 +void LCD_DisplayOff(void) +{ + if (lcddev.id == 0X9341 || lcddev.id == 0X5310 || lcddev.id == 0X1963) + LCD_WR_REG(0X28); //关闭显示 + else if (lcddev.id == 0X5510) + LCD_WR_REG(0X2800); //关闭显示 +} + +//设置LCDçš„è‡ªåŠ¨æ‰«ææ–¹å‘(对RGB屿— æ•ˆ) +//注æ„:其他函数å¯èƒ½ä¼šå—到此函数设置的影å“(尤其是9341), +//所以,一般设置为L2R_U2Då³å¯,å¦‚æžœè®¾ç½®ä¸ºå…¶ä»–æ‰«ææ–¹å¼,å¯èƒ½å¯¼è‡´æ˜¾ç¤ºä¸æ­£å¸¸. 
+//dir:0~7,代表8个方å‘(具体定义è§lcd.h) +//9341/5310/5510/1963ç­‰ICå·²ç»å®žé™…测试 +void LCD_Scan_Dir(uint8_t dir) +{ + uint16_t regval = 0; + uint16_t dirreg = 0; + uint16_t temp; + if ((lcddev.dir == 1 && lcddev.id != 0X1963) || (lcddev.dir == 0 && lcddev.id == 0X1963)) //æ¨ªå±æ—¶ï¼Œå¯¹1963䏿”¹å˜æ‰«ææ–¹å‘ï¼ç«–屿—¶1963æ”¹å˜æ–¹å‘ + { + switch (dir) //æ–¹å‘è½¬æ¢ + { + case 0: + dir = 6; + break; + case 1: + dir = 7; + break; + case 2: + dir = 4; + break; + case 3: + dir = 5; + break; + case 4: + dir = 1; + break; + case 5: + dir = 0; + break; + case 6: + dir = 3; + break; + case 7: + dir = 2; + break; + } + } + if (lcddev.id == 0x9341 || lcddev.id == 0X5310 || lcddev.id == 0X5510 || lcddev.id == 0X1963) //9341/5310/5510/1963,ç‰¹æ®Šå¤„ç† + { + switch (dir) + { + case L2R_U2D: //从左到å³,从上到下 + regval |= (0 << 7) | (0 << 6) | (0 << 5); + break; + case L2R_D2U: //从左到å³,从下到上 + regval |= (1 << 7) | (0 << 6) | (0 << 5); + break; + case R2L_U2D: //从å³åˆ°å·¦,从上到下 + regval |= (0 << 7) | (1 << 6) | (0 << 5); + break; + case R2L_D2U: //从å³åˆ°å·¦,从下到上 + regval |= (1 << 7) | (1 << 6) | (0 << 5); + break; + case U2D_L2R: //从上到下,ä»Žå·¦åˆ°å³ + regval |= (0 << 7) | (0 << 6) | (1 << 5); + break; + case U2D_R2L: //从上到下,从å³åˆ°å·¦ + regval |= (0 << 7) | (1 << 6) | (1 << 5); + break; + case D2U_L2R: //从下到上,ä»Žå·¦åˆ°å³ + regval |= (1 << 7) | (0 << 6) | (1 << 5); + break; + case D2U_R2L: //从下到上,从å³åˆ°å·¦ + regval |= (1 << 7) | (1 << 6) | (1 << 5); + break; + } + if (lcddev.id == 0X5510) + dirreg = 0X3600; + else + dirreg = 0X36; + if ((lcddev.id != 0X5310) && (lcddev.id != 0X5510) && (lcddev.id != 0X1963)) + regval |= 0X08; //5310/5510/1963ä¸éœ€è¦BGR + LCD_WriteReg(dirreg, regval); + if (lcddev.id != 0X1963) //1963ä¸åšåæ ‡å¤„ç† + { + if (regval & 0X20) + { + if (lcddev.width < lcddev.height) //交æ¢X,Y + { + temp = lcddev.width; + lcddev.width = lcddev.height; + lcddev.height = temp; + } + } + else + { + if (lcddev.width > lcddev.height) //交æ¢X,Y + { + temp = lcddev.width; + lcddev.width = lcddev.height; + 
lcddev.height = temp; + } + } + } + if (lcddev.id == 0X5510) + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(0); + LCD_WR_REG(lcddev.setxcmd + 1); + LCD_WR_DATA(0); + LCD_WR_REG(lcddev.setxcmd + 2); + LCD_WR_DATA((lcddev.width - 1) >> 8); + LCD_WR_REG(lcddev.setxcmd + 3); + LCD_WR_DATA((lcddev.width - 1) & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(0); + LCD_WR_REG(lcddev.setycmd + 1); + LCD_WR_DATA(0); + LCD_WR_REG(lcddev.setycmd + 2); + LCD_WR_DATA((lcddev.height - 1) >> 8); + LCD_WR_REG(lcddev.setycmd + 3); + LCD_WR_DATA((lcddev.height - 1) & 0XFF); + } + else + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(0); + LCD_WR_DATA(0); + LCD_WR_DATA((lcddev.width - 1) >> 8); + LCD_WR_DATA((lcddev.width - 1) & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(0); + LCD_WR_DATA(0); + LCD_WR_DATA((lcddev.height - 1) >> 8); + LCD_WR_DATA((lcddev.height - 1) & 0XFF); + } + } +} + +//快速画点 +//x,y:åæ ‡ +//color:颜色 +static void LCD_Fast_DrawPoint(const char *pixel, int x, int y) +{ + uint16_t color = *((uint16_t *)pixel); + if (lcddev.id == 0X9341 || lcddev.id == 0X5310) + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(x >> 8); + LCD_WR_DATA(x & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(y >> 8); + LCD_WR_DATA(y & 0XFF); + } + else if (lcddev.id == 0X5510) + { + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(x >> 8); + LCD_WR_REG(lcddev.setxcmd + 1); + LCD_WR_DATA(x & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(y >> 8); + LCD_WR_REG(lcddev.setycmd + 1); + LCD_WR_DATA(y & 0XFF); + } + else if (lcddev.id == 0X1963) + { + if (lcddev.dir == 0) + x = lcddev.width - 1 - x; + LCD_WR_REG(lcddev.setxcmd); + LCD_WR_DATA(x >> 8); + LCD_WR_DATA(x & 0XFF); + LCD_WR_DATA(x >> 8); + LCD_WR_DATA(x & 0XFF); + LCD_WR_REG(lcddev.setycmd); + LCD_WR_DATA(y >> 8); + LCD_WR_DATA(y & 0XFF); + LCD_WR_DATA(y >> 8); + LCD_WR_DATA(y & 0XFF); + } + LCD->REG = lcddev.wramcmd; + LCD->RAM = color; +} +//SSD1963 背光设置 +//pwm:背光等级,0~100.越大越亮. 
+void LCD_SSD_BackLightSet(uint8_t pwm) +{ + LCD_WR_REG(0xBE); //é…ç½®PWM输出 + LCD_WR_DATA(0x05); //1设置PWM频率 + LCD_WR_DATA(pwm * 2.55); //2设置PWMå ç©ºæ¯” + LCD_WR_DATA(0x01); //3设置C + LCD_WR_DATA(0xFF); //4设置D + LCD_WR_DATA(0x00); //5设置E + LCD_WR_DATA(0x00); //6设置F +} + +//设置LCDæ˜¾ç¤ºæ–¹å‘ +//dir:0,ç«–å±ï¼›1,æ¨ªå± +void LCD_Display_Dir(uint8_t dir) +{ + lcddev.dir = dir; //ç«–å±/æ¨ªå± + if (dir == 0) //ç«–å± + { + lcddev.width = 240; + lcddev.height = 320; + if (lcddev.id == 0X9341 || lcddev.id == 0X5310) + { + lcddev.wramcmd = 0X2C; + lcddev.setxcmd = 0X2A; + lcddev.setycmd = 0X2B; + if (lcddev.id == 0X5310) + { + lcddev.width = 320; + lcddev.height = 480; + } + } + else if (lcddev.id == 0x5510) + { + lcddev.wramcmd = 0X2C00; + lcddev.setxcmd = 0X2A00; + lcddev.setycmd = 0X2B00; + lcddev.width = 480; + lcddev.height = 800; + } + else if (lcddev.id == 0X1963) + { + lcddev.wramcmd = 0X2C; //设置写入GRAM的指令 + lcddev.setxcmd = 0X2B; //设置写Xåæ ‡æŒ‡ä»¤ + lcddev.setycmd = 0X2A; //设置写Yåæ ‡æŒ‡ä»¤ + lcddev.width = 480; //设置宽度480 + lcddev.height = 800; //设置高度800 + } + } + else //æ¨ªå± + { + lcddev.width = 320; + lcddev.height = 240; + if (lcddev.id == 0X9341 || lcddev.id == 0X5310) + { + lcddev.wramcmd = 0X2C; + lcddev.setxcmd = 0X2A; + lcddev.setycmd = 0X2B; + } + else if (lcddev.id == 0x5510) + { + lcddev.wramcmd = 0X2C00; + lcddev.setxcmd = 0X2A00; + lcddev.setycmd = 0X2B00; + lcddev.width = 800; + lcddev.height = 480; + } + else if (lcddev.id == 0X1963) + { + lcddev.wramcmd = 0X2C; //设置写入GRAM的指令 + lcddev.setxcmd = 0X2A; //设置写Xåæ ‡æŒ‡ä»¤ + lcddev.setycmd = 0X2B; //设置写Yåæ ‡æŒ‡ä»¤ + lcddev.width = 800; //设置宽度800 + lcddev.height = 480; //设置高度480 + } + if (lcddev.id == 0X5310) + { + lcddev.width = 480; + lcddev.height = 320; + } + } + LCD_Scan_Dir(DFT_SCAN_DIR); //é»˜è®¤æ‰«ææ–¹å‘ +} + +//清å±å‡½æ•° +//color:è¦æ¸…å±çš„填充色 +void LCD_Clear(uint32_t color) +{ + uint32_t index = 0; + uint32_t totalpoint = lcddev.width; + totalpoint *= lcddev.height; //得到总点数 + LCD_SetCursor(0x00, 0x0000); 
//设置光标ä½ç½® + LCD_WriteRAM_Prepare(); //开始写入GRAM + for (index = 0; index < totalpoint; index++) + { + LCD->RAM = color; + } +} + +void LCD_HLine(const char *pixel, int x1, int x2, int y) +{ + int xsize = x2 - x1 + 1; + LCD_SetCursor(x1, y); + LCD_WriteRAM_Prepare(); + uint16_t *p = (uint16_t *)pixel; + for (; xsize > 0; xsize--) + LCD->RAM = *p; +} + +void LCD_BlitLine(const char *pixel, int x, int y, rt_size_t size) +{ + LCD_SetCursor(x, y); + LCD_WriteRAM_Prepare(); + uint16_t *p = (uint16_t *)pixel; + for (; size > 0; size--, p++) + LCD->RAM = *p; +} + +void lcd_fill_array(rt_uint16_t x_start, rt_uint16_t y_start, rt_uint16_t x_end, rt_uint16_t y_end, void *pcolor) +{ + rt_uint16_t *pixel = RT_NULL; + rt_uint16_t cycle_y, x_offset = 0; + + pixel = (rt_uint16_t *)pcolor; + + for(cycle_y = y_start; cycle_y <= y_end-1;) + { + LCD_SetCursor(x_start, cycle_y); + LCD_WriteRAM_Prepare(); + for(x_offset = 0;x_start + x_offset <= x_end-1; x_offset++) + { + LCD->RAM = *pixel++; + } + cycle_y++; + } +} + +//LCD的画笔颜色和背景色 +rt_uint16_t POINT_COLOR=RED; //画笔颜色 +rt_uint16_t BACK_COLOR =WHITE; //背景色 +/** + * @description: + * @param {rt_uint16_t} x_start + * @param {rt_uint16_t} y_start + * @param {rt_uint8_t} num show char + * @param {rt_uint8_t} size 12/16/24/32 + * @param {rt_uint32_t} color + * @return {*} + */ +void lcd_show_char(rt_uint16_t x, rt_uint16_t y, rt_uint8_t num, rt_uint8_t size, rt_uint16_t color) +{ + rt_uint8_t temp,t1,t; + rt_uint16_t y0=y; + rt_uint8_t csize=(size/8+((size%8)?1:0))*(size/2); //得到字体一个字符对应点阵集所å çš„字节数 + num=num-' ';//得到åç§»åŽçš„值(ASCIIå­—åº“æ˜¯ä»Žç©ºæ ¼å¼€å§‹å–æ¨¡ï¼Œæ‰€ä»¥-' '就是对应字符的字库) + for(t=0;t=lcddev.height)return; //超区域了 + if((y-y0)==size) + { + y=y0; + x++; + if(x>=lcddev.width)return; //超区域了 + break; + } + } + } +} + +//显示字符串 +//x,y:èµ·ç‚¹åæ ‡ +//width,height:åŒºåŸŸå¤§å° +//size:å­—ä½“å¤§å° +//*p:å­—ç¬¦ä¸²èµ·å§‹åœ°å€ +void lcd_show_string(rt_uint16_t x,rt_uint16_t y,rt_uint16_t width,rt_uint16_t height,rt_uint8_t size,rt_uint8_t 
*p,rt_uint16_t color) +{ + rt_uint8_t x0=x; + width+=x; + height+=y; + while((*p<='~')&&(*p>=' '))//åˆ¤æ–­æ˜¯ä¸æ˜¯éžæ³•字符! + { + if(x>=width){x=x0;y+=size;} + if(y>=height)break;//退出 + lcd_show_char(x,y,*p,size,color); + x+=size/2; + p++; + } +} + +static rt_err_t drv_lcd_init(struct rt_device *device) +{ + + __HAL_RCC_GPIOD_CLK_ENABLE(); + __HAL_RCC_GPIOE_CLK_ENABLE(); + __HAL_RCC_GPIOF_CLK_ENABLE(); + __HAL_RCC_GPIOG_CLK_ENABLE(); + __HAL_RCC_GPIOB_CLK_ENABLE(); + + FSMC_NORSRAM_TimingTypeDef Timing; + + rt_pin_mode(LCD_BL, PIN_MODE_OUTPUT); + + /** Perform the SRAM1 memory initialization sequence + */ + hsram1.Instance = FSMC_NORSRAM_DEVICE; + hsram1.Extended = FSMC_NORSRAM_EXTENDED_DEVICE; + /* hsram1.Init */ + hsram1.Init.NSBank = FSMC_NORSRAM_BANK4; + hsram1.Init.DataAddressMux = FSMC_DATA_ADDRESS_MUX_DISABLE; + hsram1.Init.MemoryType = FSMC_MEMORY_TYPE_SRAM; + hsram1.Init.MemoryDataWidth = FSMC_NORSRAM_MEM_BUS_WIDTH_16; + hsram1.Init.BurstAccessMode = FSMC_BURST_ACCESS_MODE_DISABLE; + hsram1.Init.WaitSignalPolarity = FSMC_WAIT_SIGNAL_POLARITY_LOW; + hsram1.Init.WrapMode = FSMC_WRAP_MODE_DISABLE; + hsram1.Init.WaitSignalActive = FSMC_WAIT_TIMING_BEFORE_WS; + hsram1.Init.WriteOperation = FSMC_WRITE_OPERATION_ENABLE; + hsram1.Init.WaitSignal = FSMC_WAIT_SIGNAL_DISABLE; + hsram1.Init.ExtendedMode = FSMC_EXTENDED_MODE_DISABLE; + hsram1.Init.AsynchronousWait = FSMC_ASYNCHRONOUS_WAIT_DISABLE; + hsram1.Init.WriteBurst = FSMC_WRITE_BURST_DISABLE; + hsram1.Init.PageSize = FSMC_PAGE_SIZE_NONE; + /* Timing */ + Timing.AddressSetupTime = 5; + Timing.AddressHoldTime = 1; + Timing.DataSetupTime = 9; + Timing.BusTurnAroundDuration = 0; + Timing.CLKDivision = 2; + Timing.DataLatency = 2; + Timing.AccessMode = FSMC_ACCESS_MODE_A; + /* ExtTiming */ + + if (HAL_SRAM_Init(&hsram1, &Timing, &Timing) != HAL_OK) + { + Error_Handler(); + } + + rt_thread_mdelay(50); + + //å°è¯•9341 IDçš„è¯»å– + LCD_WR_REG(0XD3); + lcddev.id = LCD_RD_DATA(); //dummy read + lcddev.id = LCD_RD_DATA(); 
//读到0X00 + lcddev.id = LCD_RD_DATA(); //读å–93 + lcddev.id <<= 8; + lcddev.id |= LCD_RD_DATA(); //读å–41 + if (lcddev.id != 0X9341) //éž9341,å°è¯•çœ‹çœ‹æ˜¯ä¸æ˜¯NT35310 + { + LCD_WR_REG(0XD4); + lcddev.id = LCD_RD_DATA(); //dummy read + lcddev.id = LCD_RD_DATA(); //读回0X01 + lcddev.id = LCD_RD_DATA(); //读回0X53 + lcddev.id <<= 8; + lcddev.id |= LCD_RD_DATA(); //这里读回0X10 + if (lcddev.id != 0X5310) //ä¹Ÿä¸æ˜¯NT35310,å°è¯•çœ‹çœ‹æ˜¯ä¸æ˜¯NT35510 + { + LCD_WR_REG(0XDA00); + lcddev.id = LCD_RD_DATA(); //读回0X00 + LCD_WR_REG(0XDB00); + lcddev.id = LCD_RD_DATA(); //读回0X80 + lcddev.id <<= 8; + LCD_WR_REG(0XDC00); + lcddev.id |= LCD_RD_DATA(); //读回0X00 + if (lcddev.id == 0x8000) + lcddev.id = 0x5510; //NT35510读回的ID是8000H,为方便区分,我们强制设置为5510 + if (lcddev.id != 0X5510) //ä¹Ÿä¸æ˜¯NT5510,å°è¯•çœ‹çœ‹æ˜¯ä¸æ˜¯SSD1963 + { + LCD_WR_REG(0XA1); + lcddev.id = LCD_RD_DATA(); + lcddev.id = LCD_RD_DATA(); //读回0X57 + lcddev.id <<= 8; + lcddev.id |= LCD_RD_DATA(); //读回0X61 + if (lcddev.id == 0X5761) + lcddev.id = 0X1963; //SSD1963读回的ID是5761H,为方便区分,我们强制设置为1963 + } + } + } + LOG_I(" LCD ID:%x", lcddev.id); //打å°LCD ID + if (lcddev.id == 0X9341) //9341åˆå§‹åŒ– + { + LCD_WR_REG(0xCF); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC1); + LCD_WR_DATA(0X30); + LCD_WR_REG(0xED); + LCD_WR_DATA(0x64); + LCD_WR_DATA(0x03); + LCD_WR_DATA(0X12); + LCD_WR_DATA(0X81); + LCD_WR_REG(0xE8); + LCD_WR_DATA(0x85); + LCD_WR_DATA(0x10); + LCD_WR_DATA(0x7A); + LCD_WR_REG(0xCB); + LCD_WR_DATA(0x39); + LCD_WR_DATA(0x2C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x34); + LCD_WR_DATA(0x02); + LCD_WR_REG(0xF7); + LCD_WR_DATA(0x20); + LCD_WR_REG(0xEA); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_REG(0xC0); //Power control + LCD_WR_DATA(0x1B); //VRH[5:0] + LCD_WR_REG(0xC1); //Power control + LCD_WR_DATA(0x01); //SAP[2:0];BT[3:0] + LCD_WR_REG(0xC5); //VCM control + LCD_WR_DATA(0x30); //3F + LCD_WR_DATA(0x30); //3C + LCD_WR_REG(0xC7); //VCM control2 + LCD_WR_DATA(0XB7); + LCD_WR_REG(0x36); // Memory Access Control + LCD_WR_DATA(0x48); + 
LCD_WR_REG(0x3A); + LCD_WR_DATA(0x55); + LCD_WR_REG(0xB1); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x1A); + LCD_WR_REG(0xB6); // Display Function Control + LCD_WR_DATA(0x0A); + LCD_WR_DATA(0xA2); + LCD_WR_REG(0xF2); // 3Gamma Function Disable + LCD_WR_DATA(0x00); + LCD_WR_REG(0x26); //Gamma curve selected + LCD_WR_DATA(0x01); + LCD_WR_REG(0xE0); //Set Gamma + LCD_WR_DATA(0x0F); + LCD_WR_DATA(0x2A); + LCD_WR_DATA(0x28); + LCD_WR_DATA(0x08); + LCD_WR_DATA(0x0E); + LCD_WR_DATA(0x08); + LCD_WR_DATA(0x54); + LCD_WR_DATA(0XA9); + LCD_WR_DATA(0x43); + LCD_WR_DATA(0x0A); + LCD_WR_DATA(0x0F); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_REG(0XE1); //Set Gamma + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x15); + LCD_WR_DATA(0x17); + LCD_WR_DATA(0x07); + LCD_WR_DATA(0x11); + LCD_WR_DATA(0x06); + LCD_WR_DATA(0x2B); + LCD_WR_DATA(0x56); + LCD_WR_DATA(0x3C); + LCD_WR_DATA(0x05); + LCD_WR_DATA(0x10); + LCD_WR_DATA(0x0F); + LCD_WR_DATA(0x3F); + LCD_WR_DATA(0x3F); + LCD_WR_DATA(0x0F); + LCD_WR_REG(0x2B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x01); + LCD_WR_DATA(0x3f); + LCD_WR_REG(0x2A); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xef); + LCD_WR_REG(0x11); //Exit Sleep + rt_thread_mdelay(120); + LCD_WR_REG(0x29); //display on + } + else if (lcddev.id == 0x5310) + { + LCD_WR_REG(0xED); + LCD_WR_DATA(0x01); + LCD_WR_DATA(0xFE); + + LCD_WR_REG(0xEE); + LCD_WR_DATA(0xDE); + LCD_WR_DATA(0x21); + + LCD_WR_REG(0xF1); + LCD_WR_DATA(0x01); + LCD_WR_REG(0xDF); + LCD_WR_DATA(0x10); + + //VCOMvoltage// + LCD_WR_REG(0xC4); + LCD_WR_DATA(0x8F); //5f + + LCD_WR_REG(0xC6); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xE2); + LCD_WR_DATA(0xE2); + LCD_WR_DATA(0xE2); + LCD_WR_REG(0xBF); + LCD_WR_DATA(0xAA); + + LCD_WR_REG(0xB0); + LCD_WR_DATA(0x0D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x0D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x11); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x19); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x21); + 
LCD_WR_DATA(0x00); + LCD_WR_DATA(0x2D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x5D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x5D); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB1); + LCD_WR_DATA(0x80); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x8B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x96); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x02); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x03); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB3); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB4); + LCD_WR_DATA(0x8B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x96); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA1); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB5); + LCD_WR_DATA(0x02); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x03); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x04); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB6); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB7); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3F); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x5E); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x64); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x8C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xAC); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xDC); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x70); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x90); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xEB); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xDC); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xB8); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + 
LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xBA); + LCD_WR_DATA(0x24); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC1); + LCD_WR_DATA(0x20); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x54); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xFF); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC2); + LCD_WR_DATA(0x0A); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x04); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC3); + LCD_WR_DATA(0x3C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3A); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x39); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x37); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x36); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x32); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x2F); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x2C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x29); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x26); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x24); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x24); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x23); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x36); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x32); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x2F); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x2C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x29); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x26); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x24); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x24); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x23); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC4); + LCD_WR_DATA(0x62); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x05); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x84); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF0); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x18); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA4); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x18); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x50); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x0C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x17); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x95); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xE6); + 
LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC5); + LCD_WR_DATA(0x32); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x65); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x76); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x88); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC6); + LCD_WR_DATA(0x20); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x17); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x01); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC7); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC8); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xC9); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE0); + LCD_WR_DATA(0x16); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x1C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x21); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x36); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x46); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x52); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x64); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x7A); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x8B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA8); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xB9); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC4); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xCA); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD9); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xE0); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE1); + LCD_WR_DATA(0x16); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x1C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x22); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x36); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x45); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x52); 
+ LCD_WR_DATA(0x00); + LCD_WR_DATA(0x64); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x7A); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x8B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA8); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xB9); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC4); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xCA); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD8); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xE0); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE2); + LCD_WR_DATA(0x05); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x0B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x1B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x34); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x4F); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x61); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x79); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x88); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x97); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA6); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xB7); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC7); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD1); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD6); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xDD); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + LCD_WR_REG(0xE3); + LCD_WR_DATA(0x05); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x1C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x33); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x50); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x62); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x78); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x88); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x97); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA6); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xB7); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC7); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD1); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD5); + LCD_WR_DATA(0x00); + 
LCD_WR_DATA(0xDD); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE4); + LCD_WR_DATA(0x01); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x01); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x02); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x2A); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x4B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x5D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x74); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x84); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x93); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xB3); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xBE); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC4); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xCD); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD3); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xDD); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + LCD_WR_REG(0xE5); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x02); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x29); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x3C); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x4B); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x5D); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x74); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x84); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x93); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xA2); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xB3); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xBE); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xC4); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xCD); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xD3); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xDC); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xF3); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE6); + LCD_WR_DATA(0x11); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x34); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x56); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x76); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x77); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x66); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x88); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + 
LCD_WR_DATA(0xBB); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x66); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x55); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x55); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x45); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x43); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE7); + LCD_WR_DATA(0x32); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x55); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x76); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x66); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x67); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x67); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x87); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xBB); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x77); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x56); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x23); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x33); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x45); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE8); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x87); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x88); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x77); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x66); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x88); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xAA); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0xBB); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x99); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x66); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x55); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x55); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x44); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x55); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xE9); + LCD_WR_DATA(0xAA); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0x00); + LCD_WR_DATA(0xAA); + + LCD_WR_REG(0xCF); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + 
LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xF0); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x50); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xF3); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xF9); + LCD_WR_DATA(0x06); + LCD_WR_DATA(0x10); + LCD_WR_DATA(0x29); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0x3A); + LCD_WR_DATA(0x55); //66 + + LCD_WR_REG(0x11); + rt_thread_mdelay(100); + LCD_WR_REG(0x29); + LCD_WR_REG(0x35); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0x51); + LCD_WR_DATA(0xFF); + LCD_WR_REG(0x53); + LCD_WR_DATA(0x2C); + LCD_WR_REG(0x55); + LCD_WR_DATA(0x82); + LCD_WR_REG(0x2c); + } + else if (lcddev.id == 0x5510) + { + LCD_WriteReg(0xF000, 0x55); + LCD_WriteReg(0xF001, 0xAA); + LCD_WriteReg(0xF002, 0x52); + LCD_WriteReg(0xF003, 0x08); + LCD_WriteReg(0xF004, 0x01); + //AVDD Set AVDD 5.2V + LCD_WriteReg(0xB000, 0x0D); + LCD_WriteReg(0xB001, 0x0D); + LCD_WriteReg(0xB002, 0x0D); + //AVDD ratio + LCD_WriteReg(0xB600, 0x34); + LCD_WriteReg(0xB601, 0x34); + LCD_WriteReg(0xB602, 0x34); + //AVEE -5.2V + LCD_WriteReg(0xB100, 0x0D); + LCD_WriteReg(0xB101, 0x0D); + LCD_WriteReg(0xB102, 0x0D); + //AVEE ratio + LCD_WriteReg(0xB700, 0x34); + LCD_WriteReg(0xB701, 0x34); + LCD_WriteReg(0xB702, 0x34); + //VCL -2.5V + LCD_WriteReg(0xB200, 0x00); + LCD_WriteReg(0xB201, 0x00); + LCD_WriteReg(0xB202, 0x00); + //VCL ratio + LCD_WriteReg(0xB800, 0x24); + LCD_WriteReg(0xB801, 0x24); + LCD_WriteReg(0xB802, 0x24); + //VGH 15V (Free pump) + LCD_WriteReg(0xBF00, 0x01); + LCD_WriteReg(0xB300, 0x0F); + LCD_WriteReg(0xB301, 0x0F); + LCD_WriteReg(0xB302, 0x0F); + //VGH ratio + LCD_WriteReg(0xB900, 0x34); + LCD_WriteReg(0xB901, 0x34); + LCD_WriteReg(0xB902, 0x34); + //VGL_REG -10V + LCD_WriteReg(0xB500, 0x08); + LCD_WriteReg(0xB501, 0x08); + 
LCD_WriteReg(0xB502, 0x08); + LCD_WriteReg(0xC200, 0x03); + //VGLX ratio + LCD_WriteReg(0xBA00, 0x24); + LCD_WriteReg(0xBA01, 0x24); + LCD_WriteReg(0xBA02, 0x24); + //VGMP/VGSP 4.5V/0V + LCD_WriteReg(0xBC00, 0x00); + LCD_WriteReg(0xBC01, 0x78); + LCD_WriteReg(0xBC02, 0x00); + //VGMN/VGSN -4.5V/0V + LCD_WriteReg(0xBD00, 0x00); + LCD_WriteReg(0xBD01, 0x78); + LCD_WriteReg(0xBD02, 0x00); + //VCOM + LCD_WriteReg(0xBE00, 0x00); + LCD_WriteReg(0xBE01, 0x64); + //Gamma Setting + LCD_WriteReg(0xD100, 0x00); + LCD_WriteReg(0xD101, 0x33); + LCD_WriteReg(0xD102, 0x00); + LCD_WriteReg(0xD103, 0x34); + LCD_WriteReg(0xD104, 0x00); + LCD_WriteReg(0xD105, 0x3A); + LCD_WriteReg(0xD106, 0x00); + LCD_WriteReg(0xD107, 0x4A); + LCD_WriteReg(0xD108, 0x00); + LCD_WriteReg(0xD109, 0x5C); + LCD_WriteReg(0xD10A, 0x00); + LCD_WriteReg(0xD10B, 0x81); + LCD_WriteReg(0xD10C, 0x00); + LCD_WriteReg(0xD10D, 0xA6); + LCD_WriteReg(0xD10E, 0x00); + LCD_WriteReg(0xD10F, 0xE5); + LCD_WriteReg(0xD110, 0x01); + LCD_WriteReg(0xD111, 0x13); + LCD_WriteReg(0xD112, 0x01); + LCD_WriteReg(0xD113, 0x54); + LCD_WriteReg(0xD114, 0x01); + LCD_WriteReg(0xD115, 0x82); + LCD_WriteReg(0xD116, 0x01); + LCD_WriteReg(0xD117, 0xCA); + LCD_WriteReg(0xD118, 0x02); + LCD_WriteReg(0xD119, 0x00); + LCD_WriteReg(0xD11A, 0x02); + LCD_WriteReg(0xD11B, 0x01); + LCD_WriteReg(0xD11C, 0x02); + LCD_WriteReg(0xD11D, 0x34); + LCD_WriteReg(0xD11E, 0x02); + LCD_WriteReg(0xD11F, 0x67); + LCD_WriteReg(0xD120, 0x02); + LCD_WriteReg(0xD121, 0x84); + LCD_WriteReg(0xD122, 0x02); + LCD_WriteReg(0xD123, 0xA4); + LCD_WriteReg(0xD124, 0x02); + LCD_WriteReg(0xD125, 0xB7); + LCD_WriteReg(0xD126, 0x02); + LCD_WriteReg(0xD127, 0xCF); + LCD_WriteReg(0xD128, 0x02); + LCD_WriteReg(0xD129, 0xDE); + LCD_WriteReg(0xD12A, 0x02); + LCD_WriteReg(0xD12B, 0xF2); + LCD_WriteReg(0xD12C, 0x02); + LCD_WriteReg(0xD12D, 0xFE); + LCD_WriteReg(0xD12E, 0x03); + LCD_WriteReg(0xD12F, 0x10); + LCD_WriteReg(0xD130, 0x03); + LCD_WriteReg(0xD131, 0x33); + LCD_WriteReg(0xD132, 
0x03); + LCD_WriteReg(0xD133, 0x6D); + LCD_WriteReg(0xD200, 0x00); + LCD_WriteReg(0xD201, 0x33); + LCD_WriteReg(0xD202, 0x00); + LCD_WriteReg(0xD203, 0x34); + LCD_WriteReg(0xD204, 0x00); + LCD_WriteReg(0xD205, 0x3A); + LCD_WriteReg(0xD206, 0x00); + LCD_WriteReg(0xD207, 0x4A); + LCD_WriteReg(0xD208, 0x00); + LCD_WriteReg(0xD209, 0x5C); + LCD_WriteReg(0xD20A, 0x00); + + LCD_WriteReg(0xD20B, 0x81); + LCD_WriteReg(0xD20C, 0x00); + LCD_WriteReg(0xD20D, 0xA6); + LCD_WriteReg(0xD20E, 0x00); + LCD_WriteReg(0xD20F, 0xE5); + LCD_WriteReg(0xD210, 0x01); + LCD_WriteReg(0xD211, 0x13); + LCD_WriteReg(0xD212, 0x01); + LCD_WriteReg(0xD213, 0x54); + LCD_WriteReg(0xD214, 0x01); + LCD_WriteReg(0xD215, 0x82); + LCD_WriteReg(0xD216, 0x01); + LCD_WriteReg(0xD217, 0xCA); + LCD_WriteReg(0xD218, 0x02); + LCD_WriteReg(0xD219, 0x00); + LCD_WriteReg(0xD21A, 0x02); + LCD_WriteReg(0xD21B, 0x01); + LCD_WriteReg(0xD21C, 0x02); + LCD_WriteReg(0xD21D, 0x34); + LCD_WriteReg(0xD21E, 0x02); + LCD_WriteReg(0xD21F, 0x67); + LCD_WriteReg(0xD220, 0x02); + LCD_WriteReg(0xD221, 0x84); + LCD_WriteReg(0xD222, 0x02); + LCD_WriteReg(0xD223, 0xA4); + LCD_WriteReg(0xD224, 0x02); + LCD_WriteReg(0xD225, 0xB7); + LCD_WriteReg(0xD226, 0x02); + LCD_WriteReg(0xD227, 0xCF); + LCD_WriteReg(0xD228, 0x02); + LCD_WriteReg(0xD229, 0xDE); + LCD_WriteReg(0xD22A, 0x02); + LCD_WriteReg(0xD22B, 0xF2); + LCD_WriteReg(0xD22C, 0x02); + LCD_WriteReg(0xD22D, 0xFE); + LCD_WriteReg(0xD22E, 0x03); + LCD_WriteReg(0xD22F, 0x10); + LCD_WriteReg(0xD230, 0x03); + LCD_WriteReg(0xD231, 0x33); + LCD_WriteReg(0xD232, 0x03); + LCD_WriteReg(0xD233, 0x6D); + LCD_WriteReg(0xD300, 0x00); + LCD_WriteReg(0xD301, 0x33); + LCD_WriteReg(0xD302, 0x00); + LCD_WriteReg(0xD303, 0x34); + LCD_WriteReg(0xD304, 0x00); + LCD_WriteReg(0xD305, 0x3A); + LCD_WriteReg(0xD306, 0x00); + LCD_WriteReg(0xD307, 0x4A); + LCD_WriteReg(0xD308, 0x00); + LCD_WriteReg(0xD309, 0x5C); + LCD_WriteReg(0xD30A, 0x00); + + LCD_WriteReg(0xD30B, 0x81); + LCD_WriteReg(0xD30C, 0x00); + 
LCD_WriteReg(0xD30D, 0xA6); + LCD_WriteReg(0xD30E, 0x00); + LCD_WriteReg(0xD30F, 0xE5); + LCD_WriteReg(0xD310, 0x01); + LCD_WriteReg(0xD311, 0x13); + LCD_WriteReg(0xD312, 0x01); + LCD_WriteReg(0xD313, 0x54); + LCD_WriteReg(0xD314, 0x01); + LCD_WriteReg(0xD315, 0x82); + LCD_WriteReg(0xD316, 0x01); + LCD_WriteReg(0xD317, 0xCA); + LCD_WriteReg(0xD318, 0x02); + LCD_WriteReg(0xD319, 0x00); + LCD_WriteReg(0xD31A, 0x02); + LCD_WriteReg(0xD31B, 0x01); + LCD_WriteReg(0xD31C, 0x02); + LCD_WriteReg(0xD31D, 0x34); + LCD_WriteReg(0xD31E, 0x02); + LCD_WriteReg(0xD31F, 0x67); + LCD_WriteReg(0xD320, 0x02); + LCD_WriteReg(0xD321, 0x84); + LCD_WriteReg(0xD322, 0x02); + LCD_WriteReg(0xD323, 0xA4); + LCD_WriteReg(0xD324, 0x02); + LCD_WriteReg(0xD325, 0xB7); + LCD_WriteReg(0xD326, 0x02); + LCD_WriteReg(0xD327, 0xCF); + LCD_WriteReg(0xD328, 0x02); + LCD_WriteReg(0xD329, 0xDE); + LCD_WriteReg(0xD32A, 0x02); + LCD_WriteReg(0xD32B, 0xF2); + LCD_WriteReg(0xD32C, 0x02); + LCD_WriteReg(0xD32D, 0xFE); + LCD_WriteReg(0xD32E, 0x03); + LCD_WriteReg(0xD32F, 0x10); + LCD_WriteReg(0xD330, 0x03); + LCD_WriteReg(0xD331, 0x33); + LCD_WriteReg(0xD332, 0x03); + LCD_WriteReg(0xD333, 0x6D); + LCD_WriteReg(0xD400, 0x00); + LCD_WriteReg(0xD401, 0x33); + LCD_WriteReg(0xD402, 0x00); + LCD_WriteReg(0xD403, 0x34); + LCD_WriteReg(0xD404, 0x00); + LCD_WriteReg(0xD405, 0x3A); + LCD_WriteReg(0xD406, 0x00); + LCD_WriteReg(0xD407, 0x4A); + LCD_WriteReg(0xD408, 0x00); + LCD_WriteReg(0xD409, 0x5C); + LCD_WriteReg(0xD40A, 0x00); + LCD_WriteReg(0xD40B, 0x81); + + LCD_WriteReg(0xD40C, 0x00); + LCD_WriteReg(0xD40D, 0xA6); + LCD_WriteReg(0xD40E, 0x00); + LCD_WriteReg(0xD40F, 0xE5); + LCD_WriteReg(0xD410, 0x01); + LCD_WriteReg(0xD411, 0x13); + LCD_WriteReg(0xD412, 0x01); + LCD_WriteReg(0xD413, 0x54); + LCD_WriteReg(0xD414, 0x01); + LCD_WriteReg(0xD415, 0x82); + LCD_WriteReg(0xD416, 0x01); + LCD_WriteReg(0xD417, 0xCA); + LCD_WriteReg(0xD418, 0x02); + LCD_WriteReg(0xD419, 0x00); + LCD_WriteReg(0xD41A, 0x02); + 
LCD_WriteReg(0xD41B, 0x01); + LCD_WriteReg(0xD41C, 0x02); + LCD_WriteReg(0xD41D, 0x34); + LCD_WriteReg(0xD41E, 0x02); + LCD_WriteReg(0xD41F, 0x67); + LCD_WriteReg(0xD420, 0x02); + LCD_WriteReg(0xD421, 0x84); + LCD_WriteReg(0xD422, 0x02); + LCD_WriteReg(0xD423, 0xA4); + LCD_WriteReg(0xD424, 0x02); + LCD_WriteReg(0xD425, 0xB7); + LCD_WriteReg(0xD426, 0x02); + LCD_WriteReg(0xD427, 0xCF); + LCD_WriteReg(0xD428, 0x02); + LCD_WriteReg(0xD429, 0xDE); + LCD_WriteReg(0xD42A, 0x02); + LCD_WriteReg(0xD42B, 0xF2); + LCD_WriteReg(0xD42C, 0x02); + LCD_WriteReg(0xD42D, 0xFE); + LCD_WriteReg(0xD42E, 0x03); + LCD_WriteReg(0xD42F, 0x10); + LCD_WriteReg(0xD430, 0x03); + LCD_WriteReg(0xD431, 0x33); + LCD_WriteReg(0xD432, 0x03); + LCD_WriteReg(0xD433, 0x6D); + LCD_WriteReg(0xD500, 0x00); + LCD_WriteReg(0xD501, 0x33); + LCD_WriteReg(0xD502, 0x00); + LCD_WriteReg(0xD503, 0x34); + LCD_WriteReg(0xD504, 0x00); + LCD_WriteReg(0xD505, 0x3A); + LCD_WriteReg(0xD506, 0x00); + LCD_WriteReg(0xD507, 0x4A); + LCD_WriteReg(0xD508, 0x00); + LCD_WriteReg(0xD509, 0x5C); + LCD_WriteReg(0xD50A, 0x00); + LCD_WriteReg(0xD50B, 0x81); + + LCD_WriteReg(0xD50C, 0x00); + LCD_WriteReg(0xD50D, 0xA6); + LCD_WriteReg(0xD50E, 0x00); + LCD_WriteReg(0xD50F, 0xE5); + LCD_WriteReg(0xD510, 0x01); + LCD_WriteReg(0xD511, 0x13); + LCD_WriteReg(0xD512, 0x01); + LCD_WriteReg(0xD513, 0x54); + LCD_WriteReg(0xD514, 0x01); + LCD_WriteReg(0xD515, 0x82); + LCD_WriteReg(0xD516, 0x01); + LCD_WriteReg(0xD517, 0xCA); + LCD_WriteReg(0xD518, 0x02); + LCD_WriteReg(0xD519, 0x00); + LCD_WriteReg(0xD51A, 0x02); + LCD_WriteReg(0xD51B, 0x01); + LCD_WriteReg(0xD51C, 0x02); + LCD_WriteReg(0xD51D, 0x34); + LCD_WriteReg(0xD51E, 0x02); + LCD_WriteReg(0xD51F, 0x67); + LCD_WriteReg(0xD520, 0x02); + LCD_WriteReg(0xD521, 0x84); + LCD_WriteReg(0xD522, 0x02); + LCD_WriteReg(0xD523, 0xA4); + LCD_WriteReg(0xD524, 0x02); + LCD_WriteReg(0xD525, 0xB7); + LCD_WriteReg(0xD526, 0x02); + LCD_WriteReg(0xD527, 0xCF); + LCD_WriteReg(0xD528, 0x02); + 
LCD_WriteReg(0xD529, 0xDE); + LCD_WriteReg(0xD52A, 0x02); + LCD_WriteReg(0xD52B, 0xF2); + LCD_WriteReg(0xD52C, 0x02); + LCD_WriteReg(0xD52D, 0xFE); + LCD_WriteReg(0xD52E, 0x03); + LCD_WriteReg(0xD52F, 0x10); + LCD_WriteReg(0xD530, 0x03); + LCD_WriteReg(0xD531, 0x33); + LCD_WriteReg(0xD532, 0x03); + LCD_WriteReg(0xD533, 0x6D); + LCD_WriteReg(0xD600, 0x00); + LCD_WriteReg(0xD601, 0x33); + LCD_WriteReg(0xD602, 0x00); + LCD_WriteReg(0xD603, 0x34); + LCD_WriteReg(0xD604, 0x00); + LCD_WriteReg(0xD605, 0x3A); + LCD_WriteReg(0xD606, 0x00); + LCD_WriteReg(0xD607, 0x4A); + LCD_WriteReg(0xD608, 0x00); + LCD_WriteReg(0xD609, 0x5C); + LCD_WriteReg(0xD60A, 0x00); + LCD_WriteReg(0xD60B, 0x81); + + LCD_WriteReg(0xD60C, 0x00); + LCD_WriteReg(0xD60D, 0xA6); + LCD_WriteReg(0xD60E, 0x00); + LCD_WriteReg(0xD60F, 0xE5); + LCD_WriteReg(0xD610, 0x01); + LCD_WriteReg(0xD611, 0x13); + LCD_WriteReg(0xD612, 0x01); + LCD_WriteReg(0xD613, 0x54); + LCD_WriteReg(0xD614, 0x01); + LCD_WriteReg(0xD615, 0x82); + LCD_WriteReg(0xD616, 0x01); + LCD_WriteReg(0xD617, 0xCA); + LCD_WriteReg(0xD618, 0x02); + LCD_WriteReg(0xD619, 0x00); + LCD_WriteReg(0xD61A, 0x02); + LCD_WriteReg(0xD61B, 0x01); + LCD_WriteReg(0xD61C, 0x02); + LCD_WriteReg(0xD61D, 0x34); + LCD_WriteReg(0xD61E, 0x02); + LCD_WriteReg(0xD61F, 0x67); + LCD_WriteReg(0xD620, 0x02); + LCD_WriteReg(0xD621, 0x84); + LCD_WriteReg(0xD622, 0x02); + LCD_WriteReg(0xD623, 0xA4); + LCD_WriteReg(0xD624, 0x02); + LCD_WriteReg(0xD625, 0xB7); + LCD_WriteReg(0xD626, 0x02); + LCD_WriteReg(0xD627, 0xCF); + LCD_WriteReg(0xD628, 0x02); + LCD_WriteReg(0xD629, 0xDE); + LCD_WriteReg(0xD62A, 0x02); + LCD_WriteReg(0xD62B, 0xF2); + LCD_WriteReg(0xD62C, 0x02); + LCD_WriteReg(0xD62D, 0xFE); + LCD_WriteReg(0xD62E, 0x03); + LCD_WriteReg(0xD62F, 0x10); + LCD_WriteReg(0xD630, 0x03); + LCD_WriteReg(0xD631, 0x33); + LCD_WriteReg(0xD632, 0x03); + LCD_WriteReg(0xD633, 0x6D); + //LV2 Page 0 enable + LCD_WriteReg(0xF000, 0x55); + LCD_WriteReg(0xF001, 0xAA); + LCD_WriteReg(0xF002, 
0x52); + LCD_WriteReg(0xF003, 0x08); + LCD_WriteReg(0xF004, 0x00); + //Display control + LCD_WriteReg(0xB100, 0xCC); + LCD_WriteReg(0xB101, 0x00); + //Source hold time + LCD_WriteReg(0xB600, 0x05); + //Gate EQ control + LCD_WriteReg(0xB700, 0x70); + LCD_WriteReg(0xB701, 0x70); + //Source EQ control (Mode 2) + LCD_WriteReg(0xB800, 0x01); + LCD_WriteReg(0xB801, 0x03); + LCD_WriteReg(0xB802, 0x03); + LCD_WriteReg(0xB803, 0x03); + //Inversion mode (2-dot) + LCD_WriteReg(0xBC00, 0x02); + LCD_WriteReg(0xBC01, 0x00); + LCD_WriteReg(0xBC02, 0x00); + //Timing control 4H w/ 4-delay + LCD_WriteReg(0xC900, 0xD0); + LCD_WriteReg(0xC901, 0x02); + LCD_WriteReg(0xC902, 0x50); + LCD_WriteReg(0xC903, 0x50); + LCD_WriteReg(0xC904, 0x50); + LCD_WriteReg(0x3500, 0x00); + LCD_WriteReg(0x3A00, 0x55); //16-bit/pixel + LCD_WR_REG(0x1100); + rt_thread_mdelay(1); + LCD_WR_REG(0x2900); + } + else if (lcddev.id == 0X1963) + { + LCD_WR_REG(0xE2); //Set PLL with OSC = 10MHz (hardware), Multiplier N = 35, 250MHz < VCO < 800MHz = OSC*(N+1), VCO = 300MHz + LCD_WR_DATA(0x1D); //傿•°1 + LCD_WR_DATA(0x02); //傿•°2 Divider M = 2, PLL = 300/(M+1) = 100MHz + LCD_WR_DATA(0x04); //傿•°3 Validate M and N values + rt_thread_mdelay(1); + LCD_WR_REG(0xE0); // Start PLL command + LCD_WR_DATA(0x01); // enable PLL + rt_thread_mdelay(10); + LCD_WR_REG(0xE0); // Start PLL command again + LCD_WR_DATA(0x03); // now, use PLL output as system clock + rt_thread_mdelay(12); + LCD_WR_REG(0x01); //软å¤ä½ + rt_thread_mdelay(10); + + LCD_WR_REG(0xE6); //设置åƒç´ é¢‘率,33Mhz + LCD_WR_DATA(0x2F); + LCD_WR_DATA(0xFF); + LCD_WR_DATA(0xFF); + + LCD_WR_REG(0xB0); //设置LCDæ¨¡å¼ + LCD_WR_DATA(0x20); //24使¨¡å¼ + LCD_WR_DATA(0x00); //TFT æ¨¡å¼ + + LCD_WR_DATA((SSD_HOR_RESOLUTION - 1) >> 8); //设置LCDæ°´å¹³åƒç´  + LCD_WR_DATA(SSD_HOR_RESOLUTION - 1); + LCD_WR_DATA((SSD_VER_RESOLUTION - 1) >> 8); //设置LCD垂直åƒç´  + LCD_WR_DATA(SSD_VER_RESOLUTION - 1); + LCD_WR_DATA(0x00); //RGBåºåˆ— + + LCD_WR_REG(0xB4); //Set horizontal period + 
LCD_WR_DATA((SSD_HT - 1) >> 8); + LCD_WR_DATA(SSD_HT - 1); + LCD_WR_DATA(SSD_HPS >> 8); + LCD_WR_DATA(SSD_HPS); + LCD_WR_DATA(SSD_HOR_PULSE_WIDTH - 1); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + LCD_WR_REG(0xB6); //Set vertical period + LCD_WR_DATA((SSD_VT - 1) >> 8); + LCD_WR_DATA(SSD_VT - 1); + LCD_WR_DATA(SSD_VPS >> 8); + LCD_WR_DATA(SSD_VPS); + LCD_WR_DATA(SSD_VER_FRONT_PORCH - 1); + LCD_WR_DATA(0x00); + LCD_WR_DATA(0x00); + + LCD_WR_REG(0xF0); //设置SSD1963与CPU接å£ä¸º16bit + LCD_WR_DATA(0x03); //16-bit(565 format) data for 16bpp + + LCD_WR_REG(0x29); //开坿˜¾ç¤º + //设置PWM输出 背光通过å ç©ºæ¯”å¯è°ƒ + LCD_WR_REG(0xD0); //设置自动白平衡DBC + LCD_WR_DATA(0x00); //disable + + LCD_WR_REG(0xBE); //é…ç½®PWM输出 + LCD_WR_DATA(0x05); //1设置PWM频率 + LCD_WR_DATA(0xFE); //2设置PWMå ç©ºæ¯” + LCD_WR_DATA(0x01); //3设置C + LCD_WR_DATA(0x00); //4设置D + LCD_WR_DATA(0x00); //5设置E + LCD_WR_DATA(0x00); //6设置F + + LCD_WR_REG(0xB8); //设置GPIOé…ç½® + LCD_WR_DATA(0x03); //2个IOå£è®¾ç½®æˆè¾“出 + LCD_WR_DATA(0x01); //GPIO使用正常的IO功能 + LCD_WR_REG(0xBA); + LCD_WR_DATA(0X01); //GPIO[1:0]=01,控制LCDæ–¹å‘ + + LCD_SSD_BackLightSet(100); //背光设置为最亮 + } + //åˆå§‹åŒ–完æˆä»¥åŽ,æé€Ÿ + if (lcddev.id == 0X9341 || lcddev.id == 0X5310 || lcddev.id == 0X5510 || lcddev.id == 0X1963) //如果是这几个IC,则设置WRæ—¶åºä¸ºæœ€å¿« + { + //釿–°é…ç½®å†™æ—¶åºæŽ§åˆ¶å¯„å­˜å™¨çš„æ—¶åº + FSMC_Bank1E->BWTR[6] &= ~(0XF << 0); //地å€å»ºç«‹æ—¶é—´(ADDSET)清零 + FSMC_Bank1E->BWTR[6] &= ~(0XF << 8); //æ•°æ®ä¿å­˜æ—¶é—´æ¸…é›¶ + FSMC_Bank1E->BWTR[6] |= 3 << 0; //地å€å»ºç«‹æ—¶é—´(ADDSET)为3个HCLK =18ns + FSMC_Bank1E->BWTR[6] |= 2 << 8; //æ•°æ®ä¿å­˜æ—¶é—´(DATAST)为6ns*3个HCLK=18ns + } + LCD_Display_Dir(0); //é»˜è®¤ä¸ºç«–å± + rt_pin_write(LCD_BL, PIN_HIGH); + lcddev.pointcolor = RED; + lcddev.backcolor = WHITE; + LCD_Clear(0xffff); + return RT_EOK; +} + +struct rt_device_graphic_ops fsmc_lcd_ops = + { + LCD_Fast_DrawPoint, + LCD_ReadPoint, + LCD_HLine, + RT_NULL, + LCD_BlitLine, +}; + +static rt_err_t drv_lcd_control(struct rt_device *device, int cmd, void *args) 
+{ + struct drv_lcd_device *lcd = LCD_DEVICE(device); + switch (cmd) + { + case RTGRAPHIC_CTRL_GET_INFO: + { + struct rt_device_graphic_info *info = (struct rt_device_graphic_info *)args; + + RT_ASSERT(info != RT_NULL); + + //this needs to be replaced by the customer + info->pixel_format = lcd->lcd_info.pixel_format; + info->bits_per_pixel = lcd->lcd_info.bits_per_pixel; + info->width = lcddev.width; + info->height = lcddev.height; + } + break; + } + + return RT_EOK; +} + +#ifdef RT_USING_DEVICE_OPS +const static struct rt_device_ops lcd_ops = + { + drv_lcd_init, + RT_NULL, + RT_NULL, + RT_NULL, + RT_NULL, + drv_lcd_control}; +#endif + +int drv_lcd_hw_init(void) +{ + rt_device_t lcd = RT_NULL; + rt_err_t result = RT_EOK; + struct rt_device *device = &_lcd.parent; + /* memset _lcd to zero */ + memset(&_lcd, 0x00, sizeof(_lcd)); + + _lcd.lcd_info.bits_per_pixel = 16; + _lcd.lcd_info.pixel_format = RTGRAPHIC_PIXEL_FORMAT_RGB565; + + device->type = RT_Device_Class_Graphic; +#ifdef RT_USING_DEVICE_OPS + device->ops = &lcd_ops; +#else + device->init = drv_lcd_init; + device->control = drv_lcd_control; +#endif + device->user_data = &fsmc_lcd_ops; + /* register lcd device */ + rt_device_register(device, "lcd", RT_DEVICE_FLAG_RDWR | RT_DEVICE_FLAG_STANDALONE); + lcd = rt_device_find("lcd"); + result = rt_device_init(lcd); + if(RT_EOK == result) + { + LOG_D("lcd init success \n\r"); + } + else + { + LOG_E("lcd init fail ! 
\n\r"); + } + return result; +} +INIT_DEVICE_EXPORT(drv_lcd_hw_init); + +#ifdef BSP_USING_MCU_LCD_TEST +void lcd_auto_fill(void *para) +{ + int num = (int)para; + do + { + LCD_Clear(rt_tick_get()); + rt_thread_mdelay(500); + }while(--num); +} + +#include /* atoi */ +void lcd_fill(int argc, void **argv) +{ + if(argc == 1) + { + lcd_auto_fill((void *)1); + } + else if(argc == 3) + { + if(rt_strcmp(argv[1], "-t")==0) + { + rt_thread_t tid = RT_NULL; + tid = rt_thread_create("lcd_fill", lcd_auto_fill, (void *)atoi(argv[2]), 512, 23,10); + rt_thread_startup(tid); + } + } +} +MSH_CMD_EXPORT(lcd_fill, lcd fill test for mcu lcd); + + + + //Show_Str(30,280,200,25," ",16,0); + //Show_Str(30,280,200,25,detection_label[top_ind],16,0); +void lcd_showstring_test(int argc, void **argv) +{ + char tmp[50] = "hello aiit board stm32"; + + if(argc == 1) + { + lcd_show_string(30,260,240,16,16,tmp,RED); + return; + } + memset(tmp,0,50); + memcpy(tmp,argv[1],50); + lcd_show_string(30,250,240,16,16,tmp,RED); +} +MSH_CMD_EXPORT(lcd_showstring_test, lcd show string test); +#endif diff --git a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_lcd.h b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_lcd.h new file mode 100644 index 000000000..bc42e6a98 --- /dev/null +++ b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_lcd.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2006-2021, RT-Thread Development Team + * + * SPDX-License-Identifier: Apache-2.0 + * + * Change Logs: + * Date Author Notes + * 2021-12-28 unknow copy by STemwin + */ +#ifndef __DRV_LCD_H +#define __DRV_LCD_H +#include +#include "rtdevice.h" +#include +#define LCD_W 800 +#define LCD_H 480 + + +//LCDé‡è¦å‚数集 +typedef struct +{ + uint16_t width; //LCD 宽度 + uint16_t height; //LCD 高度 + uint16_t id; //LCD ID + uint8_t dir; //横å±è¿˜æ˜¯ç«–å±æŽ§åˆ¶ï¼š0,竖å±ï¼›1,横å±ã€‚ + uint16_t wramcmd; //开始写gram指令 + uint16_t setxcmd; //设置xåæ ‡æŒ‡ä»¤ + uint16_t setycmd; //设置yåæ ‡æŒ‡ä»¤ + uint16_t pointcolor; // 
画笔颜色 + uint16_t backcolor; //background color +}_lcd_dev; + +//LCD parameters +extern _lcd_dev lcddev; //holds the key LCD parameters + +typedef struct +{ + __IO uint16_t REG; + __IO uint16_t RAM; +}LCD_CONTROLLER_TypeDef; + +//scan direction definitions +#define L2R_U2D 0 //left to right, top to bottom +#define L2R_D2U 1 //left to right, bottom to top +#define R2L_U2D 2 //right to left, top to bottom +#define R2L_D2U 3 //right to left, bottom to top + +#define U2D_L2R 4 //top to bottom, left to right +#define U2D_R2L 5 //top to bottom, right to left +#define D2U_L2R 6 //bottom to top, left to right +#define D2U_R2L 7 //bottom to top, right to left + +#define DFT_SCAN_DIR L2R_U2D //default scan direction + +//LCD resolution settings +#define SSD_HOR_RESOLUTION 800 //LCD horizontal resolution +#define SSD_VER_RESOLUTION 480 //LCD vertical resolution +//LCD drive timing parameters +#define SSD_HOR_PULSE_WIDTH 1 //horizontal pulse width +#define SSD_HOR_BACK_PORCH 46 //horizontal back porch +#define SSD_HOR_FRONT_PORCH 210 //horizontal front porch + +#define SSD_VER_PULSE_WIDTH 1 //vertical pulse width +#define SSD_VER_BACK_PORCH 23 //vertical back porch +#define SSD_VER_FRONT_PORCH 22 //vertical front porch +//the following values are derived from the settings above +#define SSD_HT (SSD_HOR_RESOLUTION+SSD_HOR_BACK_PORCH+SSD_HOR_FRONT_PORCH) +#define SSD_HPS (SSD_HOR_BACK_PORCH) +#define SSD_VT (SSD_VER_RESOLUTION+SSD_VER_BACK_PORCH+SSD_VER_FRONT_PORCH) +#define SSD_VPS (SSD_VER_BACK_PORCH) + + +//pen colors (16-bit values) +#define WHITE 0xFFFF +#define BLACK 0x0000 +#define BLUE 0x001F +#define BRED 0XF81F +#define GRED 0XFFE0 +#define GBLUE 0X07FF +#define RED 0xF800 +#define MAGENTA 0xF81F +#define GREEN 0x07E0 +#define CYAN 0x7FFF //NOTE(review): differs from GBLUE (0X07FF) only in bits 11-14 - confirm this value is intended +#define YELLOW 0xFFE0 +#define BROWN 0XBC40 //brown +#define BRRED 0XFC07 //brownish red +#define GRAY 0X8430 //gray +//GUI colors + +#define DARKBLUE 0X01CF //dark blue +#define LIGHTBLUE 0X7D7C //light blue +#define GRAYBLUE 0X5458 //gray blue +//the three colors above are the PANEL colors + +#define LIGHTGREEN 0X841F //light green +//#define LIGHTGRAY 0XEF5B //light gray (PANEL) +#define LGRAY 0XC618 //light gray (PANEL), window background color + +#define LGRAYBLUE 0XA651 //light gray blue (middle layer color) +#define LBBLUE 0X2B12 //light brown blue (inverse color of a selected entry) + +void lcd_fill_array(rt_uint16_t x_start, rt_uint16_t y_start, rt_uint16_t x_end, rt_uint16_t y_end, void *pcolor); + +#endif 
diff --git a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_sram.c b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_sram.c index cd5e8b761..723a56b2c 100644 --- a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_sram.c +++ b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/drv_sram.c @@ -161,7 +161,7 @@ MSH_CMD_EXPORT(sram_test, sram test); static void sram_test2(void) { char *p =NULL; - p = rt_malloc(1024*1000); + p = rt_malloc(1024*800); if(p == NULL) { LOG_E("apply for 1MB memory fail ~!!!"); diff --git a/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/font.h b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/font.h new file mode 100644 index 000000000..1f82ced02 --- /dev/null +++ b/Ubiquitous/RT_Thread/aiit_board/stm32f407_core/board/ports/font.h @@ -0,0 +1,403 @@ +#ifndef __FONT_H +#define __FONT_H +//Common ASCII table +//Offset: 32 +//ASCII character set: !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +//PC2LCD2002 glyph extraction settings: negative code + column-by-column + forward bit order + C51 format +//In total: 3 character sets (12*12, 16*16, 24*24 and 32*32) (sic); users may add character sets at other resolutions. +//Bytes occupied by each character: (size/8+((size%8)?1:0))*(size/2), where size is the dot-matrix size used when the font was generated (12/16/24/32...) 
+ +//12*12 ASCII×Ö·û¼¯µãÕó +const unsigned char asc2_1206[95][12]={ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*" ",0*/ +{0x00,0x00,0x00,0x00,0x3F,0x40,0x00,0x00,0x00,0x00,0x00,0x00},/*"!",1*/ +{0x00,0x00,0x30,0x00,0x40,0x00,0x30,0x00,0x40,0x00,0x00,0x00},/*""",2*/ +{0x09,0x00,0x0B,0xC0,0x3D,0x00,0x0B,0xC0,0x3D,0x00,0x09,0x00},/*"#",3*/ +{0x18,0xC0,0x24,0x40,0x7F,0xE0,0x22,0x40,0x31,0x80,0x00,0x00},/*"$",4*/ +{0x18,0x00,0x24,0xC0,0x1B,0x00,0x0D,0x80,0x32,0x40,0x01,0x80},/*"%",5*/ +{0x03,0x80,0x1C,0x40,0x27,0x40,0x1C,0x80,0x07,0x40,0x00,0x40},/*"&",6*/ +{0x10,0x00,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"'",7*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x1F,0x80,0x20,0x40,0x40,0x20},/*"(",8*/ +{0x00,0x00,0x40,0x20,0x20,0x40,0x1F,0x80,0x00,0x00,0x00,0x00},/*")",9*/ +{0x09,0x00,0x06,0x00,0x1F,0x80,0x06,0x00,0x09,0x00,0x00,0x00},/*"*",10*/ +{0x04,0x00,0x04,0x00,0x3F,0x80,0x04,0x00,0x04,0x00,0x00,0x00},/*"+",11*/ +{0x00,0x10,0x00,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*",",12*/ +{0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x00,0x00},/*"-",13*/ +{0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*".",14*/ +{0x00,0x20,0x01,0xC0,0x06,0x00,0x38,0x00,0x40,0x00,0x00,0x00},/*"/",15*/ +{0x1F,0x80,0x20,0x40,0x20,0x40,0x20,0x40,0x1F,0x80,0x00,0x00},/*"0",16*/ +{0x00,0x00,0x10,0x40,0x3F,0xC0,0x00,0x40,0x00,0x00,0x00,0x00},/*"1",17*/ +{0x18,0xC0,0x21,0x40,0x22,0x40,0x24,0x40,0x18,0x40,0x00,0x00},/*"2",18*/ +{0x10,0x80,0x20,0x40,0x24,0x40,0x24,0x40,0x1B,0x80,0x00,0x00},/*"3",19*/ +{0x02,0x00,0x0D,0x00,0x11,0x00,0x3F,0xC0,0x01,0x40,0x00,0x00},/*"4",20*/ +{0x3C,0x80,0x24,0x40,0x24,0x40,0x24,0x40,0x23,0x80,0x00,0x00},/*"5",21*/ +{0x1F,0x80,0x24,0x40,0x24,0x40,0x34,0x40,0x03,0x80,0x00,0x00},/*"6",22*/ +{0x30,0x00,0x20,0x00,0x27,0xC0,0x38,0x00,0x20,0x00,0x00,0x00},/*"7",23*/ +{0x1B,0x80,0x24,0x40,0x24,0x40,0x24,0x40,0x1B,0x80,0x00,0x00},/*"8",24*/ +{0x1C,0x00,0x22,0xC0,0x22,0x40,0x22,0x40,0x1F,0x80,0x00,0x00},/*"9",25*/ 
+{0x00,0x00,0x00,0x00,0x08,0x40,0x00,0x00,0x00,0x00,0x00,0x00},/*":",26*/ +{0x00,0x00,0x00,0x00,0x04,0x60,0x00,0x00,0x00,0x00,0x00,0x00},/*";",27*/ +{0x00,0x00,0x04,0x00,0x0A,0x00,0x11,0x00,0x20,0x80,0x40,0x40},/*"<",28*/ +{0x09,0x00,0x09,0x00,0x09,0x00,0x09,0x00,0x09,0x00,0x00,0x00},/*"=",29*/ +{0x00,0x00,0x40,0x40,0x20,0x80,0x11,0x00,0x0A,0x00,0x04,0x00},/*">",30*/ +{0x18,0x00,0x20,0x00,0x23,0x40,0x24,0x00,0x18,0x00,0x00,0x00},/*"?",31*/ +{0x1F,0x80,0x20,0x40,0x27,0x40,0x29,0x40,0x1F,0x40,0x00,0x00},/*"@",32*/ +{0x00,0x40,0x07,0xC0,0x39,0x00,0x0F,0x00,0x01,0xC0,0x00,0x40},/*"A",33*/ +{0x20,0x40,0x3F,0xC0,0x24,0x40,0x24,0x40,0x1B,0x80,0x00,0x00},/*"B",34*/ +{0x1F,0x80,0x20,0x40,0x20,0x40,0x20,0x40,0x30,0x80,0x00,0x00},/*"C",35*/ +{0x20,0x40,0x3F,0xC0,0x20,0x40,0x20,0x40,0x1F,0x80,0x00,0x00},/*"D",36*/ +{0x20,0x40,0x3F,0xC0,0x24,0x40,0x2E,0x40,0x30,0xC0,0x00,0x00},/*"E",37*/ +{0x20,0x40,0x3F,0xC0,0x24,0x40,0x2E,0x00,0x30,0x00,0x00,0x00},/*"F",38*/ +{0x0F,0x00,0x10,0x80,0x20,0x40,0x22,0x40,0x33,0x80,0x02,0x00},/*"G",39*/ +{0x20,0x40,0x3F,0xC0,0x04,0x00,0x04,0x00,0x3F,0xC0,0x20,0x40},/*"H",40*/ +{0x20,0x40,0x20,0x40,0x3F,0xC0,0x20,0x40,0x20,0x40,0x00,0x00},/*"I",41*/ +{0x00,0x60,0x20,0x20,0x20,0x20,0x3F,0xC0,0x20,0x00,0x20,0x00},/*"J",42*/ +{0x20,0x40,0x3F,0xC0,0x24,0x40,0x0B,0x00,0x30,0xC0,0x20,0x40},/*"K",43*/ +{0x20,0x40,0x3F,0xC0,0x20,0x40,0x00,0x40,0x00,0x40,0x00,0xC0},/*"L",44*/ +{0x3F,0xC0,0x3C,0x00,0x03,0xC0,0x3C,0x00,0x3F,0xC0,0x00,0x00},/*"M",45*/ +{0x20,0x40,0x3F,0xC0,0x0C,0x40,0x23,0x00,0x3F,0xC0,0x20,0x00},/*"N",46*/ +{0x1F,0x80,0x20,0x40,0x20,0x40,0x20,0x40,0x1F,0x80,0x00,0x00},/*"O",47*/ +{0x20,0x40,0x3F,0xC0,0x24,0x40,0x24,0x00,0x18,0x00,0x00,0x00},/*"P",48*/ +{0x1F,0x80,0x21,0x40,0x21,0x40,0x20,0xE0,0x1F,0xA0,0x00,0x00},/*"Q",49*/ +{0x20,0x40,0x3F,0xC0,0x24,0x40,0x26,0x00,0x19,0xC0,0x00,0x40},/*"R",50*/ +{0x18,0xC0,0x24,0x40,0x24,0x40,0x22,0x40,0x31,0x80,0x00,0x00},/*"S",51*/ +{0x30,0x00,0x20,0x40,0x3F,0xC0,0x20,0x40,0x30,0x00,0x00,0x00},/*"T",52*/ 
+{0x20,0x00,0x3F,0x80,0x00,0x40,0x00,0x40,0x3F,0x80,0x20,0x00},/*"U",53*/ +{0x20,0x00,0x3E,0x00,0x01,0xC0,0x07,0x00,0x38,0x00,0x20,0x00},/*"V",54*/ +{0x38,0x00,0x07,0xC0,0x3C,0x00,0x07,0xC0,0x38,0x00,0x00,0x00},/*"W",55*/ +{0x20,0x40,0x39,0xC0,0x06,0x00,0x39,0xC0,0x20,0x40,0x00,0x00},/*"X",56*/ +{0x20,0x00,0x38,0x40,0x07,0xC0,0x38,0x40,0x20,0x00,0x00,0x00},/*"Y",57*/ +{0x30,0x40,0x21,0xC0,0x26,0x40,0x38,0x40,0x20,0xC0,0x00,0x00},/*"Z",58*/ +{0x00,0x00,0x00,0x00,0x7F,0xE0,0x40,0x20,0x40,0x20,0x00,0x00},/*"[",59*/ +{0x00,0x00,0x70,0x00,0x0C,0x00,0x03,0x80,0x00,0x40,0x00,0x00},/*"\",60*/ +{0x00,0x00,0x40,0x20,0x40,0x20,0x7F,0xE0,0x00,0x00,0x00,0x00},/*"]",61*/ +{0x00,0x00,0x20,0x00,0x40,0x00,0x20,0x00,0x00,0x00,0x00,0x00},/*"^",62*/ +{0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10},/*"_",63*/ +{0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"`",64*/ +{0x00,0x00,0x02,0x80,0x05,0x40,0x05,0x40,0x03,0xC0,0x00,0x40},/*"a",65*/ +{0x20,0x00,0x3F,0xC0,0x04,0x40,0x04,0x40,0x03,0x80,0x00,0x00},/*"b",66*/ +{0x00,0x00,0x03,0x80,0x04,0x40,0x04,0x40,0x06,0x40,0x00,0x00},/*"c",67*/ +{0x00,0x00,0x03,0x80,0x04,0x40,0x24,0x40,0x3F,0xC0,0x00,0x40},/*"d",68*/ +{0x00,0x00,0x03,0x80,0x05,0x40,0x05,0x40,0x03,0x40,0x00,0x00},/*"e",69*/ +{0x00,0x00,0x04,0x40,0x1F,0xC0,0x24,0x40,0x24,0x40,0x20,0x00},/*"f",70*/ +{0x00,0x00,0x02,0xE0,0x05,0x50,0x05,0x50,0x06,0x50,0x04,0x20},/*"g",71*/ +{0x20,0x40,0x3F,0xC0,0x04,0x40,0x04,0x00,0x03,0xC0,0x00,0x40},/*"h",72*/ +{0x00,0x00,0x04,0x40,0x27,0xC0,0x00,0x40,0x00,0x00,0x00,0x00},/*"i",73*/ +{0x00,0x10,0x00,0x10,0x04,0x10,0x27,0xE0,0x00,0x00,0x00,0x00},/*"j",74*/ +{0x20,0x40,0x3F,0xC0,0x01,0x40,0x07,0x00,0x04,0xC0,0x04,0x40},/*"k",75*/ +{0x20,0x40,0x20,0x40,0x3F,0xC0,0x00,0x40,0x00,0x40,0x00,0x00},/*"l",76*/ +{0x07,0xC0,0x04,0x00,0x07,0xC0,0x04,0x00,0x03,0xC0,0x00,0x00},/*"m",77*/ +{0x04,0x40,0x07,0xC0,0x04,0x40,0x04,0x00,0x03,0xC0,0x00,0x40},/*"n",78*/ +{0x00,0x00,0x03,0x80,0x04,0x40,0x04,0x40,0x03,0x80,0x00,0x00},/*"o",79*/ 
+{0x04,0x10,0x07,0xF0,0x04,0x50,0x04,0x40,0x03,0x80,0x00,0x00},/*"p",80*/ +{0x00,0x00,0x03,0x80,0x04,0x40,0x04,0x50,0x07,0xF0,0x00,0x10},/*"q",81*/ +{0x04,0x40,0x07,0xC0,0x02,0x40,0x04,0x00,0x04,0x00,0x00,0x00},/*"r",82*/ +{0x00,0x00,0x06,0x40,0x05,0x40,0x05,0x40,0x04,0xC0,0x00,0x00},/*"s",83*/ +{0x00,0x00,0x04,0x00,0x1F,0x80,0x04,0x40,0x00,0x40,0x00,0x00},/*"t",84*/ +{0x04,0x00,0x07,0x80,0x00,0x40,0x04,0x40,0x07,0xC0,0x00,0x40},/*"u",85*/ +{0x04,0x00,0x07,0x00,0x04,0xC0,0x01,0x80,0x06,0x00,0x04,0x00},/*"v",86*/ +{0x06,0x00,0x01,0xC0,0x07,0x00,0x01,0xC0,0x06,0x00,0x00,0x00},/*"w",87*/ +{0x04,0x40,0x06,0xC0,0x01,0x00,0x06,0xC0,0x04,0x40,0x00,0x00},/*"x",88*/ +{0x04,0x10,0x07,0x10,0x04,0xE0,0x01,0x80,0x06,0x00,0x04,0x00},/*"y",89*/ +{0x00,0x00,0x04,0x40,0x05,0xC0,0x06,0x40,0x04,0x40,0x00,0x00},/*"z",90*/ +{0x00,0x00,0x00,0x00,0x04,0x00,0x7B,0xE0,0x40,0x20,0x00,0x00},/*"{",91*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0xF0,0x00,0x00,0x00,0x00},/*"|",92*/ +{0x00,0x00,0x40,0x20,0x7B,0xE0,0x04,0x00,0x00,0x00,0x00,0x00},/*"}",93*/ +{0x40,0x00,0x80,0x00,0x40,0x00,0x20,0x00,0x20,0x00,0x40,0x00},/*"~",94*/ +}; +//16*16 ASCII×Ö·û¼¯µãÕó +const unsigned char asc2_1608[95][16]={ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*" ",0*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x1F,0xCC,0x00,0x0C,0x00,0x00,0x00,0x00,0x00,0x00},/*"!",1*/ +{0x00,0x00,0x08,0x00,0x30,0x00,0x60,0x00,0x08,0x00,0x30,0x00,0x60,0x00,0x00,0x00},/*""",2*/ +{0x02,0x20,0x03,0xFC,0x1E,0x20,0x02,0x20,0x03,0xFC,0x1E,0x20,0x02,0x20,0x00,0x00},/*"#",3*/ +{0x00,0x00,0x0E,0x18,0x11,0x04,0x3F,0xFF,0x10,0x84,0x0C,0x78,0x00,0x00,0x00,0x00},/*"$",4*/ +{0x0F,0x00,0x10,0x84,0x0F,0x38,0x00,0xC0,0x07,0x78,0x18,0x84,0x00,0x78,0x00,0x00},/*"%",5*/ +{0x00,0x78,0x0F,0x84,0x10,0xC4,0x11,0x24,0x0E,0x98,0x00,0xE4,0x00,0x84,0x00,0x08},/*"&",6*/ +{0x08,0x00,0x68,0x00,0x70,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"'",7*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x07,0xE0,0x18,0x18,0x20,0x04,0x40,0x02,0x00,0x00},/*"(",8*/ +{0x00,0x00,0x40,0x02,0x20,0x04,0x18,0x18,0x07,0xE0,0x00,0x00,0x00,0x00,0x00,0x00},/*")",9*/ +{0x02,0x40,0x02,0x40,0x01,0x80,0x0F,0xF0,0x01,0x80,0x02,0x40,0x02,0x40,0x00,0x00},/*"*",10*/ +{0x00,0x80,0x00,0x80,0x00,0x80,0x0F,0xF8,0x00,0x80,0x00,0x80,0x00,0x80,0x00,0x00},/*"+",11*/ +{0x00,0x01,0x00,0x0D,0x00,0x0E,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*",",12*/ +{0x00,0x00,0x00,0x80,0x00,0x80,0x00,0x80,0x00,0x80,0x00,0x80,0x00,0x80,0x00,0x80},/*"-",13*/ +{0x00,0x00,0x00,0x0C,0x00,0x0C,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*".",14*/ +{0x00,0x00,0x00,0x06,0x00,0x18,0x00,0x60,0x01,0x80,0x06,0x00,0x18,0x00,0x20,0x00},/*"/",15*/ +{0x00,0x00,0x07,0xF0,0x08,0x08,0x10,0x04,0x10,0x04,0x08,0x08,0x07,0xF0,0x00,0x00},/*"0",16*/ +{0x00,0x00,0x08,0x04,0x08,0x04,0x1F,0xFC,0x00,0x04,0x00,0x04,0x00,0x00,0x00,0x00},/*"1",17*/ +{0x00,0x00,0x0E,0x0C,0x10,0x14,0x10,0x24,0x10,0x44,0x11,0x84,0x0E,0x0C,0x00,0x00},/*"2",18*/ +{0x00,0x00,0x0C,0x18,0x10,0x04,0x11,0x04,0x11,0x04,0x12,0x88,0x0C,0x70,0x00,0x00},/*"3",19*/ +{0x00,0x00,0x00,0xE0,0x03,0x20,0x04,0x24,0x08,0x24,0x1F,0xFC,0x00,0x24,0x00,0x00},/*"4",20*/ +{0x00,0x00,0x1F,0x98,0x10,0x84,0x11,0x04,0x11,0x04,0x10,0x88,0x10,0x70,0x00,0x00},/*"5",21*/ +{0x00,0x00,0x07,0xF0,0x08,0x88,0x11,0x04,0x11,0x04,0x18,0x88,0x00,0x70,0x00,0x00},/*"6",22*/ +{0x00,0x00,0x1C,0x00,0x10,0x00,0x10,0xFC,0x13,0x00,0x1C,0x00,0x10,0x00,0x00,0x00},/*"7",23*/ +{0x00,0x00,0x0E,0x38,0x11,0x44,0x10,0x84,0x10,0x84,0x11,0x44,0x0E,0x38,0x00,0x00},/*"8",24*/ +{0x00,0x00,0x07,0x00,0x08,0x8C,0x10,0x44,0x10,0x44,0x08,0x88,0x07,0xF0,0x00,0x00},/*"9",25*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x0C,0x03,0x0C,0x00,0x00,0x00,0x00,0x00,0x00},/*":",26*/ +{0x00,0x00,0x00,0x00,0x00,0x01,0x01,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*";",27*/ +{0x00,0x00,0x00,0x80,0x01,0x40,0x02,0x20,0x04,0x10,0x08,0x08,0x10,0x04,0x00,0x00},/*"<",28*/ 
+{0x02,0x20,0x02,0x20,0x02,0x20,0x02,0x20,0x02,0x20,0x02,0x20,0x02,0x20,0x00,0x00},/*"=",29*/ +{0x00,0x00,0x10,0x04,0x08,0x08,0x04,0x10,0x02,0x20,0x01,0x40,0x00,0x80,0x00,0x00},/*">",30*/ +{0x00,0x00,0x0E,0x00,0x12,0x00,0x10,0x0C,0x10,0x6C,0x10,0x80,0x0F,0x00,0x00,0x00},/*"?",31*/ +{0x03,0xE0,0x0C,0x18,0x13,0xE4,0x14,0x24,0x17,0xC4,0x08,0x28,0x07,0xD0,0x00,0x00},/*"@",32*/ +{0x00,0x04,0x00,0x3C,0x03,0xC4,0x1C,0x40,0x07,0x40,0x00,0xE4,0x00,0x1C,0x00,0x04},/*"A",33*/ +{0x10,0x04,0x1F,0xFC,0x11,0x04,0x11,0x04,0x11,0x04,0x0E,0x88,0x00,0x70,0x00,0x00},/*"B",34*/ +{0x03,0xE0,0x0C,0x18,0x10,0x04,0x10,0x04,0x10,0x04,0x10,0x08,0x1C,0x10,0x00,0x00},/*"C",35*/ +{0x10,0x04,0x1F,0xFC,0x10,0x04,0x10,0x04,0x10,0x04,0x08,0x08,0x07,0xF0,0x00,0x00},/*"D",36*/ +{0x10,0x04,0x1F,0xFC,0x11,0x04,0x11,0x04,0x17,0xC4,0x10,0x04,0x08,0x18,0x00,0x00},/*"E",37*/ +{0x10,0x04,0x1F,0xFC,0x11,0x04,0x11,0x00,0x17,0xC0,0x10,0x00,0x08,0x00,0x00,0x00},/*"F",38*/ +{0x03,0xE0,0x0C,0x18,0x10,0x04,0x10,0x04,0x10,0x44,0x1C,0x78,0x00,0x40,0x00,0x00},/*"G",39*/ +{0x10,0x04,0x1F,0xFC,0x10,0x84,0x00,0x80,0x00,0x80,0x10,0x84,0x1F,0xFC,0x10,0x04},/*"H",40*/ +{0x00,0x00,0x10,0x04,0x10,0x04,0x1F,0xFC,0x10,0x04,0x10,0x04,0x00,0x00,0x00,0x00},/*"I",41*/ +{0x00,0x03,0x00,0x01,0x10,0x01,0x10,0x01,0x1F,0xFE,0x10,0x00,0x10,0x00,0x00,0x00},/*"J",42*/ +{0x10,0x04,0x1F,0xFC,0x11,0x04,0x03,0x80,0x14,0x64,0x18,0x1C,0x10,0x04,0x00,0x00},/*"K",43*/ +{0x10,0x04,0x1F,0xFC,0x10,0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x0C,0x00,0x00},/*"L",44*/ +{0x10,0x04,0x1F,0xFC,0x1F,0x00,0x00,0xFC,0x1F,0x00,0x1F,0xFC,0x10,0x04,0x00,0x00},/*"M",45*/ +{0x10,0x04,0x1F,0xFC,0x0C,0x04,0x03,0x00,0x00,0xE0,0x10,0x18,0x1F,0xFC,0x10,0x00},/*"N",46*/ +{0x07,0xF0,0x08,0x08,0x10,0x04,0x10,0x04,0x10,0x04,0x08,0x08,0x07,0xF0,0x00,0x00},/*"O",47*/ +{0x10,0x04,0x1F,0xFC,0x10,0x84,0x10,0x80,0x10,0x80,0x10,0x80,0x0F,0x00,0x00,0x00},/*"P",48*/ +{0x07,0xF0,0x08,0x18,0x10,0x24,0x10,0x24,0x10,0x1C,0x08,0x0A,0x07,0xF2,0x00,0x00},/*"Q",49*/ 
+{0x10,0x04,0x1F,0xFC,0x11,0x04,0x11,0x00,0x11,0xC0,0x11,0x30,0x0E,0x0C,0x00,0x04},/*"R",50*/ +{0x00,0x00,0x0E,0x1C,0x11,0x04,0x10,0x84,0x10,0x84,0x10,0x44,0x1C,0x38,0x00,0x00},/*"S",51*/ +{0x18,0x00,0x10,0x00,0x10,0x04,0x1F,0xFC,0x10,0x04,0x10,0x00,0x18,0x00,0x00,0x00},/*"T",52*/ +{0x10,0x00,0x1F,0xF8,0x10,0x04,0x00,0x04,0x00,0x04,0x10,0x04,0x1F,0xF8,0x10,0x00},/*"U",53*/ +{0x10,0x00,0x1E,0x00,0x11,0xE0,0x00,0x1C,0x00,0x70,0x13,0x80,0x1C,0x00,0x10,0x00},/*"V",54*/ +{0x1F,0xC0,0x10,0x3C,0x00,0xE0,0x1F,0x00,0x00,0xE0,0x10,0x3C,0x1F,0xC0,0x00,0x00},/*"W",55*/ +{0x10,0x04,0x18,0x0C,0x16,0x34,0x01,0xC0,0x01,0xC0,0x16,0x34,0x18,0x0C,0x10,0x04},/*"X",56*/ +{0x10,0x00,0x1C,0x00,0x13,0x04,0x00,0xFC,0x13,0x04,0x1C,0x00,0x10,0x00,0x00,0x00},/*"Y",57*/ +{0x08,0x04,0x10,0x1C,0x10,0x64,0x10,0x84,0x13,0x04,0x1C,0x04,0x10,0x18,0x00,0x00},/*"Z",58*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x7F,0xFE,0x40,0x02,0x40,0x02,0x40,0x02,0x00,0x00},/*"[",59*/ +{0x00,0x00,0x30,0x00,0x0C,0x00,0x03,0x80,0x00,0x60,0x00,0x1C,0x00,0x03,0x00,0x00},/*"\",60*/ +{0x00,0x00,0x40,0x02,0x40,0x02,0x40,0x02,0x7F,0xFE,0x00,0x00,0x00,0x00,0x00,0x00},/*"]",61*/ +{0x00,0x00,0x00,0x00,0x20,0x00,0x40,0x00,0x40,0x00,0x40,0x00,0x20,0x00,0x00,0x00},/*"^",62*/ +{0x00,0x01,0x00,0x01,0x00,0x01,0x00,0x01,0x00,0x01,0x00,0x01,0x00,0x01,0x00,0x01},/*"_",63*/ +{0x00,0x00,0x40,0x00,0x40,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"`",64*/ +{0x00,0x00,0x00,0x98,0x01,0x24,0x01,0x44,0x01,0x44,0x01,0x44,0x00,0xFC,0x00,0x04},/*"a",65*/ +{0x10,0x00,0x1F,0xFC,0x00,0x88,0x01,0x04,0x01,0x04,0x00,0x88,0x00,0x70,0x00,0x00},/*"b",66*/ +{0x00,0x00,0x00,0x70,0x00,0x88,0x01,0x04,0x01,0x04,0x01,0x04,0x00,0x88,0x00,0x00},/*"c",67*/ +{0x00,0x00,0x00,0x70,0x00,0x88,0x01,0x04,0x01,0x04,0x11,0x08,0x1F,0xFC,0x00,0x04},/*"d",68*/ +{0x00,0x00,0x00,0xF8,0x01,0x44,0x01,0x44,0x01,0x44,0x01,0x44,0x00,0xC8,0x00,0x00},/*"e",69*/ +{0x00,0x00,0x01,0x04,0x01,0x04,0x0F,0xFC,0x11,0x04,0x11,0x04,0x11,0x00,0x18,0x00},/*"f",70*/ 
+{0x00,0x00,0x00,0xD6,0x01,0x29,0x01,0x29,0x01,0x29,0x01,0xC9,0x01,0x06,0x00,0x00},/*"g",71*/ +{0x10,0x04,0x1F,0xFC,0x00,0x84,0x01,0x00,0x01,0x00,0x01,0x04,0x00,0xFC,0x00,0x04},/*"h",72*/ +{0x00,0x00,0x01,0x04,0x19,0x04,0x19,0xFC,0x00,0x04,0x00,0x04,0x00,0x00,0x00,0x00},/*"i",73*/ +{0x00,0x00,0x00,0x03,0x00,0x01,0x01,0x01,0x19,0x01,0x19,0xFE,0x00,0x00,0x00,0x00},/*"j",74*/ +{0x10,0x04,0x1F,0xFC,0x00,0x24,0x00,0x40,0x01,0xB4,0x01,0x0C,0x01,0x04,0x00,0x00},/*"k",75*/ +{0x00,0x00,0x10,0x04,0x10,0x04,0x1F,0xFC,0x00,0x04,0x00,0x04,0x00,0x00,0x00,0x00},/*"l",76*/ +{0x01,0x04,0x01,0xFC,0x01,0x04,0x01,0x00,0x01,0xFC,0x01,0x04,0x01,0x00,0x00,0xFC},/*"m",77*/ +{0x01,0x04,0x01,0xFC,0x00,0x84,0x01,0x00,0x01,0x00,0x01,0x04,0x00,0xFC,0x00,0x04},/*"n",78*/ +{0x00,0x00,0x00,0xF8,0x01,0x04,0x01,0x04,0x01,0x04,0x01,0x04,0x00,0xF8,0x00,0x00},/*"o",79*/ +{0x01,0x01,0x01,0xFF,0x00,0x85,0x01,0x04,0x01,0x04,0x00,0x88,0x00,0x70,0x00,0x00},/*"p",80*/ +{0x00,0x00,0x00,0x70,0x00,0x88,0x01,0x04,0x01,0x04,0x01,0x05,0x01,0xFF,0x00,0x01},/*"q",81*/ +{0x01,0x04,0x01,0x04,0x01,0xFC,0x00,0x84,0x01,0x04,0x01,0x00,0x01,0x80,0x00,0x00},/*"r",82*/ +{0x00,0x00,0x00,0xCC,0x01,0x24,0x01,0x24,0x01,0x24,0x01,0x24,0x01,0x98,0x00,0x00},/*"s",83*/ +{0x00,0x00,0x01,0x00,0x01,0x00,0x07,0xF8,0x01,0x04,0x01,0x04,0x00,0x00,0x00,0x00},/*"t",84*/ +{0x01,0x00,0x01,0xF8,0x00,0x04,0x00,0x04,0x00,0x04,0x01,0x08,0x01,0xFC,0x00,0x04},/*"u",85*/ +{0x01,0x00,0x01,0x80,0x01,0x70,0x00,0x0C,0x00,0x10,0x01,0x60,0x01,0x80,0x01,0x00},/*"v",86*/ +{0x01,0xF0,0x01,0x0C,0x00,0x30,0x01,0xC0,0x00,0x30,0x01,0x0C,0x01,0xF0,0x01,0x00},/*"w",87*/ +{0x00,0x00,0x01,0x04,0x01,0x8C,0x00,0x74,0x01,0x70,0x01,0x8C,0x01,0x04,0x00,0x00},/*"x",88*/ +{0x01,0x01,0x01,0x81,0x01,0x71,0x00,0x0E,0x00,0x18,0x01,0x60,0x01,0x80,0x01,0x00},/*"y",89*/ +{0x00,0x00,0x01,0x84,0x01,0x0C,0x01,0x34,0x01,0x44,0x01,0x84,0x01,0x0C,0x00,0x00},/*"z",90*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x3E,0xFC,0x40,0x02,0x40,0x02},/*"{",91*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00},/*"|",92*/ +{0x00,0x00,0x40,0x02,0x40,0x02,0x3E,0xFC,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"}",93*/ +{0x00,0x00,0x60,0x00,0x80,0x00,0x80,0x00,0x40,0x00,0x40,0x00,0x20,0x00,0x20,0x00},/*"~",94*/ +}; +//24*24 ASICII×Ö·û¼¯µãÕó +const unsigned char asc2_2412[95][36]={ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*" ",0*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0F,0x80,0x38,0x0F,0xFE,0x38,0x0F,0x80,0x38,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"!",1*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x06,0x00,0x00,0x0C,0x00,0x00,0x38,0x00,0x00,0x31,0x00,0x00,0x06,0x00,0x00,0x0C,0x00,0x00,0x38,0x00,0x00,0x30,0x00,0x00,0x00,0x00,0x00},/*""",2*/ +{0x00,0x00,0x00,0x00,0x61,0x80,0x00,0x67,0xF8,0x07,0xF9,0x80,0x00,0x61,0x80,0x00,0x61,0x80,0x00,0x61,0x80,0x00,0x61,0x80,0x00,0x67,0xF8,0x07,0xF9,0x80,0x00,0x61,0x80,0x00,0x00,0x00},/*"#",3*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x01,0xC0,0xE0,0x03,0xE0,0xF0,0x06,0x30,0x08,0x04,0x18,0x08,0x1F,0xFF,0xFE,0x04,0x0E,0x08,0x07,0x87,0xF0,0x03,0x81,0xE0,0x00,0x00,0x00,0x00,0x00,0x00},/*"$",4*/ +{0x01,0xF0,0x00,0x06,0x0C,0x00,0x04,0x04,0x08,0x06,0x0C,0x70,0x01,0xF9,0xC0,0x00,0x0E,0x00,0x00,0x3B,0xE0,0x00,0xEC,0x18,0x07,0x08,0x08,0x04,0x0C,0x18,0x00,0x03,0xE0,0x00,0x00,0x00},/*"%",5*/ +{0x00,0x01,0xE0,0x00,0x07,0xF0,0x03,0xF8,0x18,0x04,0x1C,0x08,0x04,0x17,0x08,0x07,0xE1,0xD0,0x03,0xC0,0xE0,0x00,0x23,0xB0,0x00,0x3C,0x08,0x00,0x20,0x08,0x00,0x00,0x10,0x00,0x00,0x00},/*"&",6*/ +{0x00,0x00,0x00,0x01,0x00,0x00,0x31,0x00,0x00,0x32,0x00,0x00,0x1C,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"'",7*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7F,0x00,0x01,0xFF,0xC0,0x07,0x80,0xF0,0x0C,0x00,0x18,0x10,0x00,0x04,0x20,0x00,0x02,0x00,0x00,0x00},/*"(",8*/ +{0x00,0x00,0x00,0x20,0x00,0x02,0x10,0x00,0x04,0x0C,0x00,0x18,0x07,0x80,0xF0,0x01,0xFF,0xC0,0x00,0x7F,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*")",9*/ +{0x00,0x00,0x00,0x00,0x42,0x00,0x00,0x66,0x00,0x00,0x66,0x00,0x00,0x3C,0x00,0x00,0x18,0x00,0x03,0xFF,0xC0,0x00,0x18,0x00,0x00,0x3C,0x00,0x00,0x66,0x00,0x00,0x66,0x00,0x00,0x42,0x00},/*"*",10*/ +{0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x01,0xFF,0xC0,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00},/*"+",11*/ +{0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x31,0x00,0x00,0x32,0x00,0x00,0x1C,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*",",12*/ +{0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x00,0x00},/*"-",13*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x38,0x00,0x00,0x38,0x00,0x00,0x38,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*".",14*/ +{0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x1C,0x00,0x00,0x70,0x00,0x01,0x80,0x00,0x0E,0x00,0x00,0x38,0x00,0x00,0xC0,0x00,0x07,0x00,0x00,0x1C,0x00,0x00,0x30,0x00,0x00,0x00,0x00,0x00},/*"/",15*/ +{0x00,0x00,0x00,0x00,0x7F,0x80,0x01,0xFF,0xE0,0x03,0x80,0x70,0x06,0x00,0x18,0x04,0x00,0x08,0x04,0x00,0x08,0x06,0x00,0x18,0x03,0x80,0x70,0x01,0xFF,0xE0,0x00,0x7F,0x80,0x00,0x00,0x00},/*"0",16*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x08,0x01,0x00,0x08,0x01,0x00,0x08,0x03,0xFF,0xF8,0x07,0xFF,0xF8,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00},/*"1",17*/ 
+{0x00,0x00,0x00,0x01,0xC0,0x38,0x02,0xC0,0x58,0x04,0x00,0x98,0x04,0x01,0x18,0x04,0x02,0x18,0x04,0x04,0x18,0x06,0x1C,0x18,0x03,0xF8,0x18,0x01,0xE0,0xF8,0x00,0x00,0x00,0x00,0x00,0x00},/*"2",18*/ +{0x00,0x00,0x00,0x01,0xC0,0xE0,0x03,0xC0,0xF0,0x04,0x00,0x08,0x04,0x08,0x08,0x04,0x08,0x08,0x06,0x18,0x08,0x03,0xF4,0x18,0x01,0xE7,0xF0,0x00,0x01,0xE0,0x00,0x00,0x00,0x00,0x00,0x00},/*"3",19*/ +{0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x0D,0x00,0x00,0x11,0x00,0x00,0x61,0x00,0x00,0x81,0x08,0x03,0x01,0x08,0x07,0xFF,0xF8,0x0F,0xFF,0xF8,0x00,0x01,0x08,0x00,0x01,0x08,0x00,0x00,0x00},/*"4",20*/ +{0x00,0x00,0x00,0x00,0x00,0xE0,0x07,0xFC,0xD0,0x06,0x08,0x08,0x06,0x10,0x08,0x06,0x10,0x08,0x06,0x10,0x08,0x06,0x18,0x38,0x06,0x0F,0xF0,0x06,0x07,0xC0,0x00,0x00,0x00,0x00,0x00,0x00},/*"5",21*/ +{0x00,0x00,0x00,0x00,0x3F,0x80,0x01,0xFF,0xE0,0x03,0x84,0x30,0x02,0x08,0x18,0x04,0x10,0x08,0x04,0x10,0x08,0x04,0x10,0x08,0x07,0x18,0x10,0x03,0x0F,0xF0,0x00,0x07,0xC0,0x00,0x00,0x00},/*"6",22*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x03,0xC0,0x00,0x07,0x00,0x00,0x06,0x00,0x00,0x06,0x00,0xF8,0x06,0x07,0xF8,0x06,0x18,0x00,0x06,0xE0,0x00,0x07,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00},/*"7",23*/ +{0x00,0x00,0x00,0x01,0xE1,0xE0,0x03,0xF7,0xF0,0x06,0x34,0x10,0x04,0x18,0x08,0x04,0x18,0x08,0x04,0x0C,0x08,0x04,0x0C,0x08,0x06,0x16,0x18,0x03,0xF3,0xF0,0x01,0xC1,0xE0,0x00,0x00,0x00},/*"8",24*/ +{0x00,0x00,0x00,0x00,0xF8,0x00,0x03,0xFC,0x30,0x03,0x06,0x38,0x04,0x02,0x08,0x04,0x02,0x08,0x04,0x02,0x08,0x04,0x04,0x10,0x03,0x08,0xF0,0x01,0xFF,0xC0,0x00,0x7F,0x00,0x00,0x00,0x00},/*"9",25*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x70,0x38,0x00,0x70,0x38,0x00,0x70,0x38,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*":",26*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x30,0x1A,0x00,0x30,0x1C,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*";",27*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x14,0x00,0x00,0x22,0x00,0x00,0x41,0x00,0x00,0x80,0x80,0x01,0x00,0x40,0x02,0x00,0x20,0x04,0x00,0x10,0x08,0x00,0x08,0x00,0x00,0x00},/*"<",28*/ +{0x00,0x00,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x21,0x00,0x00,0x00,0x00},/*"=",29*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x08,0x04,0x00,0x10,0x02,0x00,0x20,0x01,0x00,0x40,0x00,0x80,0x80,0x00,0x41,0x00,0x00,0x22,0x00,0x00,0x14,0x00,0x00,0x08,0x00,0x00,0x00,0x00},/*">",30*/ +{0x00,0x00,0x00,0x03,0xC0,0x00,0x04,0xC0,0x00,0x04,0x00,0x00,0x08,0x00,0x38,0x08,0x0F,0x38,0x08,0x08,0x38,0x08,0x10,0x00,0x0C,0x30,0x00,0x07,0xE0,0x00,0x03,0xC0,0x00,0x00,0x00,0x00},/*"?",31*/ +{0x00,0x00,0x00,0x00,0x3F,0x80,0x00,0xFF,0xE0,0x03,0x80,0x70,0x02,0x0F,0x10,0x06,0x70,0x88,0x04,0xC0,0x88,0x04,0x83,0x08,0x04,0x7F,0x88,0x02,0xC0,0x90,0x03,0x01,0x20,0x00,0xFE,0x40},/*"@",32*/ +{0x00,0x00,0x08,0x00,0x00,0x18,0x00,0x01,0xF8,0x00,0x3E,0x08,0x01,0xC2,0x00,0x07,0x02,0x00,0x07,0xE2,0x00,0x00,0xFE,0x00,0x00,0x1F,0xC8,0x00,0x01,0xF8,0x00,0x00,0x38,0x00,0x00,0x08},/*"A",33*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x08,0x08,0x04,0x08,0x08,0x04,0x08,0x08,0x04,0x08,0x08,0x06,0x18,0x08,0x03,0xF4,0x18,0x01,0xE7,0xF0,0x00,0x01,0xE0,0x00,0x00,0x00},/*"B",34*/ +{0x00,0x00,0x00,0x00,0x3F,0x80,0x01,0xFF,0xE0,0x03,0x80,0x70,0x02,0x00,0x18,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x10,0x06,0x00,0x20,0x07,0x80,0xC0,0x00,0x00,0x00},/*"C",35*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x18,0x02,0x00,0x10,0x03,0x80,0x70,0x01,0xFF,0xE0,0x00,0x7F,0x80,0x00,0x00,0x00},/*"D",36*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x08,0x08,0x04,0x08,0x08,0x04,0x08,0x08,0x04,0x08,0x08,0x04,0x3E,0x08,0x04,0x00,0x08,0x06,0x00,0x18,0x01,0x00,0x60,0x00,0x00,0x00},/*"E",37*/ 
+{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x08,0x08,0x04,0x08,0x00,0x04,0x08,0x00,0x04,0x08,0x00,0x04,0x3E,0x00,0x06,0x00,0x00,0x06,0x00,0x00,0x01,0x80,0x00,0x00,0x00,0x00},/*"F",38*/ +{0x00,0x00,0x00,0x00,0x3F,0x80,0x01,0xFF,0xE0,0x03,0x80,0x70,0x06,0x00,0x18,0x04,0x00,0x08,0x04,0x02,0x08,0x04,0x02,0x08,0x02,0x03,0xF0,0x07,0x83,0xF0,0x00,0x02,0x00,0x00,0x02,0x00},/*"G",39*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x08,0x08,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x04,0x08,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x00,0x08},/*"H",40*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00},/*"I",41*/ +{0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x07,0x00,0x00,0x01,0x04,0x00,0x01,0x04,0x00,0x01,0x04,0x00,0x03,0x07,0xFF,0xFE,0x07,0xFF,0xFC,0x04,0x00,0x00,0x04,0x00,0x00,0x04,0x00,0x00},/*"J",42*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x0C,0x08,0x00,0x18,0x00,0x00,0x3E,0x00,0x04,0xC7,0x80,0x05,0x03,0xC8,0x06,0x00,0xF8,0x04,0x00,0x38,0x04,0x00,0x18,0x00,0x00,0x08},/*"K",43*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x18,0x00,0x00,0x60,0x00,0x00,0x00},/*"L",44*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0x80,0x08,0x07,0xFC,0x00,0x00,0x7F,0xC0,0x00,0x03,0xF8,0x00,0x07,0xC0,0x00,0x78,0x00,0x07,0x80,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x00,0x08},/*"M",45*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0x00,0x08,0x03,0xC0,0x00,0x00,0xE0,0x00,0x00,0x38,0x00,0x00,0x1E,0x00,0x00,0x07,0x00,0x00,0x01,0xC0,0x04,0x00,0xF0,0x07,0xFF,0xF8,0x04,0x00,0x00},/*"N",46*/ +{0x00,0x00,0x00,0x00,0x7F,0x80,0x01,0xFF,0xE0,0x03,0x80,0x70,0x06,0x00,0x18,0x04,0x00,0x08,0x04,0x00,0x08,0x06,0x00,0x18,0x03,0x00,0x30,0x01,0xFF,0xE0,0x00,0x7F,0x80,0x00,0x00,0x00},/*"O",47*/ 
+{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x04,0x08,0x04,0x04,0x00,0x04,0x04,0x00,0x04,0x04,0x00,0x04,0x04,0x00,0x06,0x0C,0x00,0x03,0xF8,0x00,0x01,0xF0,0x00,0x00,0x00,0x00},/*"P",48*/ +{0x00,0x00,0x00,0x00,0x7F,0x80,0x01,0xFF,0xE0,0x03,0x80,0x70,0x06,0x00,0x88,0x04,0x00,0x88,0x04,0x00,0xC8,0x06,0x00,0x3C,0x03,0x00,0x3E,0x01,0xFF,0xE6,0x00,0x7F,0x84,0x00,0x00,0x00},/*"Q",49*/ +{0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x08,0x08,0x04,0x08,0x00,0x04,0x0C,0x00,0x04,0x0F,0x00,0x04,0x0B,0xC0,0x06,0x10,0xF0,0x03,0xF0,0x38,0x01,0xE0,0x08,0x00,0x00,0x08},/*"R",50*/ +{0x00,0x00,0x00,0x01,0xE0,0xF8,0x03,0xF0,0x30,0x06,0x30,0x10,0x04,0x18,0x08,0x04,0x18,0x08,0x04,0x0C,0x08,0x04,0x0C,0x08,0x02,0x06,0x18,0x02,0x07,0xF0,0x07,0x81,0xE0,0x00,0x00,0x00},/*"S",51*/ +{0x01,0x80,0x00,0x06,0x00,0x00,0x04,0x00,0x00,0x04,0x00,0x00,0x04,0x00,0x08,0x07,0xFF,0xF8,0x07,0xFF,0xF8,0x04,0x00,0x08,0x04,0x00,0x00,0x04,0x00,0x00,0x06,0x00,0x00,0x01,0x80,0x00},/*"T",52*/ +{0x04,0x00,0x00,0x07,0xFF,0xE0,0x07,0xFF,0xF0,0x04,0x00,0x18,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x04,0x00,0x10,0x07,0xFF,0xE0,0x04,0x00,0x00},/*"U",53*/ +{0x04,0x00,0x00,0x06,0x00,0x00,0x07,0xE0,0x00,0x07,0xFE,0x00,0x04,0x1F,0xE0,0x00,0x01,0xF8,0x00,0x00,0x38,0x00,0x01,0xE0,0x04,0x3E,0x00,0x07,0xC0,0x00,0x06,0x00,0x00,0x04,0x00,0x00},/*"V",54*/ +{0x04,0x00,0x00,0x07,0xE0,0x00,0x07,0xFF,0xC0,0x04,0x1F,0xF8,0x00,0x07,0xC0,0x07,0xF8,0x00,0x07,0xFF,0x80,0x04,0x3F,0xF8,0x00,0x07,0xC0,0x04,0xF8,0x00,0x07,0x00,0x00,0x04,0x00,0x00},/*"W",55*/ +{0x00,0x00,0x00,0x04,0x00,0x08,0x06,0x00,0x18,0x07,0xC0,0x78,0x05,0xF1,0xC8,0x00,0x3E,0x00,0x00,0x1F,0x80,0x04,0x63,0xE8,0x07,0x80,0xF8,0x06,0x00,0x18,0x04,0x00,0x08,0x00,0x00,0x00},/*"X",56*/ +{0x04,0x00,0x00,0x06,0x00,0x00,0x07,0x80,0x00,0x07,0xE0,0x08,0x04,0x7C,0x08,0x00,0x1F,0xF8,0x00,0x07,0xF8,0x00,0x18,0x08,0x04,0xE0,0x08,0x07,0x00,0x00,0x06,0x00,0x00,0x04,0x00,0x00},/*"Y",57*/ 
+{0x00,0x00,0x00,0x01,0x00,0x08,0x06,0x00,0x38,0x04,0x00,0xF8,0x04,0x03,0xE8,0x04,0x0F,0x08,0x04,0x7C,0x08,0x05,0xF0,0x08,0x07,0xC0,0x08,0x07,0x00,0x18,0x04,0x00,0x60,0x00,0x00,0x00},/*"Z",58*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x3F,0xFF,0xFE,0x20,0x00,0x02,0x20,0x00,0x02,0x20,0x00,0x02,0x20,0x00,0x02,0x20,0x00,0x02,0x00,0x00,0x00},/*"[",59*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x07,0x00,0x00,0x00,0xC0,0x00,0x00,0x38,0x00,0x00,0x06,0x00,0x00,0x01,0xC0,0x00,0x00,0x30,0x00,0x00,0x0E,0x00,0x00,0x01,0x00,0x00,0x00},/*"\",60*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x02,0x20,0x00,0x02,0x20,0x00,0x02,0x20,0x00,0x02,0x20,0x00,0x02,0x3F,0xFF,0xFE,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"]",61*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x10,0x00,0x00,0x30,0x00,0x00,0x20,0x00,0x00,0x30,0x00,0x00,0x10,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"^",62*/ +{0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x01},/*"_",63*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x20,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"`",64*/ +{0x00,0x00,0x00,0x00,0x00,0xF0,0x00,0x19,0xF8,0x00,0x1B,0x18,0x00,0x22,0x08,0x00,0x26,0x08,0x00,0x24,0x08,0x00,0x24,0x10,0x00,0x3F,0xF8,0x00,0x1F,0xF8,0x00,0x00,0x08,0x00,0x00,0x18},/*"a",65*/ +{0x00,0x00,0x00,0x04,0x00,0x00,0x07,0xFF,0xF8,0x0F,0xFF,0xF0,0x00,0x18,0x18,0x00,0x10,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x30,0x18,0x00,0x1F,0xF0,0x00,0x0F,0xC0,0x00,0x00,0x00},/*"b",66*/ +{0x00,0x00,0x00,0x00,0x07,0xC0,0x00,0x1F,0xF0,0x00,0x18,0x30,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x3C,0x08,0x00,0x1C,0x10,0x00,0x00,0x60,0x00,0x00,0x00,0x00,0x00,0x00},/*"c",67*/ 
+{0x00,0x00,0x00,0x00,0x07,0xC0,0x00,0x1F,0xF0,0x00,0x38,0x18,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x04,0x10,0x10,0x07,0xFF,0xF8,0x0F,0xFF,0xF0,0x00,0x00,0x10,0x00,0x00,0x00},/*"d",68*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0xC0,0x00,0x1F,0xF0,0x00,0x12,0x30,0x00,0x22,0x18,0x00,0x22,0x08,0x00,0x22,0x08,0x00,0x32,0x08,0x00,0x1E,0x10,0x00,0x0E,0x20,0x00,0x00,0x00},/*"e",69*/ +{0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x20,0x08,0x00,0x20,0x08,0x01,0xFF,0xF8,0x03,0xFF,0xF8,0x06,0x20,0x08,0x04,0x20,0x08,0x04,0x20,0x08,0x07,0x20,0x00,0x03,0x00,0x00,0x00,0x00,0x00},/*"f",70*/ +{0x00,0x00,0x00,0x00,0x00,0x0E,0x00,0x0E,0x6E,0x00,0x1F,0xF3,0x00,0x31,0xB1,0x00,0x20,0xB1,0x00,0x20,0xB1,0x00,0x31,0x91,0x00,0x1F,0x13,0x00,0x2E,0x1E,0x00,0x20,0x0E,0x00,0x30,0x00},/*"g",71*/ +{0x00,0x00,0x00,0x04,0x00,0x08,0x07,0xFF,0xF8,0x0F,0xFF,0xF8,0x00,0x10,0x08,0x00,0x20,0x00,0x00,0x20,0x00,0x00,0x20,0x08,0x00,0x3F,0xF8,0x00,0x1F,0xF8,0x00,0x00,0x08,0x00,0x00,0x00},/*"h",72*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x06,0x3F,0xF8,0x06,0x3F,0xF8,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00},/*"i",73*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x03,0x00,0x20,0x01,0x00,0x20,0x01,0x00,0x20,0x03,0x06,0x3F,0xFE,0x06,0x3F,0xFC,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"j",74*/ +{0x00,0x00,0x00,0x04,0x00,0x08,0x07,0xFF,0xF8,0x0F,0xFF,0xF8,0x00,0x01,0x88,0x00,0x03,0x00,0x00,0x2F,0xC0,0x00,0x38,0xF8,0x00,0x20,0x38,0x00,0x20,0x08,0x00,0x00,0x08,0x00,0x00,0x00},/*"k",75*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x08,0x04,0x00,0x08,0x04,0x00,0x08,0x07,0xFF,0xF8,0x0F,0xFF,0xF8,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00},/*"l",76*/ +{0x00,0x20,0x08,0x00,0x3F,0xF8,0x00,0x3F,0xF8,0x00,0x10,0x08,0x00,0x20,0x00,0x00,0x3F,0xF8,0x00,0x3F,0xF8,0x00,0x10,0x08,0x00,0x20,0x00,0x00,0x3F,0xF8,0x00,0x3F,0xF8,0x00,0x00,0x08},/*"m",77*/ 
+{0x00,0x00,0x00,0x00,0x20,0x08,0x00,0x3F,0xF8,0x00,0x3F,0xF8,0x00,0x10,0x08,0x00,0x10,0x00,0x00,0x20,0x00,0x00,0x20,0x08,0x00,0x3F,0xF8,0x00,0x1F,0xF8,0x00,0x00,0x08,0x00,0x00,0x00},/*"n",78*/ +{0x00,0x00,0x00,0x00,0x07,0xC0,0x00,0x0F,0xF0,0x00,0x18,0x30,0x00,0x30,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x30,0x08,0x00,0x18,0x30,0x00,0x0F,0xF0,0x00,0x07,0xC0,0x00,0x00,0x00},/*"o",79*/ +{0x00,0x00,0x00,0x00,0x20,0x01,0x00,0x3F,0xFF,0x00,0x3F,0xFF,0x00,0x10,0x11,0x00,0x20,0x09,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x30,0x38,0x00,0x1F,0xF0,0x00,0x0F,0xC0,0x00,0x00,0x00},/*"p",80*/ +{0x00,0x00,0x00,0x00,0x07,0xC0,0x00,0x1F,0xF0,0x00,0x38,0x18,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x20,0x09,0x00,0x10,0x11,0x00,0x1F,0xFF,0x00,0x3F,0xFF,0x00,0x00,0x01,0x00,0x00,0x00},/*"q",81*/ +{0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x3F,0xF8,0x00,0x3F,0xF8,0x00,0x08,0x08,0x00,0x10,0x08,0x00,0x20,0x08,0x00,0x20,0x00,0x00,0x30,0x00,0x00,0x30,0x00,0x00,0x00,0x00},/*"r",82*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x78,0x00,0x1E,0x18,0x00,0x33,0x08,0x00,0x23,0x08,0x00,0x21,0x08,0x00,0x21,0x88,0x00,0x21,0x98,0x00,0x30,0xF0,0x00,0x38,0x60,0x00,0x00,0x00},/*"s",83*/ +{0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x20,0x00,0x00,0x20,0x00,0x00,0xFF,0xF0,0x03,0xFF,0xF8,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x00,0x30,0x00,0x00,0x00,0x00,0x00,0x00},/*"t",84*/ +{0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x3F,0xF0,0x00,0x7F,0xF8,0x00,0x00,0x18,0x00,0x00,0x08,0x00,0x00,0x08,0x00,0x20,0x10,0x00,0x3F,0xF8,0x00,0x7F,0xF0,0x00,0x00,0x10,0x00,0x00,0x00},/*"u",85*/ +{0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x30,0x00,0x00,0x3C,0x00,0x00,0x3F,0x80,0x00,0x23,0xF0,0x00,0x00,0x78,0x00,0x00,0x70,0x00,0x23,0x80,0x00,0x3C,0x00,0x00,0x30,0x00,0x00,0x20,0x00},/*"v",86*/ +{0x00,0x20,0x00,0x00,0x3C,0x00,0x00,0x3F,0xE0,0x00,0x23,0xF8,0x00,0x00,0xE0,0x00,0x27,0x00,0x00,0x3E,0x00,0x00,0x3F,0xE0,0x00,0x21,0xF8,0x00,0x01,0xE0,0x00,0x3E,0x00,0x00,0x20,0x00},/*"w",87*/ 
+{0x00,0x00,0x00,0x00,0x20,0x08,0x00,0x20,0x08,0x00,0x38,0x38,0x00,0x3E,0x68,0x00,0x27,0x80,0x00,0x03,0xC8,0x00,0x2C,0xF8,0x00,0x38,0x38,0x00,0x20,0x18,0x00,0x20,0x08,0x00,0x00,0x00},/*"x",88*/ +{0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x30,0x03,0x00,0x3C,0x01,0x00,0x3F,0x83,0x00,0x23,0xEC,0x00,0x00,0x70,0x00,0x23,0x80,0x00,0x3C,0x00,0x00,0x20,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"y",89*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x38,0x08,0x00,0x20,0x38,0x00,0x20,0xF8,0x00,0x23,0xE8,0x00,0x2F,0x88,0x00,0x3E,0x08,0x00,0x38,0x08,0x00,0x20,0x18,0x00,0x00,0x70,0x00,0x00,0x00},/*"z",90*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x14,0x00,0x1F,0xF7,0xFC,0x30,0x00,0x06,0x20,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00},/*"{",91*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"|",92*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x02,0x30,0x00,0x06,0x1F,0xF7,0xFC,0x00,0x14,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"}",93*/ +{0x00,0x00,0x00,0x18,0x00,0x00,0x60,0x00,0x00,0x40,0x00,0x00,0x40,0x00,0x00,0x20,0x00,0x00,0x10,0x00,0x00,0x08,0x00,0x00,0x04,0x00,0x00,0x04,0x00,0x00,0x0C,0x00,0x00,0x10,0x00,0x00},/*"~",94*/ +}; + +//32*32 ASCII×Ö·û¼¯µãÕó +const unsigned char asc2_3216[95][128]={ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*" ",0*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0xF0,0x00,0xC0,0x07,0xFF,0xE1,0xE0,0x07,0xF0,0x01,0xE0,0x00,0x00,0x00,0xC0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"!",1*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x01,0xC0,0x00,0x00,0x07,0x80,0x00,0x00,0x1F,0x00,0x00,0x00,0x1E,0x00,0x00,0x00,0x1C,0x20,0x00,0x00,0x01,0xC0,0x00,0x00,0x07,0x80,0x00,0x00,0x1F,0x00,0x00,0x00,0x1E,0x00,0x00,0x00,0x1C,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*""",2*/ +{0x00,0x00,0x00,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0F,0xE0,0x00,0x1F,0xFC,0x00,0x03,0xF8,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0F,0xE0,0x00,0x1F,0xFC,0x00,0x03,0xF8,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x18,0x0C,0x00,0x00,0x00,0x00,0x00},/*"#",3*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x80,0x00,0x78,0x07,0xC0,0x00,0xFC,0x06,0x40,0x01,0x0E,0x00,0x20,0x03,0x07,0x00,0x20,0x02,0x03,0x80,0x20,0x0F,0xFF,0xFF,0xFC,0x02,0x01,0xC0,0x20,0x02,0x00,0xE0,0x60,0x01,0x30,0x70,0x40,0x01,0xF0,0x3F,0x80,0x00,0xF0,0x1F,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"$",4*/ +{0x00,0xFE,0x00,0x00,0x01,0xFF,0x00,0x00,0x03,0x01,0x80,0x00,0x02,0x00,0x80,0x60,0x03,0x01,0x81,0xC0,0x01,0xFF,0x07,0x00,0x00,0xFE,0x18,0x00,0x00,0x00,0xE0,0x00,0x00,0x03,0xBF,0x00,0x00,0x0C,0xFF,0xC0,0x00,0x71,0x80,0x60,0x01,0xC1,0x00,0x20,0x03,0x01,0x80,0x60,0x00,0x00,0xFF,0xC0,0x00,0x00,0x3F,0x00,0x00,0x00,0x00,0x00},/*"%",5*/ +{0x00,0x00,0x1F,0x00,0x00,0x00,0x7F,0xC0,0x00,0xFC,0xC0,0xC0,0x01,0xFF,0x80,0x60,0x03,0x03,0xE0,0x20,0x02,0x02,0x78,0x20,0x02,0x06,0x1E,0x20,0x03,0xFC,0x07,0x40,0x01,0xF0,0x03,0x80,0x00,0x01,0x03,0xC0,0x00,0x01,0x1C,0x60,0x00,0x01,0xE0,0x20,0x00,0x01,0x00,0x20,0x00,0x01,0x00,0x40,0x00,0x00,0x01,0x80,0x00,0x00,0x00,0x00},/*"&",6*/ 
+{0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x1C,0x60,0x00,0x00,0x1C,0x40,0x00,0x00,0x1F,0x80,0x00,0x00,0x0F,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"'",7*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0xF8,0x00,0x00,0x3F,0xFF,0x00,0x00,0x78,0x07,0xC0,0x01,0xC0,0x00,0xE0,0x03,0x00,0x00,0x30,0x04,0x00,0x00,0x08,0x08,0x00,0x00,0x04,0x10,0x00,0x00,0x02,0x00,0x00,0x00,0x00},/*"(",8*/ +{0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x02,0x08,0x00,0x00,0x04,0x04,0x00,0x00,0x08,0x03,0x00,0x00,0x30,0x01,0xC0,0x00,0xE0,0x00,0x78,0x07,0xC0,0x00,0x3F,0xFF,0x00,0x00,0x07,0xF8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*")",9*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x18,0x00,0x00,0x0E,0x38,0x00,0x00,0x0E,0x38,0x00,0x00,0x06,0x30,0x00,0x00,0x03,0x60,0x00,0x00,0x61,0x43,0x80,0x00,0xFF,0xFF,0x80,0x00,0x61,0x43,0x00,0x00,0x03,0x60,0x00,0x00,0x06,0x30,0x00,0x00,0x0E,0x38,0x00,0x00,0x0E,0x38,0x00,0x00,0x0C,0x18,0x00,0x00,0x00,0x00,0x00},/*"*",10*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x7F,0xFF,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00},/*"+",11*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xE3,0x00,0x00,0x00,0xE2,0x00,0x00,0x00,0xFC,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*",",12*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00},/*"-",13*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xC0,0x00,0x00,0x01,0xE0,0x00,0x00,0x01,0xE0,0x00,0x00,0x00,0xC0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*".",14*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0xE0,0x00,0x00,0x03,0x80,0x00,0x00,0x0E,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0xE0,0x00,0x00,0x03,0x80,0x00,0x00,0x0E,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0xE0,0x00,0x00,0x03,0x80,0x00,0x00,0x0E,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"/",15*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0F,0xF8,0x00,0x00,0x7F,0xFF,0x00,0x00,0xF0,0x07,0x80,0x01,0x80,0x00,0xC0,0x03,0x00,0x00,0x60,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0x00,0x00,0x60,0x01,0x80,0x00,0xC0,0x00,0xE0,0x03,0x80,0x00,0x7F,0xFF,0x00,0x00,0x0F,0xF8,0x00,0x00,0x00,0x00,0x00},/*"0",16*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x20,0x00,0x80,0x00,0x20,0x00,0x80,0x00,0x20,0x00,0x80,0x00,0x60,0x01,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"1",17*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x78,0x00,0xE0,0x00,0x98,0x01,0x60,0x01,0x00,0x02,0x60,0x02,0x00,0x04,0x60,0x02,0x00,0x08,0x60,0x02,0x00,0x10,0x60,0x02,0x00,0x20,0x60,0x02,0x00,0x40,0x60,0x03,0x00,0x80,0x60,0x01,0x83,0x00,0x60,0x01,0xFE,0x00,0xE0,0x00,0x7C,0x07,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"2",18*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x07,0x80,0x01,0xF0,0x07,0xC0,0x01,0x00,0x00,0x40,0x02,0x00,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x03,0x03,0x80,0x20,0x01,0x86,0x80,0x40,0x01,0xFC,0xC0,0xC0,0x00,0x78,0x7F,0x80,0x00,0x00,0x1E,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"3",19*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x48,0x00,0x00,0x01,0x88,0x00,0x00,0x06,0x08,0x00,0x00,0x0C,0x08,0x10,0x00,0x30,0x08,0x10,0x00,0x40,0x08,0x10,0x01,0xFF,0xFF,0xF0,0x03,0xFF,0xFF,0xF0,0x03,0xFF,0xFF,0xF0,0x00,0x00,0x08,0x10,0x00,0x00,0x08,0x10,0x00,0x00,0x08,0x10,0x00,0x00,0x00,0x00},/*"4",20*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x80,0x00,0x1F,0x86,0x40,0x03,0xE1,0x00,0x20,0x03,0x02,0x00,0x20,0x03,0x04,0x00,0x20,0x03,0x04,0x00,0x20,0x03,0x04,0x00,0x20,0x03,0x04,0x00,0x20,0x03,0x06,0x00,0x40,0x03,0x03,0x01,0xC0,0x03,0x01,0xFF,0x80,0x03,0x00,0x7E,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"5",21*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0xFC,0x00,0x00,0x3F,0xFF,0x00,0x00,0x70,0xC3,0x80,0x00,0x81,0x80,0xC0,0x01,0x01,0x00,0x60,0x03,0x02,0x00,0x20,0x02,0x02,0x00,0x20,0x02,0x02,0x00,0x20,0x02,0x02,0x00,0x20,0x02,0x03,0x00,0x40,0x01,0xC1,0x80,0xC0,0x00,0xC0,0xFF,0x80,0x00,0x00,0x7E,0x00,0x00,0x00,0x00,0x00},/*"6",22*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x00,0x00,0x03,0xC0,0x00,0x00,0x03,0x80,0x00,0x00,0x03,0x00,0x00,0x00,0x03,0x00,0x07,0xE0,0x03,0x00,0x3F,0xE0,0x03,0x01,0xC0,0x00,0x03,0x06,0x00,0x00,0x03,0x18,0x00,0x00,0x03,0x60,0x00,0x00,0x03,0x80,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"7",23*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x1F,0x00,0x00,0x78,0x3F,0x80,0x00,0xFC,0x60,0xC0,0x01,0x8E,0xC0,0x40,0x03,0x07,0x80,0x20,0x02,0x03,0x00,0x20,0x02,0x01,0x80,0x20,0x02,0x01,0x80,0x20,0x02,0x01,0xC0,0x20,0x03,0x01,0xE0,0x40,0x01,0x86,0x70,0xC0,0x00,0xFC,0x3F,0x80,0x00,0x78,0x1F,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"8",24*/ +{0x00,0x00,0x00,0x00,0x00,0x3F,0x00,0x00,0x00,0xFF,0x81,0xC0,0x01,0xC0,0xC1,0xC0,0x01,0x00,0x60,0x20,0x02,0x00,0x20,0x20,0x02,0x00,0x20,0x20,0x02,0x00,0x20,0x20,0x02,0x00,0x20,0x60,0x02,0x00,0x40,0xC0,0x01,0x00,0xC1,0x80,0x00,0xC1,0x8F,0x00,0x00,0x7F,0xFE,0x00,0x00,0x1F,0xF0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"9",25*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0xC0,0x00,0x07,0x81,0xE0,0x00,0x07,0x81,0xE0,0x00,0x03,0x00,0xC0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*":",26*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x66,0x00,0x06,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*";",27*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x01,0xC0,0x00,0x00,0x03,0x60,0x00,0x00,0x06,0x30,0x00,0x00,0x0C,0x18,0x00,0x00,0x18,0x0C,0x00,0x00,0x30,0x06,0x00,0x00,0x60,0x03,0x00,0x00,0xC0,0x01,0x80,0x01,0x00,0x00,0x40,0x02,0x00,0x00,0x20,0x04,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"<",28*/ +{0x00,0x00,0x00,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x00,0x00,0x00},/*"=",29*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x10,0x02,0x00,0x00,0x20,0x01,0x00,0x00,0x40,0x00,0xC0,0x01,0x80,0x00,0x60,0x03,0x00,0x00,0x30,0x06,0x00,0x00,0x18,0x0C,0x00,0x00,0x0C,0x18,0x00,0x00,0x06,0x30,0x00,0x00,0x03,0x60,0x00,0x00,0x01,0xC0,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*">",30*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x78,0x00,0x00,0x01,0xF8,0x00,0x00,0x02,0x38,0x00,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0xC0,0x04,0x00,0x79,0xE0,0x04,0x00,0x81,0xE0,0x04,0x01,0x00,0xC0,0x04,0x03,0x00,0x00,0x02,0x02,0x00,0x00,0x03,0x06,0x00,0x00,0x01,0xFC,0x00,0x00,0x00,0xF8,0x00,0x00,0x00,0x00,0x00,0x00},/*"?",31*/ 
+{0x00,0x00,0x00,0x00,0x00,0x0F,0xF8,0x00,0x00,0x3F,0xFE,0x00,0x00,0x70,0x07,0x80,0x00,0xC0,0x00,0xC0,0x01,0x01,0xF8,0x40,0x03,0x07,0xFC,0x20,0x02,0x1E,0x04,0x20,0x02,0x30,0x08,0x20,0x02,0x20,0x30,0x20,0x02,0x3F,0xFC,0x20,0x01,0x3F,0x04,0x40,0x01,0x80,0x0C,0xC0,0x00,0xE0,0x31,0x80,0x00,0x1F,0xC2,0x00,0x00,0x00,0x00,0x00},/*"@",32*/ +{0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x60,0x00,0x00,0x03,0xE0,0x00,0x00,0x3E,0x20,0x00,0x03,0xE0,0x20,0x00,0x3E,0x20,0x00,0x03,0xE0,0x20,0x00,0x03,0x80,0x20,0x00,0x07,0xFC,0x20,0x00,0x00,0x3F,0xE0,0x00,0x00,0x03,0xFE,0x20,0x00,0x00,0x3F,0xE0,0x00,0x00,0x01,0xE0,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"A",33*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x03,0x03,0x00,0x20,0x01,0x86,0x80,0x60,0x01,0xFC,0xC0,0xC0,0x00,0xF8,0x7F,0x80,0x00,0x00,0x1F,0x00,0x00,0x00,0x00,0x00},/*"B",34*/ +{0x00,0x00,0x00,0x00,0x00,0x07,0xF8,0x00,0x00,0x3F,0xFF,0x00,0x00,0x70,0x07,0x80,0x00,0xC0,0x00,0xC0,0x01,0x00,0x00,0x40,0x03,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x01,0x00,0x00,0x40,0x01,0x80,0x00,0xC0,0x03,0xC0,0x01,0x80,0x00,0x30,0x06,0x00,0x00,0x00,0x00,0x00},/*"C",35*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0x00,0x00,0x60,0x01,0x00,0x00,0x40,0x01,0x80,0x00,0xC0,0x00,0xF0,0x07,0x80,0x00,0x7F,0xFE,0x00,0x00,0x0F,0xF8,0x00,0x00,0x00,0x00,0x00},/*"D",36*/ 
+{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x03,0x80,0x20,0x02,0x0F,0xE0,0x20,0x03,0x00,0x00,0x60,0x03,0xC0,0x00,0xE0,0x00,0x60,0x03,0x00,0x00,0x00,0x00,0x00},/*"E",37*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x00,0x02,0x01,0x00,0x00,0x02,0x01,0x00,0x00,0x02,0x01,0x00,0x00,0x02,0x03,0x80,0x00,0x03,0x0F,0xE0,0x00,0x03,0x00,0x00,0x00,0x03,0xC0,0x00,0x00,0x00,0x60,0x00,0x00},/*"F",38*/ +{0x00,0x00,0x00,0x00,0x00,0x07,0xF8,0x00,0x00,0x3F,0xFE,0x00,0x00,0x70,0x07,0x80,0x01,0xC0,0x01,0xC0,0x01,0x00,0x00,0x40,0x03,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x01,0x00,0x20,0x20,0x01,0x00,0x20,0x40,0x03,0xC0,0x3F,0x80,0x00,0x30,0x3F,0x80,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00},/*"G",39*/ +{0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x80,0x20,0x02,0x00,0x80,0x20,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x02,0x00,0x80,0x20,0x02,0x00,0x80,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"H",40*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"I",41*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0E,0x00,0x00,0x00,0x0F,0x00,0x00,0x00,0x0F,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x01,0x02,0x00,0x00,0x01,0x02,0x00,0x00,0x03,0x02,0x00,0x00,0x06,0x03,0xFF,0xFF,0xFC,0x03,0xFF,0xFF,0xF8,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"J",42*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0xC0,0x20,0x02,0x01,0x00,0x20,0x00,0x07,0x80,0x00,0x00,0x0F,0xE0,0x00,0x00,0x30,0xF8,0x00,0x02,0x60,0x3E,0x20,0x03,0x80,0x0F,0x20,0x03,0x00,0x03,0xE0,0x02,0x00,0x00,0xE0,0x02,0x00,0x00,0x20,0x00,0x00,0x00,0x20},/*"K",43*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0xE0,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00},/*"L",44*/ +{0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xE0,0x00,0x20,0x03,0xFF,0x00,0x20,0x00,0x1F,0xF0,0x00,0x00,0x01,0xFF,0x80,0x00,0x00,0x0F,0xE0,0x00,0x00,0x1E,0x00,0x00,0x03,0xE0,0x00,0x00,0x3E,0x00,0x20,0x03,0xE0,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20},/*"M",45*/ +{0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0x80,0x00,0x20,0x03,0xF0,0x00,0x20,0x00,0xFC,0x00,0x00,0x00,0x1F,0x00,0x00,0x00,0x07,0xC0,0x00,0x00,0x01,0xF0,0x00,0x00,0x00,0x7C,0x00,0x02,0x00,0x1F,0x80,0x02,0x00,0x07,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"N",46*/ 
+{0x00,0x00,0x00,0x00,0x00,0x0F,0xF8,0x00,0x00,0x3F,0xFE,0x00,0x00,0xF0,0x07,0x80,0x01,0x80,0x00,0xC0,0x01,0x00,0x00,0x40,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x01,0x00,0x00,0x40,0x01,0x80,0x00,0xC0,0x00,0xF0,0x03,0x80,0x00,0x3F,0xFE,0x00,0x00,0x0F,0xF8,0x00,0x00,0x00,0x00,0x00},/*"O",47*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x80,0x20,0x02,0x00,0x80,0x20,0x02,0x00,0x80,0x00,0x02,0x00,0x80,0x00,0x02,0x00,0x80,0x00,0x02,0x00,0x80,0x00,0x03,0x01,0x80,0x00,0x01,0x83,0x00,0x00,0x00,0xFE,0x00,0x00,0x00,0x7C,0x00,0x00,0x00,0x00,0x00,0x00},/*"P",48*/ +{0x00,0x00,0x00,0x00,0x00,0x0F,0xF8,0x00,0x00,0x7F,0xFF,0x00,0x00,0xF0,0x03,0x80,0x01,0x80,0x01,0xC0,0x01,0x00,0x06,0x40,0x02,0x00,0x04,0x20,0x02,0x00,0x04,0x20,0x02,0x00,0x06,0x20,0x02,0x00,0x03,0xE0,0x01,0x00,0x00,0xF8,0x01,0x80,0x00,0x5C,0x00,0xE0,0x03,0x8C,0x00,0x3F,0xFF,0x0C,0x00,0x0F,0xFC,0x18,0x00,0x00,0x00,0x00},/*"Q",49*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x01,0x00,0x20,0x02,0x01,0x00,0x20,0x02,0x01,0x80,0x00,0x02,0x01,0xE0,0x00,0x02,0x01,0xFC,0x00,0x03,0x03,0x3F,0x80,0x01,0x86,0x07,0xE0,0x01,0xFC,0x00,0xE0,0x00,0xF8,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"R",50*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x78,0x06,0x00,0x00,0xFE,0x01,0xE0,0x01,0x86,0x00,0xC0,0x03,0x03,0x00,0x40,0x02,0x03,0x00,0x20,0x02,0x01,0x80,0x20,0x02,0x01,0x80,0x20,0x02,0x01,0xC0,0x20,0x02,0x00,0xC0,0x20,0x01,0x00,0xE0,0x60,0x01,0x80,0x70,0xC0,0x03,0xE0,0x3F,0x80,0x00,0x00,0x1F,0x00,0x00,0x00,0x00,0x00},/*"S",51*/ 
+{0x00,0x00,0x00,0x00,0x00,0x60,0x00,0x00,0x03,0x80,0x00,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x03,0xFF,0xFF,0xE0,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0x80,0x00,0x00,0x00,0xE0,0x00,0x00,0x00,0x00,0x00,0x00},/*"T",52*/ +{0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0xFF,0xFF,0x00,0x03,0xFF,0xFF,0xC0,0x02,0x00,0x00,0x40,0x02,0x00,0x00,0x60,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x02,0x00,0x00,0x40,0x02,0x00,0x00,0x80,0x03,0xFF,0xFF,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"U",53*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0xC0,0x00,0x00,0x03,0xFC,0x00,0x00,0x02,0x3F,0xC0,0x00,0x00,0x03,0xF8,0x00,0x00,0x00,0x7F,0x80,0x00,0x00,0x07,0xE0,0x00,0x00,0x07,0x80,0x00,0x00,0x78,0x00,0x02,0x03,0xC0,0x00,0x02,0x3C,0x00,0x00,0x03,0xC0,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00},/*"V",54*/ +{0x02,0x00,0x00,0x00,0x03,0xC0,0x00,0x00,0x03,0xFF,0x80,0x00,0x02,0x3F,0xFE,0x00,0x02,0x00,0x7F,0xE0,0x00,0x00,0x0F,0x00,0x02,0x00,0xF0,0x00,0x03,0xEF,0x00,0x00,0x03,0xFF,0x80,0x00,0x02,0x0F,0xFE,0x00,0x00,0x00,0x3F,0xE0,0x00,0x00,0x1F,0x00,0x02,0x07,0xE0,0x00,0x03,0xF8,0x00,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00},/*"W",55*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0x80,0x00,0xE0,0x03,0xF0,0x03,0x20,0x02,0xFC,0x0C,0x20,0x02,0x1F,0x30,0x00,0x00,0x07,0xC0,0x00,0x00,0x07,0xF0,0x00,0x02,0x18,0x7C,0x00,0x02,0x60,0x1F,0x20,0x03,0x80,0x03,0xE0,0x02,0x00,0x00,0xE0,0x02,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"X",56*/ 
+{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x03,0xC0,0x00,0x00,0x03,0xF8,0x00,0x00,0x02,0x3E,0x00,0x20,0x02,0x0F,0xC0,0x20,0x00,0x01,0xFF,0xE0,0x00,0x00,0x7F,0xE0,0x00,0x03,0x80,0x20,0x02,0x1C,0x00,0x20,0x02,0x70,0x00,0x00,0x03,0x80,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"Y",57*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x60,0x00,0xE0,0x03,0x80,0x03,0xE0,0x03,0x00,0x0F,0xA0,0x02,0x00,0x3E,0x20,0x02,0x00,0xF8,0x20,0x02,0x03,0xE0,0x20,0x02,0x0F,0x80,0x20,0x02,0x3E,0x00,0x20,0x02,0x78,0x00,0x20,0x03,0xE0,0x00,0x60,0x03,0x80,0x00,0xE0,0x02,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"Z",58*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x1F,0xFF,0xFF,0xFC,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"[",59*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x03,0xC0,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x1E,0x00,0x00,0x00,0x03,0xC0,0x00,0x00,0x00,0xF0,0x00,0x00,0x00,0x1E,0x00,0x00,0x00,0x07,0x80,0x00,0x00,0x00,0xF0,0x00,0x00,0x00,0x3C,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"\",60*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x10,0x00,0x00,0x04,0x1F,0xFF,0xFF,0xFC,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"]",61*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"^",62*/ +{0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01},/*"_",63*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"`",64*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x80,0x00,0x01,0x8F,0xC0,0x00,0x03,0x8C,0x60,0x00,0x06,0x18,0x20,0x00,0x04,0x10,0x20,0x00,0x04,0x10,0x20,0x00,0x04,0x20,0x20,0x00,0x04,0x20,0x40,0x00,0x06,0x20,0x40,0x00,0x03,0xFF,0xC0,0x00,0x01,0xFF,0xE0,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0xC0,0x00,0x00,0x00,0x00},/*"a",65*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0xFF,0xFF,0xE0,0x07,0xFF,0xFF,0xC0,0x00,0x01,0x80,0xC0,0x00,0x02,0x00,0x60,0x00,0x02,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x06,0x00,0x40,0x00,0x03,0x00,0xC0,0x00,0x01,0xFF,0x80,0x00,0x00,0xFE,0x00,0x00,0x00,0x00,0x00},/*"b",66*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7E,0x00,0x00,0x01,0xFF,0x80,0x00,0x03,0x81,0xC0,0x00,0x02,0x00,0x40,0x00,0x06,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x06,0x00,0x20,0x00,0x03,0xC0,0x40,0x00,0x01,0xC0,0x80,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"c",67*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7E,0x00,0x00,0x01,0xFF,0x80,0x00,0x03,0x80,0xC0,0x00,0x06,0x00,0x60,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x02,0x04,0x00,0x40,0x02,0x02,0x00,0x80,0x03,0xFF,0xFF,0xE0,0x07,0xFF,0xFF,0xC0,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00},/*"d",68*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7E,0x00,0x00,0x01,0xFF,0x80,0x00,0x03,0x11,0xC0,0x00,0x02,0x10,0x40,0x00,0x04,0x10,0x60,0x00,0x04,0x10,0x20,0x00,0x04,0x10,0x20,0x00,0x04,0x10,0x20,0x00,0x06,0x10,0x20,0x00,0x03,0x10,0x40,0x00,0x01,0xF0,0xC0,0x00,0x00,0x71,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"e",69*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x7F,0xFF,0xE0,0x01,0xFF,0xFF,0xE0,0x01,0x04,0x00,0x20,0x03,0x04,0x00,0x20,0x02,0x04,0x00,0x20,0x02,0x04,0x00,0x20,0x02,0x04,0x00,0x00,0x02,0x00,0x00,0x00,0x01,0xC0,0x00,0x00,0x01,0xC0,0x00,0x00},/*"f",70*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x1C,0x00,0x00,0xE3,0x3E,0x00,0x03,0xFF,0xC2,0x00,0x02,0x0C,0xC3,0x00,0x04,0x04,0xC1,0x00,0x04,0x04,0xC1,0x00,0x04,0x04,0xC1,0x00,0x04,0x04,0xC1,0x00,0x06,0x0C,0xC1,0x00,0x03,0xF8,0xC3,0x00,0x05,0xF0,0x62,0x00,0x06,0x00,0x7E,0x00,0x06,0x00,0x3C,0x00,0x00,0x00,0x00},/*"g",71*/ 
+{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x07,0xFF,0xFF,0xE0,0x00,0x01,0x00,0x20,0x00,0x02,0x00,0x20,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x06,0x00,0x20,0x00,0x03,0xFF,0xE0,0x00,0x01,0xFF,0xE0,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20},/*"h",72*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x03,0x87,0xFF,0xE0,0x03,0x8F,0xFF,0xE0,0x03,0x80,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"i",73*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x04,0x00,0x01,0x00,0x04,0x00,0x01,0x00,0x04,0x00,0x03,0x00,0x04,0x00,0x06,0x03,0x87,0xFF,0xFC,0x03,0x8F,0xFF,0xF8,0x03,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"j",74*/ +{0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x07,0xFF,0xFF,0xE0,0x00,0x00,0x08,0x20,0x00,0x00,0x10,0x20,0x00,0x00,0x30,0x00,0x00,0x00,0xFC,0x00,0x00,0x05,0x8E,0x00,0x00,0x07,0x07,0xA0,0x00,0x06,0x01,0xE0,0x00,0x04,0x00,0xE0,0x00,0x04,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"k",75*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x02,0x00,0x00,0x20,0x03,0xFF,0xFF,0xE0,0x07,0xFF,0xFF,0xE0,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"l",76*/ 
+{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x07,0xFF,0xE0,0x00,0x0F,0xFF,0xE0,0x00,0x02,0x00,0x20,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x07,0xFF,0xE0,0x00,0x03,0xFF,0xE0,0x00,0x02,0x00,0x20,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x07,0xFF,0xE0,0x00,0x03,0xFF,0xE0,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"m",77*/ +{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x07,0xFF,0xE0,0x00,0x0F,0xFF,0xE0,0x00,0x01,0x00,0x20,0x00,0x02,0x00,0x20,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x06,0x00,0x20,0x00,0x03,0xFF,0xE0,0x00,0x01,0xFF,0xE0,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20},/*"n",78*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7E,0x00,0x00,0x00,0xFF,0x80,0x00,0x03,0x81,0xC0,0x00,0x02,0x00,0x40,0x00,0x06,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x06,0x00,0x20,0x00,0x02,0x00,0x40,0x00,0x03,0x81,0xC0,0x00,0x01,0xFF,0x80,0x00,0x00,0x7E,0x00,0x00,0x00,0x00,0x00},/*"o",79*/ +{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x01,0x00,0x04,0x00,0x01,0x00,0x07,0xFF,0xFF,0x00,0x0F,0xFF,0xFF,0x00,0x01,0x00,0xC1,0x00,0x02,0x00,0x41,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x06,0x00,0x40,0x00,0x03,0x01,0xC0,0x00,0x01,0xFF,0x80,0x00,0x00,0x7E,0x00,0x00,0x00,0x00,0x00},/*"p",80*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7E,0x00,0x00,0x01,0xFF,0x80,0x00,0x03,0x80,0xC0,0x00,0x02,0x00,0x60,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x02,0x00,0x41,0x00,0x03,0x00,0xC1,0x00,0x03,0xFF,0xFF,0x00,0x07,0xFF,0xFF,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01},/*"q",81*/ 
+{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x0F,0xFF,0xE0,0x00,0x0F,0xFF,0xE0,0x00,0x00,0xC0,0x20,0x00,0x01,0x00,0x20,0x00,0x02,0x00,0x20,0x00,0x06,0x00,0x20,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00},/*"r",82*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0xE0,0x00,0x01,0xC0,0xE0,0x00,0x03,0xE0,0x40,0x00,0x06,0x30,0x20,0x00,0x04,0x30,0x20,0x00,0x04,0x18,0x20,0x00,0x04,0x18,0x20,0x00,0x04,0x18,0x20,0x00,0x04,0x0C,0x20,0x00,0x02,0x0C,0x60,0x00,0x03,0x07,0xC0,0x00,0x07,0x83,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"s",83*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x0C,0x00,0x00,0x00,0x1F,0xFF,0x80,0x00,0xFF,0xFF,0xC0,0x00,0x04,0x00,0x60,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x40,0x00,0x00,0x01,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"t",84*/ +{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x07,0xFF,0x80,0x00,0x0F,0xFF,0xC0,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x40,0x00,0x04,0x00,0x80,0x00,0x07,0xFF,0xE0,0x00,0x0F,0xFF,0xC0,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x40},/*"u",85*/ +{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x07,0x80,0x00,0x00,0x07,0xF0,0x00,0x00,0x04,0xFE,0x00,0x00,0x04,0x1F,0xC0,0x00,0x00,0x03,0xE0,0x00,0x00,0x03,0x80,0x00,0x00,0x1C,0x00,0x00,0x04,0x60,0x00,0x00,0x07,0x80,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"v",86*/ 
+{0x00,0x04,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x07,0xC0,0x00,0x00,0x07,0xFC,0x00,0x00,0x04,0x3F,0x80,0x00,0x00,0x03,0xE0,0x00,0x04,0x0F,0x80,0x00,0x06,0xF0,0x00,0x00,0x07,0xF0,0x00,0x00,0x07,0xFF,0x80,0x00,0x04,0x0F,0xE0,0x00,0x00,0x03,0x80,0x00,0x04,0x3C,0x00,0x00,0x07,0xC0,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00},/*"w",87*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x20,0x00,0x04,0x00,0x60,0x00,0x07,0x00,0xE0,0x00,0x07,0x83,0x20,0x00,0x07,0xE6,0x00,0x00,0x04,0xF8,0x00,0x00,0x00,0x3C,0x00,0x00,0x04,0x5E,0x20,0x00,0x05,0x87,0xA0,0x00,0x06,0x01,0xE0,0x00,0x04,0x00,0x60,0x00,0x04,0x00,0x20,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00},/*"x",88*/ +{0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x03,0x00,0x07,0x00,0x03,0x00,0x07,0xE0,0x01,0x00,0x04,0xF8,0x01,0x00,0x04,0x1F,0x02,0x00,0x00,0x07,0xFC,0x00,0x00,0x00,0xE0,0x00,0x00,0x07,0x00,0x00,0x04,0x38,0x00,0x00,0x07,0xC0,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00},/*"y",89*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x80,0x60,0x00,0x06,0x00,0xE0,0x00,0x04,0x03,0xE0,0x00,0x04,0x07,0xA0,0x00,0x04,0x0E,0x20,0x00,0x04,0x3C,0x20,0x00,0x04,0x70,0x20,0x00,0x05,0xE0,0x20,0x00,0x07,0x80,0x20,0x00,0x07,0x00,0x60,0x00,0x04,0x00,0xE0,0x00,0x00,0x03,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"z",90*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x01,0x40,0x00,0x07,0xFE,0x3F,0xF8,0x08,0x00,0x00,0x04,0x10,0x00,0x00,0x02,0x10,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"{",91*/ 
+{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"|",92*/ +{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x02,0x10,0x00,0x00,0x02,0x08,0x00,0x00,0x04,0x07,0xFE,0x3F,0xF8,0x00,0x01,0x40,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"}",93*/ +{0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x0C,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00},/*"~",94*/ +}; +#endif diff --git a/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/Kconfig b/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/Kconfig index 2620f011c..9acaad0f8 100644 --- a/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/Kconfig +++ b/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/Kconfig @@ -2,10 +2,46 @@ menuconfig DRV_USING_OV2640 bool "ov2640 driver" default n + help + Note: + The output resolution and the window size must be proportional to each other, + and the output resolution should preferably be a fixed value (e.g. 320*240); + otherwise the picture may not be output correctly. + +if DRV_USING_OV2640 + choice + prompt "Output format" + default OV2640_JPEG_MODE + help + Select the camera output format + config OV2640_JPEG_MODE + bool "jpeg mode" + config OV2640_RGB565_MODE + bool "RGB565 mode" + endchoice + config OV2640_X_RESOLUTION_IMAGE_OUTSIZE + int "X direction resolution of outputimage" + default 240 + config OV2640_Y_RESOLUTION_IMAGE_OUTSIZE + int "Y direction resolution of
outputimage" + default 240 + + config OV2640_X_IMAGE_WINDOWS_SIZE + int "X direction WINDOWS SIZE" + default 400 + comment "the value must be greater than OV2640_X_RESOLUTION_IMAGE_OUTSIZE" + + config OV2640_Y_IMAGE_WINDOWS_SIZE + int "Y direction WINDOWS SIZE" + default 400 + comment "the value must be greater than OV2640_Y_RESOLUTION_IMAGE_OUTSIZE" +endif if SOC_FAMILY_STM32 config DRV_USING_OV2640 select BSP_USING_DCMI + config OV2640_RGB565_MODE + select BSP_USING_MCU_LCD endif if BOARD_K210_EVB diff --git a/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.c b/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.c index 9a22c085c..7fcb1e43b 100644 --- a/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.c +++ b/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.c @@ -672,7 +672,11 @@ const rt_uint8_t ov2640_jpeg_reg_tbl[][2]= const rt_uint8_t ov2640_rgb565_reg_tbl[][2]= { {0xFF, 0x00}, + #ifdef SOC_FAMILY_STM32 + {0xDA, 0x09}, + #elif defined BOARD_K210_EVB {0xDA, 0x08}, + #endif {0xD7, 0x03}, {0xDF, 0x02}, {0x33, 0xa0}, @@ -1467,7 +1471,7 @@ static rt_err_t rt_ov2640_control(rt_device_t dev, int cmd, void *args) { RT_ASSERT(dev != RT_NULL); rt_err_t ret = RT_EOK; - if(cmd < IOCTRL_CAMERA_SET_DVP_RESO || cmd > IOCTRL_CAMERA_SET_EXPOSURE) + if(cmd < IOCTRL_CAMERA_OUT_SIZE_RESO || cmd > IOCTRL_CAMERA_SET_EXPOSURE) { LOG_E("CMD value should be 22 ~29"); return RT_ERROR; @@ -1475,23 +1479,24 @@ static rt_err_t rt_ov2640_control(rt_device_t dev, int cmd, void *args) int value = 0; _ioctl_shoot_para shoot_para = {0}; - #ifdef BOARD_K210_EVB - _ioctl_set_dvp_reso set_dvp_reso = {0}; - #endif + _ioctl_set_reso set_dvp_reso = {0}; + if(IOCTRL_CAMERA_START_SHOT == cmd) { shoot_para = *((_ioctl_shoot_para*)args); ret = rt_ov2640_start_shoot(shoot_para.pdata,shoot_para.length); return ret; } - #ifdef BOARD_K210_EVB - else if(IOCTRL_CAMERA_SET_DVP_RESO == cmd) + else 
if(IOCTRL_CAMERA_OUT_SIZE_RESO == cmd) { - set_dvp_reso =*((_ioctl_set_dvp_reso*)args); + set_dvp_reso =*((_ioctl_set_reso*)args); + #ifdef BOARD_K210_EVB dvp_set_image_size(set_dvp_reso.width, set_dvp_reso.height); + #elif defined SOC_FAMILY_STM32 + ov2640_set_image_size(set_dvp_reso.width, set_dvp_reso.height); + #endif return RT_EOK; } - #endif else { value = *((int*)args); @@ -1617,18 +1622,22 @@ static rt_err_t rt_ov2640_init(rt_device_t dev) sccb_write_reg(i2c_bus, ov2640_svga_init_reg_tbl[i][0], ov2640_svga_init_reg_tbl[i][1]); } } - ov2640_rgb565_mode(); ov2640_set_light_mode(0); ov2640_set_color_saturation(0); ov2640_set_brightness(2); ov2640_set_contrast(1); + #ifdef OV2640_RGB565_MODE + ov2640_rgb565_mode(); + #elif defined OV2640_JPEG_MODE + ov2640_jpeg_mode(); + #endif #ifdef SOC_FAMILY_STM32 LOG_I("set ov2640 jpeg mode on stm32 board"); - ov2640_jpeg_mode(); - ov2640_set_image_window_size(0, 0, jpeg_img_size_tbl[g_ov2640_reso_level][0],jpeg_img_size_tbl[g_ov2640_reso_level][1]); - ov2640_set_image_out_size(jpeg_img_size_tbl[g_ov2640_reso_level][0], jpeg_img_size_tbl[g_ov2640_reso_level][1]); - LOG_I("set image resolution is %d * %d ",jpeg_img_size_tbl[g_ov2640_reso_level][0],jpeg_img_size_tbl[g_ov2640_reso_level][1]); + ov2640_set_image_out_size(OV2640_X_RESOLUTION_IMAGE_OUTSIZE,OV2640_Y_RESOLUTION_IMAGE_OUTSIZE); + ov2640_set_image_window_size(0, 0,OV2640_X_IMAGE_WINDOWS_SIZE,OV2640_Y_IMAGE_WINDOWS_SIZE); + LOG_I("set image resolution is %d * %d ",OV2640_X_RESOLUTION_IMAGE_OUTSIZE,OV2640_Y_RESOLUTION_IMAGE_OUTSIZE); #elif defined BOARD_K210_EVB + ov2640_rgb565_mode(); ov2640_set_image_window_size(0, 0, jpeg_img_size_tbl[5][0],jpeg_img_size_tbl[5][1]); ov2640_set_image_out_size(jpeg_img_size_tbl[2][0], jpeg_img_size_tbl[2][1]); LOG_I("set ov2640 rgb565 mode on K210 board and set reselotion QVGA 320,240"); diff --git a/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.h 
b/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.h index 6d0d01cb6..76b8ba323 100644 --- a/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.h +++ b/Ubiquitous/RT_Thread/app_match_rt-thread/ov2640/ov2640_source/drv_ov2640.h @@ -123,28 +123,30 @@ extern "C" { #define OV2640_SENSOR_HISTO_LOW 0x61 #define OV2640_SENSOR_HISTO_HIGH 0x62 -#ifdef BOARD_K210_EVB -#define IOCTRL_CAMERA_SET_DVP_RESO (21) // set dev resolution -#else -#define IOCTRL_CAMERA_SET_DVP_RESO (22) // same as IOCTRL_CAMERA_START_SHOT -#endif -#define IOCTRL_CAMERA_START_SHOT (22) // start shoot -#define IOCTRL_CAMERA_SET_RESO (23) //set resolution -#define IOCTRL_CAMERA_SET_LIGHT (24) //set light mode -#define IOCTRL_CAMERA_SET_COLOR (25) //set color saturation -#define IOCTRL_CAMERA_SET_BRIGHTNESS (26) //set color brightness -#define IOCTRL_CAMERA_SET_CONTRAST (27) //set contrast -#define IOCTRL_CAMERA_SET_EFFECT (28) //set effect -#define IOCTRL_CAMERA_SET_EXPOSURE (29) //set auto exposure +#define IOCTRL_CAMERA_OUT_SIZE_RESO (20) // set a user-specified output resolution (args: _ioctl_set_reso width/height) +#define IOCTRL_CAMERA_SET_WINDOWS_SIZE (21) // set a user-specified window size (presumably args: _ioctl_ov2640_set_win_size; TODO confirm) +#define IOCTRL_CAMERA_START_SHOT (22) // start a capture (args: _ioctl_shoot_para) +#define IOCTRL_CAMERA_SET_RESO (23) // select a preset resolution by index, e.g. QQVGA 0, QCIF 1, QVGA 2, WQVGA 3, CIF 4, ... +#define IOCTRL_CAMERA_SET_LIGHT (24) // set light mode +#define IOCTRL_CAMERA_SET_COLOR (25) // set color saturation +#define IOCTRL_CAMERA_SET_BRIGHTNESS (26) // set brightness +#define IOCTRL_CAMERA_SET_CONTRAST (27) // set contrast +#define IOCTRL_CAMERA_SET_EFFECT (28) // set effect +#define IOCTRL_CAMERA_SET_EXPOSURE (29) // set auto exposure + -#ifdef BOARD_K210_EVB typedef struct { uint32_t width; // width The width of image uint32_t height; // height The height of image -}_ioctl_set_dvp_reso; -#endif +}_ioctl_set_reso; + +typedef struct +{ + uint32_t width; // width The width of image + uint32_t height; // height The height of image
+}_ioctl_ov2640_set_win_size; struct camera_device