Merge branch 'prepare_for_master' of https://git.trustie.net/xuos/xiuos into prepare_for_master

This commit is contained in:
wgz-code 2022-03-02 11:24:53 +08:00
commit 5ba501eae7
2404 changed files with 157297 additions and 31022 deletions

2
.gitmodules vendored
View File

@ -1,7 +1,7 @@
[submodule "Ubiquitous/RT_Thread/rt-thread"]
path = Ubiquitous/RT_Thread/rt-thread
url = https://code.gitlink.org.cn/chunyexixiaoyu/rt-thread.git
[submodule "Ubiquitous/RT_Thread/bsp/k210/kendryte-sdk/kendryte-sdk-source"]
[submodule "Ubiquitous/RT_Thread/aiit_board/k210/kendryte-sdk/kendryte-sdk-source"]
path = Ubiquitous/RT_Thread/aiit_board/k210/kendryte-sdk/kendryte-sdk-source
url = https://code.gitlink.org.cn/chunyexixiaoyu/kendryte-sdk-source.git
[submodule "Ubiquitous/Nuttx/apps"]

View File

@ -7,7 +7,7 @@ ifeq ($(CONFIG_ADD_NUTTX_FETURES),y)
endif
ifeq ($(CONFIG_ADD_XIUOS_FETURES),y)
ifeq ($(CONFIG_ADD_XIZI_FETURES),y)
SRC_DIR := general_functions app_test
SRC_FILES := main.c framework_init.c

View File

@ -12,7 +12,7 @@ menu "test app"
bool "Config test adc"
default n
if USER_TEST_ADC
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADC_DEV_DRIVER
string "Set ADC dev path"
default "/dev/adc1_dev"
@ -23,7 +23,7 @@ menu "test app"
bool "Config test dac"
default n
if USER_TEST_DAC
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config DAC_DEV_DRIVER
string "Set DAC dev path"
default "/dev/dac_dev"

View File

@ -31,7 +31,7 @@ void test_adc()
adc_fd = PrivOpen(ADC_DEV_DRIVER, O_RDWR);
if (adc_fd < 0) {
KPrintf("open adc fd error %d\n", adc_fd);
printf("open adc fd error %d\n", adc_fd);
return;
}
@ -39,7 +39,7 @@ void test_adc()
ioctl_cfg.ioctl_driver_type = ADC_TYPE;
ioctl_cfg.args = &adc_channel;
if (0 != PrivIoctl(adc_fd, OPE_CFG, &ioctl_cfg)) {
KPrintf("ioctl adc fd error %d\n", adc_fd);
printf("ioctl adc fd error %d\n", adc_fd);
PrivClose(adc_fd);
return;
}

View File

@ -42,7 +42,7 @@ Modification:
1. support spi flash open, read and write function
*************************************************/
#include <xiuos.h>
#include <xizi.h>
#include <device.h>
#include <flash_spi.h>
#include <user_api.h>

View File

@ -18,7 +18,7 @@
* @date 2021-05-29
*/
#include <transform.h>
#include <xiuos.h>
#include <xizi.h>
#include "board.h"
#include "sys_arch.h"
#include <lwip/sockets.h>

View File

@ -18,7 +18,7 @@
* @date 2021-05-29
*/
#include <transform.h>
#include <xiuos.h>
#include <xizi.h>
#include "board.h"
#include "sys_arch.h"
#include "lwip/udp.h"

View File

@ -10,7 +10,7 @@
* See the Mulan PSL v2 for more details.
*/
#include <xiuos.h>
#include <xizi.h>
#include <stdio.h>
#include <cstdlib>
using namespace std;

View File

@ -5,7 +5,7 @@ ifeq ($(CONFIG_ADD_NUTTX_FETURES),y)
include $(APPDIR)/Application.mk
endif
ifeq ($(CONFIG_ADD_XIUOS_FETURES),y)
ifeq ($(CONFIG_ADD_XIZI_FETURES),y)
SRC_FILES := double_list.c single_list.c
include $(KERNEL_ROOT)/compiler.mk
endif

View File

@ -9,7 +9,8 @@ menu "knowing app"
source "$APP_DIR/Applications/knowing_app/iris_ml_demo/Kconfig"
source "$APP_DIR/Applications/knowing_app/k210_fft_test/Kconfig"
source "$APP_DIR/Applications/knowing_app/image_processing/Kconfig"
source "$APP_DIR/Applications/knowing_app/cmsis_5_demo/Kconfig"
source "$APP_DIR/Applications/knowing_app/cmsis_5_demo/Kconfig"
source "$APP_DIR/Applications/knowing_app/nnom_demo/Kconfig"
endif
endmenu

View File

@ -1,6 +1,6 @@
menuconfig USING_CMSIS_5_DEMOAPP
bool "CMSIS-5 demo app"
depends on USING_USING_CMSIS_5_NN
depends on USING_CMSIS_5_NN
default n
if USING_CMSIS_5_DEMOAPP

View File

@ -13,6 +13,6 @@ path = [
cwd + '/demo'
]
group = DefineGroup('CMSISNN-cifar10', src, depend = ['USING_CMSIS_5_DEMOAPP'], CPPPATH = path)
group = DefineGroup('CMSISNN-cifar10', src, depend = ['USING_CMSIS_5_NN_DEMOAPP'], CPPPATH = path)
Return('group')

View File

@ -1,4 +1,4 @@
unsigned char mnist_model[] = {
const unsigned char mnist_model[] = {
0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x14, 0x00, 0x20, 0x00,
0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00,
0x18, 0x00, 0x1c, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,

View File

@ -0,0 +1,14 @@
menuconfig USING_NNOM_DEMOAPP
bool "NNOM demo app"
depends on USING_NNOM
default n
if USING_NNOM_DEMOAPP
config USING_NNOM_MNIST_DEMOAPP
bool "Using NNOM mnist demo app"
default n
endif

View File

@ -0,0 +1,14 @@
import os
Import('RTT_ROOT')
from building import *

# Aggregate the build groups of every sub-directory that ships its own
# SConscript file (each demo application lives in its own folder).
cwd = GetCurrentDir()
objs = []
# Iterate a sorted copy for a deterministic build order, and avoid
# shadowing the builtin name `list` (the original used `list = ...`).
for entry in sorted(os.listdir(cwd)):
    subdir = os.path.join(cwd, entry)
    script = os.path.join(subdir, 'SConscript')
    if os.path.isfile(script):
        objs = objs + SConscript(script)
Return('objs')

View File

@ -0,0 +1,16 @@
# NNoM Mnist-simple Example
This example is from [[NNoM](https://github.com/majianjia/nnom)/**[mnist-simple](https://github.com/majianjia/nnom/tree/master/examples/mnist-simple)**] and can be deployed on Arm CPUs and RISC-V CPUs. CMSIS-NN can be used to accelerate on Arm Cortex-M CPUs.
## Requirements:
- NNoM in Framework/knowing/nnom
- To use CMSIS-NN backend, select in menuconfig "APP_Framework->Framework->support knowing framework->NNoM->Select NNoM Backend"
## To run this demo:
- Run the demo by typing the following command
```
mnist_nnom num
```

View File

@ -0,0 +1,10 @@
# SConscript for the NNoM MNIST demo application.
import os
from building import *
# Directory containing this SConscript; all demo sources live here.
cwd = GetCurrentDir()
# Compile every C source file in this directory.
src = Glob('*.c')
# Expose this directory on the include path so local headers resolve.
path = [cwd]
# Register the sources as one build group, gated on the menuconfig
# option USING_NNOM_MNIST_DEMOAPP.
group = DefineGroup('NNOM mnist application', src, depend = ['USING_NNOM_MNIST_DEMOAPP'], CPPPATH = path)
Return('group')

View File

@ -0,0 +1,36 @@
#define IMG0 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 126, 126, 126, 126, 127, 64, 56, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 81, 126, 126, 126, 126, 126, 126, 126, 126, 109, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 28, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 89, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 126, 126, 127, 126, 114, 23, 0, 31, 89, 126, 126, 126, 127, 126, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 100, 126, 126, 126, 111, 26, 0, 0, 0, 28, 116, 126, 126, 126, 126, 107, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 83, 0, 0, 0, 0, 0, 37, 116, 126, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 37, 0, 0, 0, 0, 0, 0, 84, 126, 126, 126, 126, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 126, 126, 126, 126, 13, 0, 0, 0, 0, 0, 0, 84, 126, 126, 126, 126, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 126, 126, 126, 127, 13, 0, 0, 0, 0, 0, 0, 84, 126, 127, 126, 126, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 126, 126, 126, 126, 13, 0, 0, 0, 0, 0, 0, 61, 126, 126, 126, 126, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 68, 0, 0, 0, 0, 0, 0, 30, 126, 126, 126, 126, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 112, 29, 5, 0, 0, 5, 69, 112, 126, 126, 126, 126, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 110, 126, 126, 126, 126, 126, 88, 70, 70, 89, 126, 126, 126, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 126, 126, 127, 126, 
126, 126, 126, 127, 126, 126, 126, 126, 127, 126, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 111, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 77, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 114, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 42, 118, 126, 126, 126, 126, 126, 126, 126, 124, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 56, 56, 64, 126, 126, 126, 70, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG0_LABLE 0
#define IMG1 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 127, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 113, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 107, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 127, 127, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 98, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 127, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 127, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 98, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 127, 127, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 117, 126, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 126, 
26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG1_LABLE 1
#define IMG2 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 83, 87, 87, 47, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 78, 122, 121, 113, 113, 127, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 117, 106, 64, 16, 0, 0, 67, 112, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 120, 75, 7, 0, 0, 0, 0, 0, 93, 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 124, 60, 0, 0, 0, 0, 0, 0, 0, 29, 120, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 111, 30, 0, 0, 0, 0, 0, 0, 0, 0, 12, 121, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 122, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 54, 121, 114, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 28, 70, 112, 127, 125, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 101, 126, 126, 126, 127, 117, 63, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 127, 127, 127, 127, 127, 127, 127, 127, 100, 53, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 40, 27, 6, 6, 6, 29, 69, 92, 117, 127, 122, 78, 34, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 45, 94, 121, 126, 107, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 53, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG2_LABLE 2
#define IMG3 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 61, 120, 108, 108, 96, 37, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 127, 127, 127, 127, 127, 127, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 58, 70, 70, 115, 127, 127, 121, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 105, 127, 127, 119, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 102, 127, 127, 117, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 116, 127, 127, 71, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 127, 127, 127, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 82, 127, 127, 124, 85, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 22, 110, 127, 127, 98, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 76, 127, 127, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 73, 127, 124, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 110, 127, 111, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 127, 127, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 102, 127, 127, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 69, 123, 127, 120, 63, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 44, 119, 127, 127, 127, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 69, 103, 52, 70, 125, 127, 127, 100, 63, 21, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 127, 127, 127, 127, 127, 127, 94, 4, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 98, 127, 127, 127, 115, 88, 38, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 110, 47, 19, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG3_LABLE 3
#define IMG4 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 103, 126, 0, 0, 0, 0, 0, 57, 126, 111, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 123, 126, 126, 63, 0, 0, 0, 0, 56, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 55, 0, 0, 0, 0, 56, 126, 126, 99, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 19, 117, 126, 126, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 0, 73, 126, 126, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 126, 126, 126, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 81, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 100, 126, 126, 126, 0, 0, 0, 0, 0, 0, 9, 104, 126, 126, 126, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 126, 126, 126, 14, 38, 84, 84, 84, 54, 9, 90, 126, 126, 126, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 107, 44, 126, 126, 126, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 120, 126, 126, 126, 127, 126, 126, 126, 126, 127, 126, 126, 126, 126, 127, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 126, 126, 126, 126, 88, 83, 75, 53, 126, 126, 126, 126, 126, 126, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 27, 27, 27, 28, 3, 0, 0, 0, 28, 27, 27, 93, 126, 126, 116, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 126, 126, 126, 101, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 94, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 39, 127, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 126, 126, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 126, 126, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 79, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG4_LABLE 4
#define IMG5 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 9, 66, 95, 78, 83, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 12, 64, 95, 126, 126, 126, 126, 126, 123, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 92, 126, 126, 126, 126, 126, 126, 115, 96, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 36, 114, 126, 126, 126, 126, 126, 102, 67, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 126, 126, 126, 126, 100, 96, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 126, 126, 115, 69, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 94, 126, 126, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 126, 126, 126, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 82, 126, 126, 105, 54, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 124, 126, 126, 126, 126, 121, 94, 60, 61, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 98, 126, 126, 126, 126, 126, 126, 126, 127, 126, 80, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 54, 54, 54, 54, 105, 114, 119, 126, 126, 126, 126, 87, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 51, 109, 126, 126, 126, 112, 58, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 50, 102, 119, 126, 126, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 126, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 48, 117, 126, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 102, 102, 9, 20, 43, 76, 106, 126, 126, 114, 64, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 36, 126, 126, 110, 117, 126, 126, 126, 126, 91, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 104, 126, 126, 126, 126, 126, 126, 115, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 65, 65, 123, 95, 65, 28, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG5_LABLE 5
#define IMG6 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 56, 88, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 97, 121, 126, 103, 121, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 100, 126, 99, 27, 12, 99, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 53, 126, 121, 96, 14, 0, 0, 14, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 126, 126, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 72, 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 126, 121, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 123, 126, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 53, 126, 121, 96, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 126, 126, 107, 70, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 126, 127, 126, 126, 126, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 121, 126, 126, 88, 93, 126, 118, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 126, 126, 28, 3, 6, 93, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 117, 126, 118, 0, 0, 0, 84, 126, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 56, 0, 0, 0, 84, 126, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 126, 126, 87, 0, 0, 0, 84, 126, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 101, 126, 126, 81, 28, 9, 98, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 126, 126, 126, 116, 104, 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 49, 111, 126, 126, 126, 
116, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 126, 110, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG6_LABLE 6
#define IMG7 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 52, 122, 126, 126, 127, 126, 78, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 98, 126, 126, 126, 126, 114, 103, 126, 115, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 123, 126, 118, 105, 44, 26, 28, 123, 126, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 66, 4, 0, 0, 0, 0, 82, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 121, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 126, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 106, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 89, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, 126, 126, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 110, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 126, 104, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 124, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 64, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG7_LABLE 7
#define IMG8 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 107, 126, 127, 106, 66, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 81, 126, 126, 126, 126, 126, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 126, 127, 65, 31, 51, 96, 126, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 126, 86, 5, 0, 0, 35, 126, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 127, 126, 0, 0, 0, 0, 25, 126, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71, 126, 85, 0, 0, 0, 0, 46, 126, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, 127, 45, 0, 0, 0, 20, 107, 126, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 121, 116, 15, 0, 0, 20, 121, 126, 106, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 117, 15, 0, 0, 86, 126, 127, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 126, 126, 86, 10, 91, 126, 126, 126, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 127, 126, 127, 126, 127, 86, 127, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 106, 126, 126, 126, 65, 5, 65, 126, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 127, 25, 0, 0, 25, 126, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 126, 86, 5, 0, 0, 25, 126, 111, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 126, 0, 0, 0, 0, 25, 126, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 126, 126, 0, 0, 0, 0, 46, 126, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 127, 
126, 0, 0, 5, 86, 127, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 126, 126, 20, 20, 86, 126, 106, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 117, 126, 127, 126, 127, 86, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 126, 126, 106, 45, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG8_LABLE 8
#define IMG9 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 59, 109, 120, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 49, 116, 121, 127, 127, 127, 120, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 80, 127, 127, 114, 89, 96, 127, 127, 83, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 78, 127, 127, 70, 20, 0, 19, 117, 127, 127, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 127, 127, 82, 1, 0, 0, 0, 36, 127, 127, 110, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 127, 123, 25, 0, 0, 0, 0, 18, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 127, 120, 0, 0, 0, 0, 0, 83, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 127, 90, 0, 0, 0, 0, 0, 15, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 124, 127, 75, 0, 0, 0, 0, 0, 14, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 102, 127, 126, 103, 36, 22, 0, 0, 10, 112, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 119, 127, 127, 127, 115, 93, 93, 86, 83, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 121, 127, 127, 127, 127, 127, 127, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 55, 84, 124, 124, 127, 127, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 76, 93, 127, 127, 127, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 75, 127, 127, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 127, 127, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 127, 127, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 127, 127, 92, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 122, 127, 127, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 127, 127, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG9_LABLE 9
#define TOTAL_IMAGE 10
static const int8_t img[10][784] = {IMG0,IMG1,IMG2,IMG3,IMG4,IMG5,IMG6,IMG7,IMG8,IMG9};
static const int8_t label[10] = {IMG0_LABLE,IMG1_LABLE,IMG2_LABLE,IMG3_LABLE,IMG4_LABLE,IMG5_LABLE,IMG6_LABLE,IMG7_LABLE,IMG8_LABLE,IMG9_LABLE};

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2018-2020, Jianjia Ma
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-03-29 Jianjia Ma first implementation
*/
#include <stdio.h>
#include <transform.h>
#include "nnom.h"
#include "image.h"
#include "weights.h"
nnom_model_t *model;
const char codeLib[] = "@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ";
/* Render a 28x28 int8 grayscale image to the console as ASCII art.
 * Each pixel value is mapped into the codeLib glyph ramp (darker pixel ->
 * denser glyph) and printed twice so the image is not squashed by the
 * roughly 2:1 character cell aspect ratio. */
void print_img(int8_t * buf)
{
    for (int row = 0; row < 28; row++)
    {
        for (int col = 0; col < 28; col++)
        {
            int8_t pix = buf[row * 28 + col];
            /* Scale 0..127 pixel into 0..69 index of codeLib, clamped. */
            int shade = 69 / 127.0 * (127 - pix);
            if (shade < 0)
                shade = 0;
            else if (shade > 69)
                shade = 69;
            printf("%c%c", codeLib[shade], codeLib[shade]);
        }
        printf("\n");
    }
}
// Do simple test using image in "image.h" with model created previously.
/* Run one MNIST inference on a built-in test image.
 *
 * argv[1] selects the image index (0 .. TOTAL_IMAGE-1). Prints the image,
 * the ground-truth label, the predicted label and the confidence.
 * Usage: mnist_nnom <image_number>
 */
void mnist_nnom(int argc, char **argv)
{
    uint32_t tick, time;
    uint32_t predic_label;
    float prob;
    int32_t index;

    /* Validate argc BEFORE touching argv[1]: when argc < 2, argv[1] is
     * NULL and atoi(NULL) is undefined behaviour. Also reject negative
     * indices, which would read out of bounds. */
    if (argc != 2) {
        printf("Please input image number within %d\n", TOTAL_IMAGE - 1);
        return;
    }
    index = atoi(argv[1]);
    if (index < 0 || index >= TOTAL_IMAGE) {
        printf("Please input image number within %d\n", TOTAL_IMAGE - 1);
        return;
    }

    /* Only build the model once the arguments are known to be valid. */
    model = nnom_model_create();

    printf("\nprediction start.. \n");
#ifdef __RT_THREAD_H__
    tick = rt_tick_get();
#endif
    /* Copy the 28x28 (784 byte) image into the model input buffer. */
    memcpy(nnom_input_data, (int8_t *)&img[index][0], 784);
    nnom_predict(model, &predic_label, &prob);
#ifdef __RT_THREAD_H__
    time = rt_tick_get() - tick;
#endif
    /* print original image to console */
    print_img((int8_t *)&img[index][0]);
#ifdef __RT_THREAD_H__
    printf("Time: %u tick\n", time);
#endif
    printf("Truth label: %d\n", label[index]);
    /* predic_label is unsigned; use %u instead of %d. */
    printf("Predicted label: %u\n", predic_label);
    printf("Probability: %d%%\n", (int)(prob * 100));
}
#ifdef __RT_THREAD_H__
MSH_CMD_EXPORT(mnist_nnom, nnom mnist demo and image number should be followed);
#endif

View File

@ -0,0 +1,166 @@
'''
Copyright (c) 2018-2020
Jianjia Ma
majianjia@live.com
SPDX-License-Identifier: Apache-2.0
Change Logs:
Date Author Notes
2019-02-12 Jianjia Ma The first version
'''
import matplotlib.pyplot as plt
import sys
import os
nnscript = os.path.abspath('../../../Framework/knowing/nnom/scripts')
sys.path.append(nnscript)
from tensorflow.keras import *
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import load_model, save_model
import tensorflow as tf
import numpy as np
from nnom import *
model_name = 'mnist_simple_trained_model.h5'
def image_to_cfile(data, label, num_of_image, file='image.h'):
    """Export randomly selected images and their labels as a C header.

    Writes ``#define IMG<i> {...}`` / ``#define IMG<i>_LABLE`` macros plus the
    aggregate ``img`` and ``label`` int8 arrays, so the firmware demo can embed
    a handful of test images.

    Args:
        data: image array, values already scaled into the int8 range (0..127).
        label: integer class labels aligned with ``data``.
        num_of_image: number of images to export.
        file: output header path.
    """
    # Sample from at most the first 1000 images (original behaviour for the
    # 10k MNIST test set), but never past the end of the dataset so smaller
    # inputs no longer raise IndexError.
    pool = min(1000, len(data))
    with open(file, 'w') as f:
        for i in range(num_of_image):
            selected = np.random.randint(0, pool)
            f.write('#define IMG%d {' % (i))
            # Values were pre-scaled by the caller; round to whole int8 counts.
            np.round(data[selected]).flatten().tofile(f, sep=", ", format="%d")
            f.write('} \n')
            f.write('#define IMG%d_LABLE' % (i))
            f.write(' %d \n \n' % label[selected])
        f.write('#define TOTAL_IMAGE %d \n \n' % (num_of_image))
        # Aggregate arrays referencing the macros above.
        f.write('static const int8_t img[%d][%d] = {' % (num_of_image, data[0].flatten().shape[0]))
        f.write('IMG0')
        for i in range(num_of_image - 1):
            f.write(',IMG%d' % (i + 1))
        f.write('};\n\n')
        f.write('static const int8_t label[%d] = {' % (num_of_image))
        f.write('IMG0_LABLE')
        for i in range(num_of_image - 1):
            f.write(',IMG%d_LABLE' % (i + 1))
        f.write('};\n\n')
def train(x_train, y_train, x_test, y_test, batch_size=64, epochs=100):
    """Build, compile and fit the small CNN used by the MNIST demo.

    The trained model is saved to ``model_name`` and the Keras session is
    cleared so a later ``load_model`` does not collide with stale layer names.

    Returns:
        The Keras ``History`` object produced by ``model.fit``.
    """
    image_in = Input(shape=x_train.shape[1:])
    net = image_in
    # Three conv stages (12 -> 24 -> 48 filters), each 3x3/stride-1 'same'
    # convolution + ReLU, followed by a 2x2 stride-2 'same' max-pool; a
    # Dropout(0.2) is inserted only before the last pool.
    for filters, use_dropout in ((12, False), (24, False), (48, True)):
        net = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(net)
        net = ReLU()(net)
        if use_dropout:
            net = Dropout(0.2)(net)
        net = MaxPool2D((2, 2), strides=(2, 2), padding='same')(net)
    # Classifier head: 96-unit dense with dropout, then 10-way softmax.
    net = Flatten()(net)
    net = Dense(96)(net)
    net = Dropout(0.2)(net)
    net = ReLU()(net)
    net = Dense(10)(net)
    outputs = Softmax()(net)

    model = Model(inputs=image_in, outputs=outputs)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=2,
                        validation_data=(x_test, y_test),
                        shuffle=True)

    # Free the session to avoid nesting naming while we load the best model after.
    save_model(model, model_name)
    del model
    tf.keras.backend.clear_session()
    return history
if __name__ == "__main__":
    # Training configuration: 2 epochs is enough for a demo-quality model.
    epochs = 2
    num_classes = 10

    # The data, split between train and test sets:
    (x_train, y_train_num), (x_test, y_test_num) = mnist.load_data()
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices (one-hot).
    y_train = tf.keras.utils.to_categorical(y_train_num, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test_num, num_classes)

    # Add the trailing channel axis expected by Conv2D: (N,28,28) -> (N,28,28,1).
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2], 1)
    print('x_train shape:', x_train.shape)

    # Rescale pixels from 0~255 down to 0~1.
    x_test = x_test/255
    x_train = x_train/255
    print("data range", x_test.min(), x_test.max())

    # Select a few images (rescaled to the 0~127 int8 range) and write image.h.
    image_to_cfile(x_test*127, y_test_num, 10, file='image.h')

    # Train the model; it is saved to disk inside train().
    history = train(x_train, y_train, x_test, y_test, batch_size=64, epochs=epochs)

    # Reload the saved model for evaluation and weight export.
    model = load_model(model_name)

    # Evaluate (helper from the nnom scripts).
    evaluate_model(model, x_test, y_test)

    # Export the quantised weights for the NNoM C runtime (nnom.py helper).
    generate_model(model, np.vstack((x_train, x_test)), name="weights.h")

    # Plot the training curves.
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    plt.plot(range(0, epochs), acc, color='red', label='Training acc')
    plt.plot(range(0, epochs), val_acc, color='green', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')  # fixed: the plotted series are accuracies, not loss
    plt.legend()
    plt.show()

File diff suppressed because one or more lines are too long

View File

@ -51,7 +51,7 @@ ifeq ($(CONFIG_ADD_NUTTX_FETURES),y)
endif
ifeq ($(CONFIG_ADD_XIUOS_FETURES),y)
ifeq ($(CONFIG_ADD_XIZI_FETURES),y)
SRC_FILES :=
ifeq ($(CONFIG_APPLICATION_SENSOR_HCHO_TB600B_WQ_HCHO1OS), y)

View File

@ -18,7 +18,7 @@
* @date 2021.12.10
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif
#include <sensor.h>

View File

@ -18,7 +18,7 @@
* @date 2021.04.23
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif
#include <sensor.h>

View File

@ -18,7 +18,7 @@
* @date 2021.12.15
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif
#include <sensor.h>

View File

@ -18,7 +18,7 @@
* @date 2021.04.23
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif

View File

@ -18,7 +18,7 @@
* @date 2021.12.14
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif
#include <sensor.h>

View File

@ -18,7 +18,7 @@
* @date 2021.04.23
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif

View File

@ -18,7 +18,7 @@
* @date 2021.04.23
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif

View File

@ -18,7 +18,7 @@
* @date 2021.04.23
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif

View File

@ -18,7 +18,7 @@
* @date 2021.04.23
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif

View File

@ -18,7 +18,7 @@
* @date 2021.12.15
*/
#ifdef ADD_XIUOS_FETURES
#ifdef ADD_XIZI_FETURES
# include <user_api.h>
#endif
#include <sensor.h>

View File

@ -5,10 +5,10 @@ menu "Framework"
default y
choice
prompt "select os features"
default ADD_XIUOS_FETURES
default ADD_XIZI_FETURES
config ADD_XIUOS_FETURES
bool "add xiuos fetures"
config ADD_XIZI_FETURES
bool "add xizi fetures"
config ADD_NUTTX_FETURES
bool "add nuttx fetures"

View File

@ -2,7 +2,7 @@ config ADAPTER_4G_EC200T
string "EC200T adapter name"
default "ec200t"
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_EC200T_PWRKEY
int "EC200T PWRKEY pin number"
default "97"

View File

@ -2,7 +2,7 @@ config ADAPTER_BLUETOOTH_HC08
string "HC08 adapter name"
default "hc08"
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_HC08_RECV_BUFFER_SIZE
int "HC08 recv data buffer size"
default "128"

View File

@ -2,7 +2,7 @@ config ADAPTER_ETHERNET_HFA21
string "HFA21 ETHERNET adapter name"
default "hfa21_ethernet"
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_HFA21_DRIVER_EXTUART
bool "Using extra uart to support ethernet"

View File

@ -333,7 +333,7 @@ static int Hfa21EthernetConnect(struct Adapter *adapter, enum NetRoleType net_ro
{
int ret = 0;
char hfa21_ethernet_cmd[128];
char net_role_string[6] = {0};
char net_role_string[7] = {0};
/*Step1 : enter AT mode*/
Hfa21EthernetInitAtCmd(adapter->agent);

View File

@ -2,7 +2,7 @@ config ADAPTER_LORA_SX1278
string "SX1278 adapter name"
default "sx1278"
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_SX1278_DRIVER
string "SX1278 device spi driver path"
default "/dev/spi2_lora"

View File

@ -2,7 +2,7 @@ config ADAPTER_NBIOT_BC28
string "BC28 adapter name"
default "bc28"
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_BC28_RESETPIN
int "BC28 RESET pin number"
default "100"

View File

@ -2,7 +2,7 @@ config ADAPTER_WIFI_HFA21
string "HFA21 WIFI adapter name"
default "hfa21_wifi"
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_HFA21_DRIVER_EXTUART
bool "Using extra uart to support wifi"

View File

@ -17,7 +17,7 @@ choice
endchoice
if ADD_XIUOS_FETURES
if ADD_XIZI_FETURES
config ADAPTER_E18_DRIVER_EXTUART
bool "Using extra uart to support zigbee"

View File

@ -17,8 +17,9 @@
* @author AIIT XUOS Lab
* @date 2021.12.15
*/
#ifdef USING_CONTROL_PLC_OPCUA
#include "../interoperability/opcua/open62541.h"
#endif
#include "plc.h"

View File

@ -1,13 +1,14 @@
menuconfig SUPPORT_KNOWING_FRAMEWORK
bool "support knowing framework"
default n
select TRANSFORM_LAYER_ATTRIUBUTE
if SUPPORT_KNOWING_FRAMEWORK
source "$APP_DIR/Framework/knowing/tensorflow-lite/Kconfig"
source "$APP_DIR/Framework/knowing/filter/Kconfig"
source "$APP_DIR/Framework/knowing/ota/Kconfig"
source "$APP_DIR/Framework/knowing/image_processing/Kconfig"
source "$APP_DIR/Framework/knowing/cmsis_5/Kconfig"
source "$APP_DIR/Framework/knowing/kpu/Kconfig"
endif
menuconfig SUPPORT_KNOWING_FRAMEWORK
bool "support knowing framework"
default n
select TRANSFORM_LAYER_ATTRIUBUTE
if SUPPORT_KNOWING_FRAMEWORK
source "$APP_DIR/Framework/knowing/tensorflow-lite/Kconfig"
source "$APP_DIR/Framework/knowing/filter/Kconfig"
source "$APP_DIR/Framework/knowing/ota/Kconfig"
source "$APP_DIR/Framework/knowing/image_processing/Kconfig"
source "$APP_DIR/Framework/knowing/cmsis_5/Kconfig"
source "$APP_DIR/Framework/knowing/kpu/Kconfig"
source "$APP_DIR/Framework/knowing/nnom/Kconfig"
endif

View File

@ -4,11 +4,11 @@ menuconfig USING_CMSIS_5
if USING_CMSIS_5
menuconfig USING_USING_CMSIS_5_NN
menuconfig USING_CMSIS_5_NN
bool "CMSIS-5 NN"
default n
if USING_USING_CMSIS_5_NN
if USING_CMSIS_5_NN
config USING_CMSIS_5_NN_ACTIVATION
bool "CMSIS-5 NN ACTIVATION"

View File

@ -27,7 +27,8 @@
* Target Processor: Cortex-M
*
* -------------------------------------------------------------------- */
#include <inttypes.h>
#include "../../../Core/Include/cmsis_gcc.h"
#include "arm_nnsupportfunctions.h"
/**

View File

@ -27,7 +27,8 @@
* Target Processor: Cortex-M
*
* -------------------------------------------------------------------- */
#include <inttypes.h>
#include "../../../Core/Include/cmsis_gcc.h"
#include "arm_nnsupportfunctions.h"
/**

View File

@ -28,7 +28,8 @@
* Target Processor: Cortex-M
*
* -------------------------------------------------------------------- */
#include <inttypes.h>
#include "../../../Core/Include/cmsis_gcc.h"
#include "arm_nnsupportfunctions.h"
/**

View File

@ -8,7 +8,7 @@ CPPPATH = []
CPPPATH += [os.path.join(cwd, 'Core/Include')]
if GetDepend('USING_USING_CMSIS_5_NN'):
if GetDepend('USING_CMSIS_5_NN'):
CPPPATH += [os.path.join(cwd, 'DSP/Include')]
CPPPATH += [os.path.join(cwd, 'NN/Include')]
CPPDEFINES += ['__FPU_PRESENT=1']

View File

@ -46,8 +46,8 @@ void k210_detect(char *json_file_path)
printf("open ov2640 fail !!");
return;
}
_ioctl_set_dvp_reso set_dvp_reso = {detect_params.sensor_output_size[1], detect_params.sensor_output_size[0]};
ioctl(g_fd, IOCTRL_CAMERA_SET_DVP_RESO, &set_dvp_reso);
_ioctl_set_reso set_dvp_reso = {detect_params.sensor_output_size[1], detect_params.sensor_output_size[0]};
ioctl(g_fd, IOCTRL_CAMERA_OUT_SIZE_RESO, &set_dvp_reso);
showbuffer =
(unsigned char *)rt_malloc_align(detect_params.sensor_output_size[0] * detect_params.sensor_output_size[1] * 2, 64);
if (NULL == showbuffer) {
@ -199,6 +199,8 @@ static void *thread_detect_entry(void *parameter)
/* display result */
for (int cnt = 0; cnt < detect_info.obj_number; cnt++) {
detect_info.obj[cnt].y1 += (detect_params.sensor_output_size[0] - detect_params.net_input_size[0])/2;
detect_info.obj[cnt].y2 += (detect_params.sensor_output_size[0] - detect_params.net_input_size[0])/2;
draw_edge((uint32_t *)showbuffer, &detect_info, cnt, 0xF800, (uint16_t)detect_params.sensor_output_size[1],
(uint16_t)detect_params.sensor_output_size[0]);
printf("%d: (%d, %d, %d, %d) cls: %s conf: %f\t", cnt, detect_info.obj[cnt].x1, detect_info.obj[cnt].y1,

View File

@ -0,0 +1,46 @@
menuconfig USING_NNOM
    bool "NNOM"
    default n

if USING_NNOM
    config NNOM_USING_STATIC_MEMORY
        bool "Using static memory"
        default n
        help
            must set buf using "nnom_set_static_buf()" before creating a model.

    config NNOM_TRUNCATE
        bool "Using NNOM Truncate"
        default n
        help
            disable: backend ops use round to the nearest int (default). enable: floor

    choice
        prompt "Select NNOM Format"
        default NNOM_USING_HWC

        config NNOM_USING_HWC
            bool "Using HWC Format"

        config NNOM_USING_CHW
            bool "Using CHW Format"
            help
                CHW is incompatible with CMSIS-NN and must be used when using hardware accelerator such as KPU in K210 chip
    endchoice

    choice
        prompt "Select NNOM Backend"
        # Fixed: the previous default (USING_NNOM_NORMAL) is not a defined
        # symbol anywhere; default to the local backend instead.
        default NNOM_USING_LOCAL

        config NNOM_USING_LOCAL
            bool "Using NNOM local backend"

        config NNOM_USING_CMSIS_NN
            bool "Using CMSIS-NN backend"
            select USING_CMSIS_5
            select USING_CMSIS_5_NN
    endchoice
endif

View File

@ -0,0 +1,14 @@
# Neural Network on Microcontroller (NNoM)
NNoM is a high-level inference Neural Network library specifically for microcontrollers, released under Apache License 2.0.
Current version is 0.4.3. More information available in [NNOM](https://github.com/majianjia/nnom).
## CMSIS-NN Backend
[CMSIS-NN/DSP](https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN) are inference acceleration libraries for Arm Cortex-M CPUs and can be used as the backend of NNoM for high performance.
## Notes
- CHW format is incompatible with CMSIS-NN; it must be used when using a hardware accelerator such as the KPU in the K210 chip.
- Static memory buffer must be set by using "nnom_set_static_buf()" before creating a model.

View File

@ -0,0 +1,18 @@
import os
from building import *

cwd = GetCurrentDir()
src = []
CPPDEFINES = []
CPPPATH = []

# NNoM sources: core graph code, layer implementations and compute backends.
src += Glob('src/core/*.c')
src += Glob('src/layers/*.c')
src += Glob('src/backends/*.c')

# Build include paths with os.path.join, consistent with the other
# SConscripts in this tree (e.g. cmsis_5/SConscript).
CPPPATH += [os.path.join(cwd, 'inc'), os.path.join(cwd, 'port')]

group = DefineGroup('nnom', src, depend = ['USING_NNOM'], CPPPATH = CPPPATH, LOCAL_CPPDEFINES=CPPDEFINES)

Return('group')

View File

@ -0,0 +1,96 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Activation layer/function declarations for NNoM: layer wrappers, the
 * activation descriptor structs, and the factory APIs. */
#ifndef __NNOM_ACTIVATION_H__
#define __NNOM_ACTIVATION_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// activation layer: wraps a standalone nnom_activation_t so it can sit in the graph
typedef struct _nnom_activation_layer_t
{
	nnom_layer_t super;
	nnom_activation_t *act;
} nnom_activation_layer_t;

// activation with fixed q format (tanh and sigmoid)
typedef struct _nnom_activation_fixed_q_t
{
	nnom_activation_t super;
	uint8_t dec_bit;          // number of fractional bits of the input Q-format
} nnom_activation_fixed_q_t;

// leaky relu
typedef struct _nnom_activation_leaky_relu_t
{
	nnom_activation_t super;
	q7_t alpha;               // alpha is present by q0.7 format. (-128 = -1)
} nnom_activation_leaky_relu_t;

// advance relu (full ReLU)
typedef struct _nnom_activation_adv_relu_t
{
	nnom_activation_t super;
	q7_t negative_slope;      // negative_slope is present by q0.7 format. (-128 = -1)
	float max;                // cap of the max value
	float threshold;          // threshold
} nnom_activation_adv_relu_t;

// method
nnom_status_t activation_run(nnom_layer_t* layer);
nnom_status_t activation_free(nnom_layer_t *layer);

// activation delete
void act_delete(nnom_activation_t* act);

// a direct api on tensor
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor);

// Layer API
nnom_layer_t *Activation(nnom_activation_t *act);
nnom_layer_t *ReLU(void);
nnom_layer_t *LeakyReLU(float alpha);
nnom_layer_t *AdvReLU(float alpha, float max, float threshold);
nnom_layer_t *Sigmoid(int32_t dec_bit);
nnom_layer_t *TanH(int32_t dec_bit);

// Activation API.
nnom_activation_t* act_relu(void);
nnom_activation_t* act_leaky_relu(float alpha);
nnom_activation_t* act_adv_relu(float negative_slope, float max, float threshold);
nnom_activation_t* act_tanh(int32_t dec_bit);
nnom_activation_t* act_sigmoid(int32_t dec_bit);
nnom_activation_t* act_hard_tanh(int32_t dec_bit);
nnom_activation_t* act_hard_sigmoid(int32_t dec_bit);

// utils
int32_t act_get_dec_bit(nnom_activation_type_t type, int32_t dec_bit);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_ACTIVATION_H__ */

View File

@ -0,0 +1,47 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Average-pooling layer declarations. Reuses the max-pool layer struct;
 * only the build/run methods differ. */
#ifndef __NNOM_AVGPOOL_H__
#define __NNOM_AVGPOOL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_maxpool.h"

// Avg Pooling
typedef nnom_maxpool_layer_t nnom_avgpool_layer_t;

// method
nnom_status_t avgpooling_build(nnom_layer_t *layer);
nnom_status_t avgpool_run(nnom_layer_t *layer);

// API
nnom_layer_t *avgpool_s(const nnom_pool_config_t * config);   // structured config (generated-code path)
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_AVGPOOL_H__ */

View File

@ -0,0 +1,43 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Base layer: the default pass-through build/run methods that other
 * layers fall back on. */
#ifndef __NNOM_BASELAYER_H__
#define __NNOM_BASELAYER_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_input.h"

// method
nnom_status_t default_build(nnom_layer_t *layer);
nnom_status_t default_run(nnom_layer_t *layer);

// API
nnom_layer_t *baselayer_s(const nnom_layer_config_t * config);
nnom_layer_t *BaseLayer(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_BASELAYER_H__ */

View File

@ -0,0 +1,55 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Concatenate layer: joins multiple input tensors along one axis. */
#ifndef __NNOM_CONCAT_H__
#define __NNOM_CONCAT_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// concatenate layer
typedef struct _nnom_concat_layer
{
	nnom_layer_t super;
	int8_t axis;     // axis to concatenate along (negative values count from the end)
} nnom_concat_layer_t;

typedef struct _nnom_concat_config_t
{
	nnom_layer_config_t super;
	int8_t axis;
} nnom_concat_config_t;

// method
nnom_status_t concat_build(nnom_layer_t *layer);
nnom_status_t concat_run(nnom_layer_t *layer);

// API
nnom_layer_t *concat_s(const nnom_concat_config_t *config);
nnom_layer_t *Concat(int8_t axis);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_CONCAT_H__ */

View File

@ -0,0 +1,83 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* 2D convolution layer declarations: runtime layer struct, the structured
 * configuration used by generated model code, and the factory APIs. */
#ifndef __NNOM_CONV2D_H__
#define __NNOM_CONV2D_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// child layers parameters
typedef struct _nnom_conv2d_layer_t
{
	nnom_layer_t super;
	nnom_3d_shape_t kernel;
	nnom_3d_shape_t stride;
	nnom_3d_shape_t pad;
	nnom_3d_shape_t dilation;
	nnom_padding_t padding_type;
	uint32_t filter_mult;               // filter size (for conv) or multilplier (for depthwise)
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	// per-channel / per-layer quantisation shifts applied at run time
	nnom_qformat_param_t * output_rshift;
	nnom_qformat_param_t * bias_lshift;
} nnom_conv2d_layer_t;

// a machine interface for configuration
typedef struct _nnom_conv2d_config_t
{
	nnom_layer_config_t super;
	nnom_qtype_t qtype;                 // quantisation type(per channel or per layer)
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	nnom_qformat_param_t *output_shift;
	nnom_qformat_param_t *bias_shift;
	uint32_t filter_size;
	int8_t kernel_size[2];
	int8_t stride_size[2];
	int8_t padding_size[2];
	int8_t dilation_size[2];
	nnom_padding_t padding_type;
} nnom_conv2d_config_t;

// method
nnom_status_t conv2d_run(nnom_layer_t *layer);
nnom_status_t conv2d_build(nnom_layer_t *layer);
nnom_status_t conv2d_free(nnom_layer_t *layer);

// utils
uint32_t conv_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation);

// API
nnom_layer_t *conv2d_s(const nnom_conv2d_config_t *config);
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
					 const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_CONV2D_H__ */

View File

@ -0,0 +1,52 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-30     Jianjia Ma   The first version
 */

/* Transposed (de-)convolution layer: shares the conv2d structs, with its
 * own run/build methods and output-length helper. */
#ifndef __NNOM_DECONV2D_H__
#define __NNOM_DECONV2D_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_conv2d.h"

// child layers parameters
typedef nnom_conv2d_layer_t nnom_conv2d_trans_layer_t;
typedef nnom_conv2d_config_t nnom_conv2d_trans_config_t;

// method
nnom_status_t conv2d_trans_run(nnom_layer_t *layer);
nnom_status_t conv2d_trans_build(nnom_layer_t *layer);

// utils
uint32_t conv_trans_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation);

// API
nnom_layer_t *conv2d_trans_s(const nnom_conv2d_config_t *config);
nnom_layer_t *Conv2DTrans(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
					 const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_DECONV2D_H__ */

View File

@ -0,0 +1,48 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Cropping layer: the inverse of zero-padding; shares its structs. */
#ifndef __NNOM_CROPPING_H__
#define __NNOM_CROPPING_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_zero_padding.h"

// Cropping, same as zeropadding
typedef nnom_zero_padding_layer_t nnom_cropping_layer_t;
typedef nnom_zero_padding_config_t nnom_cropping_config_t;

// method
nnom_status_t cropping_build(nnom_layer_t *layer);
nnom_status_t cropping_run(nnom_layer_t *layer);

// API
nnom_layer_t * cropping_s(const nnom_cropping_config_t *config);
nnom_layer_t *Cropping(nnom_border_t pad);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_CROPPING_H__ */

View File

@ -0,0 +1,63 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Fully-connected (dense) layer declarations. */
#ifndef __NNOM_DENSE_H__
#define __NNOM_DENSE_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

typedef struct _nnom_dense_layer_t
{
	nnom_layer_t super;
	size_t output_unit;                   // number of output neurons
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	// quantisation shifts applied at run time
	nnom_qformat_param_t *output_rshift;
	nnom_qformat_param_t *bias_lshift;
} nnom_dense_layer_t;

// a machine interface for configuration
typedef struct _nnom_dense_config_t
{
	nnom_layer_config_t super;
	nnom_qtype_t qtype;                   // quantisation type(per channel or per layer)
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	nnom_qformat_param_t *output_shift;
	nnom_qformat_param_t *bias_shift;
} nnom_dense_config_t;

// method
nnom_status_t dense_free(nnom_layer_t *layer);
nnom_status_t dense_build(nnom_layer_t *layer);
nnom_status_t dense_run(nnom_layer_t *layer);

// API
nnom_layer_t *dense_s(const nnom_dense_config_t *config);
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_DENSE_H__ */

View File

@ -0,0 +1,44 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Depthwise 2D convolution: reuses the conv2d structs with its own
 * build/run methods. */
#ifndef __NNOM_DW_CONV2D_H__
#define __NNOM_DW_CONV2D_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_conv2d.h"

// method
nnom_status_t dw_conv2d_build(nnom_layer_t *layer);
nnom_status_t dw_conv2d_run(nnom_layer_t *layer);

//API
nnom_layer_t *dw_conv2d_s(const nnom_conv2d_config_t *config);
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
						const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_DW_CONV2D_H__ */

View File

@ -0,0 +1,46 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Flatten layer: collapses the input tensor into one dimension. */
#ifndef __NNOM_FLATTEN_H__
#define __NNOM_FLATTEN_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// no special parameters but we need it.
typedef struct _nnom_flatten_config_t{
	nnom_layer_config_t super;
} nnom_flatten_config_t;

// method
nnom_status_t flatten_build(nnom_layer_t *layer);
nnom_status_t flatten_run(nnom_layer_t *layer);

// API
nnom_layer_t *flatten_s(const nnom_flatten_config_t *config);
nnom_layer_t *Flatten(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_FLATTEN_H__ */

View File

@ -0,0 +1,54 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Global pooling (max / avg / sum) layer declarations; shares the
 * max-pool layer struct. */
#ifndef __NNOM_GLOBAL_POOL_H__
#define __NNOM_GLOBAL_POOL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_maxpool.h"

typedef struct _nnom_global_pool_config_t
{
	nnom_layer_config_t super;
	int16_t output_shift;     // output right-shift (quantisation), used by sum-pool
}nnom_global_pool_config_t;

// method
nnom_status_t global_pool_build(nnom_layer_t *layer);

// API
nnom_layer_t * global_maxpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t * global_avgpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t * global_sumpool_s(const nnom_global_pool_config_t *config);

nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_GLOBAL_POOL_H__ */

View File

@ -0,0 +1,60 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-08-27     Jianjia Ma   The first version
 */

/* GRU cell declarations for the RNN layer. */
#ifndef __NNOM_GRU_CELL_H__
#define __NNOM_GRU_CELL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "nnom_rnn.h"
#include "nnom_activation.h"

typedef struct _nnom_gru_cell_config_t
{
	nnom_layer_config_t super;
	nnom_tensor_t *weights;               // input kernel
	nnom_tensor_t* recurrent_weights;     // recurrent kernel
	nnom_tensor_t *bias;
	nnom_qformat_param_t q_dec_z, q_dec_h;	// z, r, h
	uint16_t units;                       // number of GRU units (state size)
} nnom_gru_cell_config_t;

typedef struct _nnom_gru_cell_t
{
	nnom_rnn_cell_t super;
	nnom_tensor_t* weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t* bias;
	// decide later.
	// z, r, h
	nnom_qformat_param_t q_dec_z, q_dec_h;
	nnom_qformat_param_t oshift_iw, oshift_hw, bias_shift;   // run-time output shifts
} nnom_gru_cell_t;

// gru
nnom_rnn_cell_t *gru_cell_s(const nnom_gru_cell_config_t* config);

nnom_status_t gru_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t gru_cell_build(nnom_rnn_cell_t* cell);
nnom_status_t gru_cell_run(nnom_rnn_cell_t* cell);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_GRU_CELL_H__ */

View File

@ -0,0 +1,57 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Input (I/O) layer declarations: the entry point feeding user data into
 * the model graph. */
#ifndef __NNOM_INPUT_H__
#define __NNOM_INPUT_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// IO layer
typedef struct _nnom_io_layer
{
	nnom_layer_t super;
	nnom_3d_shape_t shape;
	nnom_qformat_param_t dec_bit;   // fractional bits of the input Q-format
	void *buf;                      // input or output
} nnom_io_layer_t;

typedef struct _nnom_io_config_t
{
	nnom_layer_config_t super;
	nnom_tensor_t *tensor;
}nnom_io_config_t;

// method
nnom_status_t input_build(nnom_layer_t *layer);
nnom_status_t input_run(nnom_layer_t *layer);

// API
nnom_layer_t *input_s(const nnom_io_config_t* config);
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_INPUT_H__ */

View File

@ -0,0 +1,54 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

/* Lambda layer: lets the user plug custom run/build/free callbacks into
 * the graph, with an opaque parameter pointer. */
#ifndef __NNOM_LAMBDA_H__
#define __NNOM_LAMBDA_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_input.h"

// lambda layer
typedef struct _nnom_lambda_layer_t
{
	nnom_layer_t super;
	void *parameters;	// parameters for lambda
} nnom_lambda_layer_t;

// lambda layer
typedef struct _nnom_lambda_config_t
{
	nnom_layer_config_t super;
	nnom_status_t (*run_func_name)(nnom_layer_t *layer);	// run method. required
	nnom_status_t (*build_func_name)(nnom_layer_t *layer);	// compute output buffer shape. can be left null, will call default_build()
	nnom_status_t (*free_func_name)(nnom_layer_t *layer);	// a callback to free private resources (comp buf not included) can be left null
	void *parameters;	// parameters for lambda
} nnom_lambda_config_t;

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_LAMBDA_H__ */

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-24 Jianjia Ma The first version
*/
#ifndef __NNOM_LSTM_CELL_H__
#define __NNOM_LSTM_CELL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom_rnn.h"
#include "nnom_activation.h"
// a machine interface for configuration
// Build-time configuration of an LSTM cell: quantised weight tensors plus
// the Q-format (fixed-point decimal-bit) parameters of each internal signal.
typedef struct _nnom_lstm_cell_config_t
{
nnom_layer_config_t super;          // common layer-config header
nnom_tensor_t *weights;             // input kernel weights
nnom_tensor_t* recurrent_weights;   // recurrent kernel weights
nnom_tensor_t *bias;                // bias tensor
nnom_qformat_param_t q_dec_z, q_dec_h, q_dec_c; // z = iw + hw, c = cell state; h=output and memory
uint16_t units;                     // number of output units of the cell
} nnom_lstm_cell_config_t;
// Runtime instance of an LSTM cell (built from nnom_lstm_cell_config_t).
typedef struct _nnom_lstm_cell_t
{
nnom_rnn_cell_t super;              // RNN-cell base ("super class")
nnom_tensor_t* weights;             // input kernel weights
nnom_tensor_t* recurrent_weights;   // recurrent kernel weights
nnom_tensor_t* bias;                // bias tensor
// experimental,
// iw: input x weight
// hw: hidden state x recurrent weight
// h: hidden state (memory)
// c: cell state
nnom_qformat_param_t q_dec_z, q_dec_h, q_dec_c;                  // Q-format decimal bits of z, h and c
nnom_qformat_param_t oshift_iw, oshift_hw, oshift_zc, bias_shift; // output right-shifts for each stage + bias shift
} nnom_lstm_cell_t;
// LSTM
nnom_rnn_cell_t *lstm_cell_s(const nnom_lstm_cell_config_t* config);
nnom_status_t lstm_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t lstm_cell_q7_q15_build(nnom_rnn_cell_t* cell);
nnom_status_t lstm_cell_q7_q15_run(nnom_rnn_cell_t* cell);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LSTM_CELL_H__ */

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_MATRIX_H__
#define __NNOM_MATRIX_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// the maximum input layer hooked to this layer
#define MAX_INPUT_LAYER 8
// matrix layer
// Runtime instance of an element-wise matrix layer (Add / Sub / Mult).
typedef struct _nnom_matrix_layer_t
{
nnom_layer_t super;  // layer base ("super class")
int16_t oshift;      // output right shift
} nnom_matrix_layer_t;
// Configuration for building a matrix layer through the structured API.
typedef struct _nnom_matrix_config_t
{
nnom_layer_config_t super;  // common layer-config header
int16_t output_shift;       // output right shift
} nnom_matrix_config_t;
// methods
nnom_layer_t* _same_shape_matrix_layer(void);
nnom_status_t add_run(nnom_layer_t *layer);
nnom_status_t sub_run(nnom_layer_t *layer);
nnom_status_t mult_run(nnom_layer_t *layer);
// API
nnom_layer_t *add_s(const nnom_matrix_config_t * config);
nnom_layer_t *sub_s(const nnom_matrix_config_t * config);
nnom_layer_t *mult_s(const nnom_matrix_config_t * config);
nnom_layer_t *Add(int16_t oshift);
nnom_layer_t *Sub(int16_t oshift);
nnom_layer_t *Mult(int16_t oshift);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_MATRIX_H__ */

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_MAXPOOL_H__
#define __NNOM_MAXPOOL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Max Pooling
// Runtime instance of a Max-Pooling layer. Also reused (via typedef) by the
// average/sum pooling layers declared in their own headers.
typedef struct _nnom_maxpool_layer_t
{
nnom_layer_t super;           // layer base ("super class")
nnom_3d_shape_t kernel;       // pooling window size
nnom_3d_shape_t stride;       // pooling stride
nnom_3d_shape_t pad;          // computed padding sizes
nnom_padding_t padding_type;  // PADDING_VALID or PADDING_SAME
int16_t output_shift; // reserve
} nnom_maxpool_layer_t;
// a machine interface for configuration
typedef struct _nnom_pool_config_t
{
nnom_layer_config_t super;    // common layer-config header
nnom_padding_t padding_type;  // PADDING_VALID or PADDING_SAME
int16_t output_shift;         // output right shift
int8_t kernel_size[2];        // pooling window size
int8_t stride_size[2];        // pooling stride
int8_t num_dim;               // number of dimensions of the pooling op
} nnom_pool_config_t;
// method
nnom_status_t maxpool_build(nnom_layer_t *layer);
nnom_status_t maxpool_run(nnom_layer_t *layer);
// API
nnom_layer_t *maxpool_s(const nnom_pool_config_t * config);
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_MAXPOOL_H__ */

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_OUTPUT_H__
#define __NNOM_OUTPUT_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_input.h"
// method
nnom_status_t output_build(nnom_layer_t *layer);
nnom_status_t output_run(nnom_layer_t *layer);
// API
nnom_layer_t *output_s(const nnom_io_config_t* config);
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_OUTPUT_H__ */

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-12-07 Jianjia Ma The first version
*/
#ifndef __NNOM_RESHAPE_H__
#define __NNOM_RESHAPE_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Runtime instance of a Reshape layer.
typedef struct _nnom_reshape_layer_t
{
nnom_layer_t super;      // layer base ("super class")
nnom_shape_data_t* dim;  // target dimensions
uint8_t num_dim;         // number of entries in 'dim'
} nnom_reshape_layer_t;
// Configuration for building a reshape layer through the structured API.
typedef struct nnom_reshape_config_t
{
nnom_layer_config_t super;  // common layer-config header
nnom_shape_data_t* dim;     // target dimensions
uint8_t num_dim;            // number of entries in 'dim'
} nnom_reshape_config_t;
// method
nnom_status_t reshape_run(nnom_layer_t *layer);
nnom_status_t reshape_build(nnom_layer_t *layer);
nnom_status_t reshape_free(nnom_layer_t *layer);
// API
nnom_layer_t *reshape_s(const nnom_reshape_config_t *config);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_RESHAPE_H__ */

View File

@ -0,0 +1,85 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_RNN_H__
#define __NNOM_RNN_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// a machine interface for configuration
// Configuration for building an RNN wrapper layer through the structured API.
typedef struct _nnom_rnn_config_t
{
nnom_layer_config_t super;  // common layer-config header
bool return_sequence;       // return the output of every timestep instead of only the last one
bool stateful;              // keep hidden states across inference calls
bool go_backwards;          // process the timesteps in reverse order
} nnom_rnn_config_t;
// RNN cell base type
typedef struct _nnom_rnn_cell_t
{
nnom_status_t (*run)(struct _nnom_rnn_cell_t* cell); // cell runner
nnom_status_t (*build)(struct _nnom_rnn_cell_t* cell); // cell builder, calculate buffer size, output data size
nnom_status_t (*free)(struct _nnom_rnn_cell_t* cell); // releases resources private to the cell; can be left null
nnom_layer_t *layer; // pointer to its layer holder
nnom_layer_config_t *config; // config for the cell, even though it is not a layer type
nnom_rnn_cell_type_t type;  // cell type (simple / GRU / LSTM), see nnom_rnn_cell_type_t
void *in_data; // input data
void *out_data; // output data
void *in_state; // input state data (or hidden state)
void *out_state; // output state data
size_t comp_buf_size; // the size of temporary buffer.
size_t state_size; // the size of hidden state
uint16_t units; // the output units
uint16_t feature_size; // the input feature size (vector size)
size_t macc; // stat of MAC count.
} nnom_rnn_cell_t;
// RNN wrapper layer: iterates its cell over the timesteps of the input.
typedef struct _nnom_rnn_layer_t
{
nnom_layer_t super;   // layer base ("super class")
nnom_rnn_cell_t *cell; // the cell executed per timestep
void *state_buf; // memory allocated to store state, size = 2 x size of state required by cell.
uint16_t timestamp_size;// size of timestamp
bool return_sequence; // whether to return the output for each unit (sequence)
bool stateful; // whether the states are kept after one inference
bool go_backwards; // whether go backwards timestamping
} nnom_rnn_layer_t;
// rnn layer
nnom_layer_t *rnn_s(nnom_rnn_cell_t *cell, const nnom_rnn_config_t* config);
nnom_status_t rnn_run(nnom_layer_t* layer);
nnom_status_t rnn_build(nnom_layer_t* layer);
nnom_status_t rnn_free(nnom_layer_t* layer);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_RNN_H__ */

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-20 Jianjia Ma The first version
*/
#ifndef __NNOM_SIMPLE_CELL_H__
#define __NNOM_SIMPLE_CELL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom_rnn.h"
#include "nnom_activation.h"
// This Simple Cell replicate the Keras's SimpleCell as blow
/*
def call(self, inputs, states, training=None):
prev_output = states[0] if nest.is_sequence(states) else states
h = K.dot(inputs, self.kernel)
h = K.bias_add(h, self.bias)
output = h + K.dot(prev_output, self.recurrent_kernel)
output = self.activation(output)
new_state = [output] if nest.is_sequence(states) else output
return output, new_state
*/
// a machine interface for configuration
// Build-time configuration of a SimpleCell (Keras SimpleRNN equivalent, see
// the reference implementation quoted above).
typedef struct _nnom_simple_cell_config_t
{
nnom_layer_config_t super;         // common layer-config header
nnom_tensor_t *weights;            // input kernel weights
nnom_tensor_t* recurrent_weights;  // recurrent kernel weights
nnom_tensor_t *bias;               // bias tensor
nnom_qformat_param_t q_dec_iw, q_dec_hw, q_dec_h;  // Q-format decimal bits; see field notes in nnom_simple_cell_t
nnom_activation_type_t act_type; // type of the activation
uint16_t units;                    // number of output units of the cell
} nnom_simple_cell_config_t;
// Runtime instance of a SimpleCell (built from nnom_simple_cell_config_t).
typedef struct _nnom_simple_cell_t
{
nnom_rnn_cell_t super;             // RNN-cell base ("super class")
nnom_activation_type_t act_type;   // activation applied to the cell output
nnom_tensor_t* weights;            // input kernel weights
nnom_tensor_t* recurrent_weights;  // recurrent kernel weights
nnom_tensor_t* bias;               // bias tensor
// experimental,
// iw: input x weight
// hw: hidden state x recurrent weight
// h: hidden state
nnom_qformat_param_t q_dec_iw, q_dec_hw, q_dec_h;   // Q-format decimal bits of iw, hw and h
nnom_qformat_param_t oshift_iw, oshift_hw, bias_shift; // output right-shifts for each matmul + bias shift
} nnom_simple_cell_t;
// RNN cells
// The shape for RNN input is (batch, timestamp, feature), where batch is always 1.
//
// SimpleCell
nnom_rnn_cell_t *simple_cell_s(const nnom_simple_cell_config_t* config);
nnom_status_t simple_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t simple_cell_build(nnom_rnn_cell_t* cell);
nnom_status_t simple_cell_run(nnom_rnn_cell_t* cell);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_SIMPLE_CELL_H__ */

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_SOFTMAX_H__
#define __NNOM_SOFTMAX_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Configuration for building a Softmax layer; it carries no parameters
// beyond the common header.
typedef struct _nnom_softmax_config_t
{
nnom_layer_config_t super;  // common layer-config header
} nnom_softmax_config_t;
// method
nnom_status_t softmax_run(nnom_layer_t *layer);
nnom_status_t softmax_build(nnom_layer_t *layer);
// API
nnom_layer_t *softmax_s(const nnom_softmax_config_t * config);
nnom_layer_t *Softmax(void);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_SOFTMAX_H__ */

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_SUMPOOL_H__
#define __NNOM_SUMPOOL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_maxpool.h"
// Sum Pooling
typedef nnom_maxpool_layer_t nnom_sumpool_layer_t;
// method
nnom_status_t sumpool_build(nnom_layer_t *layer);
nnom_status_t sumpool_run(nnom_layer_t *layer);
// API
nnom_layer_t *sumpool_s(const nnom_pool_config_t * config);
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_SUMPOOL_H__ */

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_UPSAMPLE_H__
#define __NNOM_UPSAMPLE_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Up Sampling layer (UnPooling)
// Runtime instance of an Up-Sampling (un-pooling) layer.
typedef struct _nnom_upsample_layer_t
{
nnom_layer_t super;      // layer base ("super class")
nnom_3d_shape_t kernel;  // upsampling factor per axis
} nnom_upsample_layer_t;
// Configuration for building an upsample layer through the structured API.
typedef struct _nnom_upsample_config_t
{
nnom_layer_config_t super;    // common layer-config header
nnom_shape_data_t kernel[2];  // upsampling factors, presumably (h, w) -- confirm against upsample_s()
} nnom_upsample_config_t;
// API
nnom_layer_t *upsample_s(const nnom_upsample_config_t *config);
nnom_layer_t *UpSample(nnom_3d_shape_t kernel);
// Methods
nnom_status_t upsample_build(nnom_layer_t *layer);
nnom_status_t upsample_run(nnom_layer_t *layer);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_UPSAMPLE_H__ */

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_ZERO_PADDING_H__
#define __NNOM_ZERO_PADDING_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Configuration for building a zero-padding layer through the structured API.
typedef struct _nnom_zero_padding_config_t
{
nnom_layer_config_t super;  // common layer-config header
nnom_border_t pad;          // padding amount on each border (top/bottom/left/right)
} nnom_zero_padding_config_t;
// zero padding
typedef struct _nnom_zero_padding_layer_t
{
nnom_layer_t super;  // layer base ("super class")
nnom_border_t pad;   // padding amount on each border (top/bottom/left/right)
} nnom_zero_padding_layer_t;
// API
nnom_layer_t *zeropadding_s(const nnom_zero_padding_config_t* config);
nnom_layer_t *ZeroPadding(nnom_border_t pad);
// method
nnom_status_t zero_padding_build(nnom_layer_t *layer);
nnom_status_t zero_padding_run(nnom_layer_t *layer);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_ZERO_PADDING_H__ */

View File

@ -0,0 +1,415 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-02-10 Jianjia Ma Compiler supports dense net connection
*/
#ifndef __NNOM_H__
#define __NNOM_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdarg.h>
#include <math.h>
#include "nnom_port.h"
#define NNOM_ALIGN (sizeof(char*)) // alignment when doing memory ops. Equal to size of pointer in byte.
#define q7_t int8_t
#define q15_t int16_t
#define q31_t int32_t
#define q63_t int64_t
/* version */
#define NNOM_MAJORVERSION 0 /**< major version number */
#define NNOM_SUBVERSION 4 /**< minor version number */
#define NNOM_REVISION 3 /**< revise version number */
#define NNOM_VERSION ((NNOM_MAJORVERSION * 10000) + (NNOM_SUBVERSION * 100) + NNOM_REVISION)
#ifdef ARM_NN_TRUNCATE
#define NNOM_TRUNCATE
#endif
#ifndef NNOM_TRUNCATE
#define NNOM_ROUND(out_shift) ((0x1 << out_shift) >> 1 )
#else
#define NNOM_ROUND(out_shift) 0
#endif
// Status / error codes returned throughout the nnom API
// (0 = success, negative values are errors).
typedef enum
{
NN_SUCCESS = 0, /**< No error */
NN_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */
NN_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */
NN_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation. */
NN_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */
NN_SINGULAR = -5, /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
NN_TEST_FAILURE = -6, /**< Test Failed */
NN_NO_MEMORY = -7, /**< Memory allocation failed */
NN_MORE_TODO = -8 /**< More work pending -- NOTE(review): meaning inferred from the name, confirm against callers */
} nnom_status_t;
typedef enum
{
NNOM_INVALID = 0,
NNOM_BASE,
NNOM_INPUT,
NNOM_OUTPUT,
NNOM_CONV_2D,
NNOM_DW_CONV_2D,
NNOM_CONV2D_TRANS,
NNOM_BATCHNORM,
NNOM_DENSE,
NNOM_ZERO_PADDING,
NNOM_CROPPING,
NNOM_RNN,
NNOM_ACTIVATION,
NNOM_RELU,
NNOM_LEAKY_RELU,
NNOM_ADV_RELU,
NNOM_SIGMOID,
NNOM_TANH,
NNOM_SOFTMAX,
NNOM_MAXPOOL,
NNOM_GLOBAL_MAXPOOL,
NNOM_AVGPOOL,
NNOM_GLOBAL_AVGPOOL,
NNOM_SUMPOOL,
NNOM_GLOBAL_SUMPOOL,
NNOM_UPSAMPLE,
NNOM_FLATTEN,
NNOM_RESHAPE,
NNOM_LAMBDA,
NNOM_CONCAT,
NNOM_ADD,
NNOM_SUB,
NNOM_MULT,
NNOM_TYPE_MAX
} nnom_layer_type_t;
#define DEFUALT_LAYER_NAMES \
{ \
"Unknown", \
"Base", \
"Input", \
"Output", \
"Conv2D", \
"DW_Conv2D", \
"Conv2DTrsp", \
"BatchNorm", \
"Dense", \
"ZeroPad", \
"Cropping", \
"RNN", \
"Activation", \
"ReLU", \
"Leaky_ReLU", \
"Adv_ReLU", \
"Sigmoid", \
"Tanh", \
"Softmax", \
"MaxPool", \
"GL_MaxPool", \
"AvgPool", \
"GL_AvgPool", \
"SumPool", \
"GL_SumPool", \
"UpSample", \
"Flatten", \
"Reshape", \
"Lambda", \
"Concat", \
"Add", \
"Sub", \
"Mult", \
}
extern const char default_layer_names[][12];
// We dont count softmax an activation here, softmax is instanced as a layer
typedef enum
{
ACT_UNKNOWN = 0,
ACT_RELU,
ACT_LEAKY_RELU,
ACT_ADV_RELU,
ACT_TANH,
ACT_SIGMOID,
ACT_HARD_TANH,
ACT_HARD_SIGMOID
} nnom_activation_type_t;
#define ACTIVATION_NAMES \
{ \
"Unknown", \
"ReLU", \
"LkyReLU", \
"AdvReLU", \
"TanH", \
"Sigmoid", \
"HrdTanH", \
"HrdSigd", \
}
extern const char default_activation_names[][8];
// RNN cell type
typedef enum
{
NNOM_UNKOWN_CELL = 0,
NNOM_SIMPLE_CELL,
NNOM_GRU_CELL,
NNOM_LSTM_CELL,
NNOM_CELL_TYPE_MAX
} nnom_rnn_cell_type_t;
#define DEFUALT_CELL_NAMES \
{ \
"Unknown", \
"Simple", \
"GRU", \
"LSTM", \
}
extern const char default_cell_names[][8];
// parameters
typedef enum
{
PADDING_VALID = 0,
PADDING_SAME
} nnom_padding_t;
#define NNOM_TENSOR_BUF_NULL (0) // This buffer is not in used
#define NNOM_TENSOR_BUF_TEMP (1) // The memory in IO is temporary occupided, can be reused by other layer once the computation is done.
#define NNOM_TENSOR_BUF_RESERVED (2) // the mem is reserve for this layer only (not to be reused by other layer.
// currently used in compiling.
#define NNOM_BUF_EMPTY (0)
#define NNOM_BUF_FILLED (1)
// basic types
#define nnom_qformat_param_t int32_t // this should match the backend, need a better way to do it.
#define nnom_shape_data_t uint16_t
typedef struct _nnom_3d_shape_t
{
nnom_shape_data_t h, w, c;
} nnom_3d_shape_t;
typedef struct _nnom_border_t
{
nnom_shape_data_t top, bottom, left, right;
} nnom_border_t;
// nnom_3d_shape_axis_t type provide the axis[] format access to nnom_3d_shape_t
typedef union {
nnom_3d_shape_t s;
nnom_shape_data_t axis[sizeof(nnom_3d_shape_t) / sizeof(nnom_shape_data_t)];
} nnom_3d_shape_axis_t;
// tensor quantisation types
typedef enum
{
NNOM_QTYPE_PER_TENSOR = 0,
NNOM_QTYPE_PER_AXIS = 1
} nnom_qtype_t;
typedef struct _nnom_weights
{
const void *p_value;
nnom_qformat_param_t shift;
} nnom_weight_t;
typedef struct _nnom_bias
{
const void *p_value;
nnom_qformat_param_t shift;
} nnom_bias_t;
// experimental
// Quantised tensor descriptor: raw data pointer plus per-tensor or
// per-axis Q-format (fixed-point) information.
typedef struct _nnom_tensor_t
{
void* p_data; // value
nnom_shape_data_t *dim; // dimension of this tensor
nnom_qformat_param_t *q_dec; // number of decimal bit for Q format (scale)
nnom_qformat_param_t *q_offset; // offset for each channel
nnom_qtype_t qtype; // the quantisation type
uint8_t num_dim; // the number of dimension
uint8_t bitwidth; // the data bit width, only support 8bit now
} nnom_tensor_t;
// nn wrappers
typedef struct _nnom_layer_t nnom_layer_t;
typedef struct _nnom_layer_io_t nnom_layer_io_t;
typedef struct _nnom_layer_hook_t nnom_layer_hook_t;
typedef struct _nnom_mem_block_t nnom_mem_block_t;
// activation wrapper
typedef struct _nnom_activation_t nnom_activation_t;
// Computational buffer handle used by layers that need scratch memory.
typedef struct _nnom_buf
{
nnom_mem_block_t *mem;  // backing memory block (assigned during compiling)
size_t size;            // requested buffer size
uint8_t type;           // NNOM_TENSOR_BUF_NULL / _TEMP / _RESERVED, see defines above
} nnom_buf_t;
// a memory block to store pre-assign memories during compiling. then assigned to each tensor after.
struct _nnom_mem_block_t
{
void *blk; // data block location
size_t size; // the maximum size for this block
uint8_t owners; // how many layers own this block
uint8_t state; // empty? filled? for static nn, currently only used in compiling
};
// Per-layer runtime statistics.
typedef struct _nnom_stat_t
{
size_t macc; //num. of mac operation
uint32_t time; // accumulated run time; units depend on the port's timer
} nnom_layer_stat_t;
// One link in the chain of connections attached to a layer I/O.
struct _nnom_layer_hook_t
{
nnom_layer_io_t *io; // hooked io
nnom_layer_hook_t *next; // next hook include secondary hooked layer
};
// A layer's input or output endpoint.
struct _nnom_layer_io_t
{
nnom_layer_hook_t hook; // for example: (layer->out)--hook--(layer->in)
nnom_layer_io_t *aux; // point to auxiliary I/O (multiple I/O layer)
nnom_tensor_t *tensor; // experimental
nnom_mem_block_t *mem; // memory block handles for compiling only. The memory is now passed by tensor. trying to remove it.
nnom_layer_t *owner; // which layer owns this io.
uint8_t type;  // NNOM_TENSOR_BUF_NULL / _TEMP / _RESERVED, see defines above
};
// structured configuration base type
typedef struct _nnom_layer_config_t
{
char* name; // the name of the layer in the pre-quantised model (the model trained by user before converted to nnom)
} nnom_layer_config_t;
// layers base
// Layer base "class": every concrete layer embeds this as its first
// member ('super'), so any layer pointer can be treated as nnom_layer_t*.
struct _nnom_layer_t
{
nnom_layer_t *shortcut; // shortcut points to the next layer, applied on compiling
nnom_status_t (*run)(nnom_layer_t *layer); // run method. required
nnom_status_t (*build)(nnom_layer_t *layer); // compute output buffer shape. can be left null, will call default_build()
nnom_status_t (*free)(nnom_layer_t *layer); // a callback to free private resources (comp buf not included) can be left null
nnom_buf_t *comp; // computational buf
nnom_activation_t *actail; // optional activation appended after the layer's output ("act-tail")
nnom_layer_config_t *config; // point to the configuration of the layers. for machine api only.
nnom_layer_type_t type; // layer types
nnom_layer_io_t *in; // IO buff, last*layer, states
nnom_layer_io_t *out; // IO buff, next*layer, states
nnom_layer_stat_t stat; // stats, timing, ops
};
// activation base: in-place activation applied to a tensor, either standalone
// or as a layer's "actail".
struct _nnom_activation_t
{
nnom_status_t (*run)(struct _nnom_activation_t *act); // applies the activation; required
nnom_tensor_t *tensor;       // tensor the activation operates on
nnom_activation_type_t type; // which activation this is, see nnom_activation_type_t
};
// local static functions when libc is not available
#ifdef NNOM_USING_STATIC_MEMORY
void nnom_set_static_buf(void* buf, size_t size);
void *nnom_malloc(size_t size);
void nnom_free(void* p);
#endif //NNOM_USING_STATIC_BUF
typedef struct _nnom_model nnom_model_t;
#include "nnom_tensor.h"
#include "nnom_layers.h"
#include "nnom_utils.h"
// models, I dont want to make model class as a child of layer class yet
// The model object: owns the layer chain, the construction methods and the
// shared memory blocks assigned to tensors during compiling.
struct _nnom_model
{
nnom_layer_t *head; // first layer of the model's layer chain
nnom_layer_t *tail; // last layer of the model's layer chain
// model constructor
nnom_status_t (*add)(struct _nnom_model *m, nnom_layer_t *layer); // has to pass a raw value
nnom_layer_t *(*hook)(nnom_layer_t *curr, nnom_layer_t *last); // create hook between 2 layers' primary IO.
nnom_layer_t *(*merge)(nnom_layer_t *method, nnom_layer_t *in1, nnom_layer_t *in2); // an older interface of merge 2 inputs.
nnom_layer_t *(*mergex)(nnom_layer_t *method, int num, ...); // merge a few layers using multiple input method (concatenate, add, ...)
nnom_layer_t *(*active)(nnom_activation_t *act, nnom_layer_t *target_layer); // add the activation to the existing layer's tail
// callback
nnom_status_t (*layer_callback)(nnom_model_t *m, nnom_layer_t *layer); // layer callback will be called after each layer(after actail).
// block memory for layers
nnom_mem_block_t blocks[NNOM_BLOCK_NUM];
size_t total_ops; // total operation count; presumably accumulated from per-layer macc stats -- confirm in compiler
bool is_inited; // is this structure initialized
bool is_allocated; // is this structure allocated by nnom (not by user)
};
#define NNOM_NULL_CHECK(p) \
if ((p) == NULL) \
{ \
NNOM_LOG("Error: NULL object.\n"); \
return NN_ARGUMENT_ERROR; \
}
// utils
size_t nnom_alignto(size_t value, uint32_t alignment);
size_t nnom_io_length(nnom_layer_io_t *io);
size_t nnom_hook_length(nnom_layer_hook_t *hook);
// memory (malloc + memeset 0)
void *nnom_mem(size_t size);
// get how much memory has been taken
size_t nnom_mem_stat(void);
// Model APIs
// create or init a model
nnom_model_t *new_model(nnom_model_t *m);
// compile as sequencial model
nnom_status_t sequencial_compile(nnom_model_t *m);
// compile as functional model
nnom_status_t model_compile(nnom_model_t *m, nnom_layer_t *input, nnom_layer_t *output);
// run a prediction
nnom_status_t model_run(nnom_model_t *m);
// delete model.
void model_delete(nnom_model_t *m);
// check version
nnom_status_t check_model_version(unsigned long model_version);
// callback, called after each layer has finished the calculation.
// this callback must return NN_SUCCESS for continually run the model. otherwise, model will be returned with the ERROR code.
// this function return NN_LENGTH_ERROR if the callback is already set to other.
nnom_status_t model_set_callback(nnom_model_t *m, nnom_status_t (*layer_callback)(nnom_model_t *m, nnom_layer_t *layer));
// delete callback.
void model_delete_callback(nnom_model_t *m);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_H__ */

View File

@ -0,0 +1,194 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#ifndef __NNOM_LAYERS_H__
#define __NNOM_LAYERS_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
// properties
nnom_3d_shape_t shape(size_t h, size_t w, size_t c);
nnom_3d_shape_t kernel(size_t h, size_t w);
nnom_3d_shape_t stride(size_t h, size_t w);
nnom_3d_shape_t dilation(size_t h, size_t w);
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right);
//nnom_qformat_t qformat(int8_t m, int8_t n);
size_t shape_size(nnom_3d_shape_t* s);
// this function is to add a new IO to current inited IO
// input, the targeted IO that the new IO will be added to
// output , the new IO
nnom_layer_io_t* io_add_aux(nnom_layer_io_t* targeted_io);
nnom_layer_io_t *io_init(void *owner_layer, nnom_layer_io_t *io);
#define NN_CEILIF(x,y) ((x+y-1)/y)
#include "layers/nnom_activation.h"
#include "layers/nnom_concat.h"
#include "layers/nnom_conv2d.h"
#include "layers/nnom_cropping.h"
#include "layers/nnom_conv2d_trans.h"
#include "layers/nnom_dense.h"
#include "layers/nnom_dw_conv2d.h"
#include "layers/nnom_flatten.h"
#include "layers/nnom_reshape.h"
#include "layers/nnom_global_pool.h"
#include "layers/nnom_input.h"
#include "layers/nnom_lambda.h"
#include "layers/nnom_matrix.h"
#include "layers/nnom_maxpool.h"
#include "layers/nnom_avgpool.h"
#include "layers/nnom_output.h"
#include "layers/nnom_rnn.h"
#include "layers/nnom_softmax.h"
#include "layers/nnom_sumpool.h"
#include "layers/nnom_upsample.h"
#include "layers/nnom_zero_padding.h"
#include "layers/nnom_rnn.h"
#include "layers/nnom_simple_cell.h"
#include "layers/nnom_lstm_cell.h"
#include "layers/nnom_gru_cell.h"
// Layer APIs ******
// (a summary for each individual layer's files)
// input/output
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf);
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf);
// Pooling
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);
// padding, cropping, upsample
nnom_layer_t *UpSample(nnom_3d_shape_t kernel);
nnom_layer_t *ZeroPadding(nnom_border_t pad);
nnom_layer_t *Cropping(nnom_border_t pad);
// Activation
nnom_layer_t *Activation(nnom_activation_t *act);
nnom_layer_t *ReLU(void);
nnom_layer_t *LeakyReLU(float alpha);
nnom_layer_t *Softmax(void);
nnom_layer_t *Sigmoid(int32_t dec_bit); // input dec bit
nnom_layer_t *TanH(int32_t dec_bit); // input dec bit
// Matrix
nnom_layer_t *Add(int16_t oshift); // output shift
nnom_layer_t *Sub(int16_t oshift); // output shift
nnom_layer_t *Mult(int16_t oshift); // output shift
nnom_layer_t *Flatten(void);
nnom_layer_t *Concat(int8_t axis);
// -- NN Constructers --
// conv2d
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
const nnom_weight_t *w, const nnom_bias_t *b);
// deconv2d
nnom_layer_t *Conv2DTrans(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
const nnom_weight_t *w, const nnom_bias_t *b);
// depthwise_convolution
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
const nnom_weight_t *w, const nnom_bias_t *b);
// fully connected, dense
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b);
// Lambda Layers
nnom_layer_t *Lambda(nnom_status_t (*run)(nnom_layer_t *), // run method, required
nnom_status_t (*build)(nnom_layer_t *), // optional, call default_build() if left null
nnom_status_t (*free)(nnom_layer_t *), // not required if no resources needs to be deleted, can be left null.
void *parameters); // user private parameters for run method, left null if not needed.
// building methods
nnom_status_t default_build(nnom_layer_t* layer);
nnom_status_t input_build(nnom_layer_t* layer);
nnom_status_t conv2d_build(nnom_layer_t* layer);
nnom_status_t dw_conv2d_build(nnom_layer_t* layer);
nnom_status_t conv2d_trans_build(nnom_layer_t* layer);
nnom_status_t dense_build(nnom_layer_t* layer);
nnom_status_t rnn_build(nnom_layer_t* layer);
nnom_status_t upsample_build(nnom_layer_t* layer);
nnom_status_t zero_padding_build(nnom_layer_t* layer);
nnom_status_t cropping_build(nnom_layer_t* layer);
nnom_status_t maxpool_build(nnom_layer_t* layer);
nnom_status_t avgpool_build(nnom_layer_t* layer);
nnom_status_t sumpool_build(nnom_layer_t* layer);
nnom_status_t global_pool_build(nnom_layer_t* layer);
nnom_status_t flatten_build(nnom_layer_t* layer);
nnom_status_t reshape_build(nnom_layer_t* layer);
nnom_status_t concat_build(nnom_layer_t* layer);
// run
nnom_status_t input_run(nnom_layer_t* layer);
nnom_status_t output_run(nnom_layer_t* layer);
nnom_status_t flatten_run(nnom_layer_t* layer);
nnom_status_t reshape_run(nnom_layer_t* layer);
nnom_status_t default_run(nnom_layer_t* layer); // simply copy data from input to output
nnom_status_t dw_conv2d_run(nnom_layer_t* layer);
nnom_status_t conv2d_run(nnom_layer_t* layer);
nnom_status_t conv2d_trans_run(nnom_layer_t* layer);
nnom_status_t dense_run(nnom_layer_t* layer);
nnom_status_t rnn_run(nnom_layer_t* layer);
nnom_status_t upsample_run(nnom_layer_t* layer);
nnom_status_t zero_padding_run(nnom_layer_t* layer);
nnom_status_t cropping_run(nnom_layer_t* layer);
nnom_status_t activation_run(nnom_layer_t* layer);
nnom_status_t softmax_run(nnom_layer_t* layer);
nnom_status_t maxpool_run(nnom_layer_t* layer);
nnom_status_t avgpool_run(nnom_layer_t* layer);
nnom_status_t sumpool_run(nnom_layer_t* layer);
nnom_status_t concat_run(nnom_layer_t* layer);
nnom_status_t add_run(nnom_layer_t* layer);
nnom_status_t sub_run(nnom_layer_t* layer);
nnom_status_t mult_run(nnom_layer_t* layer);
// Activation APIs
// Softmax is not considered as activation in NNoM, Softmax is in layer API.
nnom_activation_t* act_relu(void);
nnom_activation_t* act_leaky_relu(float alpha);
nnom_activation_t* act_sigmoid(int32_t dec_bit);
nnom_activation_t* act_tanh(int32_t dec_bit);
// direct API
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LAYERS_H__ */

View File

@ -0,0 +1,974 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Notice:
* Code in this file inlcudes derivative works from CMSIS, which is released under alternative license.
* Please check the LICENSE file for detial.
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-03-19 Jianjia Ma Local C implementation partly from CMSIS-NN
*/
#ifndef __NNOM_LOCAL_H__
#define __NNOM_LOCAL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "stdint.h"
#include "nnom_port.h"
#ifdef ARM_NN_TRUNCATE
#define NNOM_TRUNCATE
#endif
// SSAT implementation with C code
#ifndef __NNOM_SSAT
// Signed-saturation fallback used when the ARM __SSAT intrinsic is not
// available: clamp 'value' into the range of a signed 'bit'-bit integer,
// i.e. [-2^(bit-1), 2^(bit-1) - 1].
static inline int __NNOM_SSAT(int32_t value, int32_t bit) {
    const int32_t upper = (1 << (bit - 1)) - 1;  // 2^(bit-1) - 1
    const int32_t lower = -upper - 1;            // -2^(bit-1)
    if (value > upper)
        return upper;
    return (value < lower) ? lower : value;
}
#endif
// USAT implementation with C code
#ifndef __NNOM_USAT
// Unsigned-saturation fallback used when the ARM __USAT intrinsic is not
// available: clamp 'value' into the range of an unsigned 'bit'-bit integer,
// i.e. [0, 2^bit - 1].
// Fix: the previous fallback computed max as (1 << (bit-1)) - 1, which only
// covers bit-1 bits and disagrees with the ARM __USAT(value, bit) intrinsic
// (which saturates to 2^bit - 1), so ARM and non-ARM builds produced
// different results. The two paths now behave identically.
static inline int __NNOM_USAT(int32_t value, int32_t bit) {
    int32_t max = (int32_t)((1u << bit) - 1u);  // 2^bit - 1; unsigned shift avoids UB at bit == 31
    if (value < 0)
        return 0;
    else if (value > max)
        return max;
    else
        return value;
}
#endif
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define MIN(A, B) ((A) < (B) ? (A) : (B))
// Those functions/tables below are partially modifed from CMSIS-NN lib
// https://github.com/ARM-software/CMSIS_5
//
void local_avepool_q7_HWC(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q7_t *Im_out);
void local_avepool_q7_CHW(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q7_t *Im_out);
// modified from CMSIS-NN test_ref
void local_maxpool_q7_HWC(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, NULL by now
q7_t * Im_out);
void local_maxpool_q7_CHW(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, NULL by now
q7_t * Im_out);
void local_sumpool_q7_HWC(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, size = 4*output_size
q7_t * Im_out);
void local_sumpool_q7_CHW(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, size = 4*output_size
q7_t * Im_out);
// customised up sample pooling
void local_up_sampling_q7_HWC(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // NULL
q7_t *Im_out);
void local_up_sampling_q7_CHW(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // NULL
q7_t *Im_out);
void local_convolve_HWC_q7_nonsquare(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_convolve_CHW_q7_nonsquare(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_conv_trans_HWC_q7_nonsquare(const int8_t * Im_in,
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_CHW_q7_nonsquare(const q7_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_zero_padding_HWC_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_zero_padding_CHW_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_cropping_HWC_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_cropping_CHW_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_fully_connected_q7_opt(const q7_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t bias_shift, // amount of left-shift for bias
const uint16_t out_shift, // amount of right-shift for output
const q7_t * bias, q7_t * pOut, // output operand
q15_t * vec_buffer);
void local_fully_connected_q7(const q7_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t bias_shift, // amount of left-shift for bias
const uint16_t out_shift, // amount of right-shift for output
const q7_t * bias, q7_t * pOut, // output operand
q15_t * vec_buffer);
// matrix dot,
// it takes reorderd weight as input, (see dense layer for detail. this is basiclly a dense opt without bias)
void local_dot_q7_opt(const q7_t *pV, // pointer to vector
const q7_t *pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t out_shift, // amount of right-shift for output
q7_t *pOut); // result buffer
void local_dot_q7(const q7_t *pV, // pointer to vector
const q7_t *pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t out_shift, // amount of right-shift for output
q7_t *pOut); // output operand)
// softmax
void local_softmax_q7(const q7_t * vec_in, const uint32_t dim_vec, q7_t * p_out);
// sigmoid
void local_sigmoid_q7(q7_t * data, uint32_t size, int16_t int_width);
// tanh
void local_tanh_q7(q7_t * data, uint32_t size, int16_t int_width);
// relu
void local_relu_q7(q7_t * data, uint32_t size);
// leaky relu
void local_leaky_relu_q7(q7_t *data, q7_t alpha, uint32_t size);
// alpha in q7 format with dec_bit=7
// max and threshold has the same Q format with the activation
void local_adv_relu_q7(q7_t *data, q7_t alpha, q7_t max, q7_t threshold, uint32_t size);
// hard sigmoid,
// y=-1 if x < -2.5
// y=1 if x > 2.5
// otherwise y = 0.2 * x + 0.5 (y=0.20315 * x + 0.5)
void local_hard_sigmoid_q7(q7_t *data, uint32_t size, int16_t dec_bit);
// hard tanh
// y=-1 if x < -1
// y=1 if x > 1
// otherwise y = x
void local_hard_tanh_q7(q7_t *data, uint32_t size, int16_t dec_bit);
// matrix ops
void local_mult_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// add
void local_add_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// sub
void local_sub_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// take multiple blocks (>2) as input
void local_multiple_add_q7( q7_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q7_t **p_src);
void local_multiple_mult_q7( q7_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q7_t **p_src);
void local_multiple_sub_q7( q7_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q7_t **p_src);
// Below tables credit to CMSIS
// For more info. check CMSIS-NN lib
// https://github.com/ARM-software/CMSIS_5/blob/develop/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c
// 256-entry sigmoid lookup table with q7 (q0.7) outputs, taken from
// CMSIS-NN's arm_nntables. Entry 0 is 0x40 (= 0.5 in q0.7), i.e. sigmoid(0).
// NOTE(review): the layout (values rising toward 0x7f in the first half,
// then restarting near 0x00 mid-table) suggests the index wraps two's-
// complement style so the second half covers negative inputs — confirm
// against local_sigmoid_q7 before relying on this.
static const q7_t nnom_sigmoid_table_q7[256] = {
0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e,
0x50, 0x52, 0x53, 0x55, 0x57, 0x59, 0x5a, 0x5c,
0x5e, 0x5f, 0x61, 0x62, 0x63, 0x65, 0x66, 0x67,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
0x71, 0x72, 0x72, 0x73, 0x74, 0x74, 0x75, 0x76,
0x76, 0x77, 0x77, 0x78, 0x78, 0x79, 0x79, 0x7a,
0x7a, 0x7a, 0x7b, 0x7b, 0x7b, 0x7c, 0x7c, 0x7c,
0x7c, 0x7c, 0x7d, 0x7d, 0x7d, 0x7d, 0x7d, 0x7e,
0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04,
0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06,
0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09,
0x0a, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0e,
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
0x17, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, 0x21,
0x22, 0x24, 0x26, 0x27, 0x29, 0x2b, 0x2d, 0x2e,
0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
};
// 256-entry tanh lookup table with q7 (q0.7) outputs, taken from CMSIS-NN's
// arm_nntables. Entry 0 is 0x00 (= tanh(0)).
// NOTE(review): first half rises toward 0x7f (~ +1), second half starts at
// 0x80 (~ -1) and rises toward 0xf8 — presumably the index wraps two's-
// complement style so the second half covers negative inputs; confirm
// against local_tanh_q7 before relying on this.
static const q7_t nnom_tanh_table_q7[256] = {
0x00, 0x08, 0x10, 0x18, 0x1f, 0x27, 0x2e, 0x35,
0x3b, 0x41, 0x47, 0x4c, 0x51, 0x56, 0x5a, 0x5e,
0x61, 0x65, 0x68, 0x6a, 0x6d, 0x6f, 0x71, 0x72,
0x74, 0x75, 0x76, 0x78, 0x78, 0x79, 0x7a, 0x7b,
0x7b, 0x7c, 0x7c, 0x7d, 0x7d, 0x7e, 0x7e, 0x7e,
0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81,
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x82,
0x82, 0x82, 0x82, 0x82, 0x83, 0x83, 0x84, 0x84,
0x85, 0x85, 0x86, 0x87, 0x88, 0x88, 0x8a, 0x8b,
0x8c, 0x8e, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9b,
0x9f, 0xa2, 0xa6, 0xaa, 0xaf, 0xb4, 0xb9, 0xbf,
0xc5, 0xcb, 0xd2, 0xd9, 0xe1, 0xe8, 0xf0, 0xf8,
};
// ------------ 16bit ops --------------------
void local_avepool_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_avepool_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_maxpool_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_maxpool_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_sumpool_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, size = 4*output_size
q15_t *Im_out);
void local_sumpool_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, size = 4*output_size
q15_t *Im_out);
void local_up_sampling_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_up_sampling_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_convolve_HWC_q15_nonsquare(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_convolve_CHW_q15_nonsquare(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_conv_trans_HWC_q15_nonsquare(const int8_t * Im_in,
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_HWC_q15_nonsquare(const q15_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_CHW_q15_nonsquare(const q15_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimention x
const uint16_t dim_im_in_y, // input image dimention y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
/* q15 spatial padding / cropping.
 * HWC variants expect channel-last data, CHW variants channel-first. */
// Pad a q15 HWC image with zeros on all four edges.
void local_zero_padding_HWC_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
// Pad a q15 CHW image with zeros on all four edges.
void local_zero_padding_CHW_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
// Crop a q15 HWC image by the given amounts on each edge.
void local_cropping_HWC_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // cropping sizes y
const uint16_t padding_bottom, // cropping sizes y
const uint16_t padding_left, // cropping sizes x
const uint16_t padding_right, // cropping sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
// Crop a q15 CHW image by the given amounts on each edge.
void local_cropping_CHW_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // cropping sizes y
const uint16_t padding_bottom, // cropping sizes y
const uint16_t padding_left, // cropping sizes x
const uint16_t padding_right, // cropping sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
// Vector-by-matrix product (dot) in q15, without bias.
void local_dot_q15(const q15_t *pV, // pointer to vector
const q15_t *pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t out_shift, // amount of right-shift for output
q15_t *pOut); // output operand
// Same as local_dot_q15 but expects the reordered ("opt") weight layout.
void local_dot_q15_opt(const q15_t * pV,
const q15_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t out_shift,
q15_t * pOut);
// original implementation
// Bias is optional: pass `bias = NULL` and this behaves like a plain dot product.
void local_fully_connected_mat_q7_vec_q15(const q15_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t bias_shift, // amount of left-shift for bias
const uint16_t out_shift, // amount of right-shift for output
const q7_t * bias, // bias
q15_t * pOut, // output
q15_t * vec_buffer); // not used but to keep the interface same as the ARM's version
// Works on a reordered ("opt") weight matrix.
// Bias is optional: pass `bias = NULL` to skip it.
void local_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV,
const q7_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q7_t * bias,
q15_t * pOut,
q15_t * vec_buffer);
// matrix operation Q15
// Element-wise combine of `num_block` source blocks of `block_size` q15 values each,
// with the result right-shifted by `out_shift`.
void local_multiple_add_q15( q15_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q15_t **p_src);
void local_multiple_mult_q15( q15_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q15_t **p_src);
void local_multiple_sub_q15( q15_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q15_t **p_src);
// Element-wise multiply of two q15 vectors, output right-shifted by out_shift.
void local_mult_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// add
void local_add_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// sub
void local_sub_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// Convert Q7 to Q15
void local_q7_to_q15_no_shift(const q7_t *src, q15_t *des, uint32_t size);
void local_q7_to_q15(const q7_t *src, q15_t *des, uint32_t size);
// q15 shift to q7
void local_q15_to_q7(const q15_t *src, q7_t *des, uint32_t shift, uint32_t size);
// y = 1 - x
void local_1_minor_z_q15(q15_t *src, q15_t *des, uint16_t dec_bit, uint32_t size);
// q15 activations; these operate on the buffer in place unless a separate output is given.
void local_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);
void local_hard_sigmoid_q15(q15_t *data, uint32_t size, int16_t dec_bit);
void local_hard_tanh_q15(q15_t *data, uint32_t size, int16_t dec_bit);
void local_relu_q15(q15_t *data, uint32_t size);
void local_leaky_relu_q15(q15_t *data, q7_t alpha, uint32_t size);
void local_adv_relu_q15(q15_t *data, q7_t negative_slope, q15_t max, q15_t threshold, uint32_t size);
void local_sigmoid_q15(q15_t * data, uint32_t size, uint16_t int_width);
void local_tanh_q15(q15_t * data, uint32_t size, uint16_t int_width);
// 256-entry q15 sigmoid lookup table.
// NOTE(review): the values jump from ~0x7ff4 back to 0x000b at index 128 — this
// looks like a wrap-around table (second half covers negative inputs); confirm
// against the q15 activation implementation before relying on the layout.
static const q15_t nnom_sigmoid_table_q15[256] = {
0x4000, 0x4200, 0x43ff, 0x45fc, 0x47f5, 0x49eb, 0x4bdc, 0x4dc8,
0x4fad, 0x518a, 0x5360, 0x552c, 0x56ef, 0x58a8, 0x5a57, 0x5bfb,
0x5d93, 0x5f20, 0x60a1, 0x6216, 0x637f, 0x64db, 0x662b, 0x676f,
0x68a6, 0x69d2, 0x6af1, 0x6c05, 0x6d0d, 0x6e09, 0x6efb, 0x6fe2,
0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7,
0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f,
0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03,
0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d,
0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81,
0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17,
0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72,
0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa,
0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc,
0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0,
0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed,
0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4,
0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c,
0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e,
0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c,
0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d,
0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce,
0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152,
0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a,
0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388,
0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8,
0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a,
0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70,
0x0f42, 0x101e, 0x1105, 0x11f7, 0x12f3, 0x13fb, 0x150f, 0x162e,
0x175a, 0x1891, 0x19d5, 0x1b25, 0x1c81, 0x1dea, 0x1f5f, 0x20e0,
0x226d, 0x2405, 0x25a9, 0x2758, 0x2911, 0x2ad4, 0x2ca0, 0x2e76,
0x3053, 0x3238, 0x3424, 0x3615, 0x380b, 0x3a04, 0x3c01, 0x3e00,
};
// 256-entry q15 tanh lookup table.
// NOTE(review): entries 128+ start at 0x8000 (most negative q15) — like the
// sigmoid table this appears to wrap for negative inputs; confirm against the
// q15 activation implementation.
static const q15_t nnom_tanh_table_q15[256] = {
0x0000, 0x07fd, 0x0feb, 0x17b9, 0x1f59, 0x26bf, 0x2ddf, 0x34ae,
0x3b27, 0x4142, 0x46fd, 0x4c56, 0x514d, 0x55e2, 0x5a1a, 0x5df6,
0x617c, 0x64b0, 0x6797, 0x6a37, 0x6c95, 0x6eb5, 0x709e, 0x7254,
0x73dc, 0x753a, 0x7672, 0x7788, 0x787f, 0x795b, 0x7a1e, 0x7acb,
0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f,
0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48,
0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc,
0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7,
0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7,
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd,
0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001,
0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003,
0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007,
0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013,
0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035,
0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f,
0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183,
0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412,
0x849b, 0x8535, 0x85e2, 0x86a5, 0x8781, 0x8878, 0x898e, 0x8ac6,
0x8c24, 0x8dac, 0x8f62, 0x914b, 0x936b, 0x95c9, 0x9869, 0x9b50,
0x9e84, 0xa20a, 0xa5e6, 0xaa1e, 0xaeb3, 0xb3aa, 0xb903, 0xbebe,
0xc4d9, 0xcb52, 0xd221, 0xd941, 0xe0a7, 0xe847, 0xf015, 0xf803,
};
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LOCAL_H__ */

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-02-10 Jianjia Ma Compiler supports dense net connection
*/
#ifndef __NNOM_TENSOR_H__
#define __NNOM_TENSOR_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom.h"
// Release a tensor previously created with new_tensor().
void delete_tensor(nnom_tensor_t* t);
// Create a tensor for `num_dim` dimensions; `num_channel` sizes the per-channel
// quantisation parameters when `type` is per-axis.
nnom_tensor_t* new_tensor(nnom_qtype_t type, uint32_t num_dim, uint32_t num_channel);
// set tensor by value
// for tensor with quantized type NNOM_QTYPE_PER_TENSOR
nnom_tensor_t* tensor_set_attr_v(nnom_tensor_t* t,
nnom_qformat_param_t dec_bit, nnom_qformat_param_t offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth);
// Set attributes using arrays of quantisation parameters (per-axis capable).
nnom_tensor_t* tensor_set_attr(nnom_tensor_t* t,
nnom_qformat_param_t*dec_bit, nnom_qformat_param_t *offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth);
// Copy shape/quantisation attributes from src to des (not the data payload).
nnom_tensor_t* tensor_cpy_attr(nnom_tensor_t* des, nnom_tensor_t* src);
// Channel count of the tensor.
size_t tensor_get_num_channel(nnom_tensor_t* t);
// Total number of elements.
size_t tensor_size(nnom_tensor_t* t);
// Total payload size in bytes.
size_t tensor_size_byte(nnom_tensor_t* t);
// only support 3d tensor
// change format from CHW to HWC
// the shape of the data, input data, output data
void tensor_hwc2chw_q7(nnom_tensor_t* des, nnom_tensor_t* src);
// change format from CHW to HWC
// the shape of the data, input data, output data
void tensor_chw2hwc_q7(nnom_tensor_t* des, nnom_tensor_t* src);
// deprecated.
void hwc2chw_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out);
void chw2hwc_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out);
#ifdef __cplusplus
}
#endif
#endif /*__NNOM_TENSOR_H__ */

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#ifndef __NNOM_UTILS_H__
#define __NNOM_UTILS_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
// Book-keeping state for an on-device prediction/validation run.
typedef struct _nnom_predict_t
{
uint16_t *confusion_mat; // confusion matrix (label_num x label_num)
uint32_t *top_k; // which stored the num of prediction in rank_k, example: Top-2 = top_k[0]+top_k[1]
nnom_model_t *model; // the model to run
int8_t *buf_prediction; // the pointer to the output of softmax layer (normally the end of classifier).
// setting
uint32_t label_num; // number of classes in the classification
uint32_t top_k_size; // number of k values to record
// running
uint32_t predict_count; // how many predictions have been done
//timing
uint32_t t_run_total; // total running time
uint32_t t_predict_start; // timestamp when the run was initialised
uint32_t t_predict_total; // total time of the whole test
} nnom_predict_t;
// create a prediction
// input model, the buf pointer to the softmax output (temporary; this can be extracted from model)
// the size of softmax output (the number of labels)
// the top-k that should be recorded.
nnom_predict_t *prediction_create(nnom_model_t *m, int8_t *buf_prediction, size_t label_num, size_t top_k_size); // currently int8_t
// after a new data is set in input
// feed data to prediction
// input the current label, (range from 0 to total number of labels - 1)
// (the current input data should be set by the user manually to the input buffer of the model.)
// return NN_ARGUMENT_ERROR if parameter error
nnom_status_t prediction_run(nnom_predict_t *pre, uint32_t true_label, uint32_t* predict_label, float* prob);
// to mark prediction finished
void prediction_end(nnom_predict_t *pre);
// free all resources
void prediction_delete(nnom_predict_t *pre);
// print confusion matrix
void prediction_matrix(nnom_predict_t *pre);
// print top-k
void prediction_top_k(nnom_predict_t *pre);
// this function is to print the summary
void prediction_summary(nnom_predict_t *pre);
// -------------------------------
// stand alone prediction API
// this api tests one set of data, returns the prediction
// return the predicted label
// return NN_ARGUMENT_ERROR if parameter error
nnom_status_t nnom_predict(nnom_model_t *m, uint32_t *label, float *prob);
// Print per-layer runtime statistics / model I/O format.
void model_stat(nnom_model_t *m);
void model_io_format(nnom_model_t *m);
#ifdef __cplusplus
}
#endif
#endif /*__NNOM_UTILS_H__ */

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#ifndef __NNOM_PORT_H__
#define __NNOM_PORT_H__
#include <stdlib.h>
#include <stdio.h>
#include <transform.h>
/* use static memory */
// must set buf using "nnom_set_static_buf()" before creating a model.
/* dynamic memory interfaces */
/* when libc is not available, you shall implement the below memory interfaces (libc equivalents). */
#ifndef NNOM_USING_STATIC_MEMORY
#define nnom_malloc(n) malloc(n)
#define nnom_free(p) free(p)
#endif
/* memory interface */
/* when libc is not available, you shall implement your equivalent functions here */
#define nnom_memset(p,v,s) memset(p,v,s)
#define nnom_memcpy(dst,src,len) memcpy(dst,src,len)
/* runtime & debug */
// Timestamp hooks are stubbed to 0 here; port them to a real timer to get layer timing.
#define nnom_us_get() 0 // return a microsecond timestamp
#define nnom_ms_get() 0 // return a millisecond timestamp
#define NNOM_LOG(...) printf(__VA_ARGS__)
/* NNoM configuration */
#define NNOM_BLOCK_NUM (8) // maximum number of memory blocks; increase it if the model requests more
#define DENSE_WEIGHT_OPT (1) // if used fully connected layer optimized weights.
#endif
#endif

View File

@ -0,0 +1,4 @@
fully_connected_opt_weight_generation.py - is from https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN/Scripts/NNFunctions which is not a part of NNoM
Please refer to NNoM documents for its usages.

View File

@ -0,0 +1 @@
# package

View File

@ -0,0 +1,153 @@
#!/usr/bin/env python
'''
This file is apart of CMSIS-NN release
https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN/Scripts/NNFunctions
'''
import numpy as np
def convert_to_x4_q7_weights(weights):
    """Reorder q7 fully-connected weights into the interleaved "x4" layout
    used by the optimized CMSIS-NN/NNoM kernels.

    weights: 4-D array of shape (rows, h, w, c).
    Returns a flat 1-D reordered copy; rows beyond a multiple of 4 keep
    their original order (same as the upstream CMSIS script).
    """
    rows, h, w, c = weights.shape
    cols = h * w * c
    mat = np.reshape(weights, (rows, cols))
    out = np.reshape(np.copy(mat), (rows * cols))
    # (row-offset, col-offset) visit order inside each 4x4 tile
    tile_order = ((0, 0), (1, 0), (0, 2), (1, 2),
                  (2, 0), (3, 0), (2, 2), (3, 2),
                  (0, 1), (1, 1), (0, 3), (1, 3),
                  (2, 1), (3, 1), (2, 3), (3, 3))
    pos = 0
    for tile_row in range(rows // 4):
        row_base = 4 * tile_row
        for tile_col in range(cols // 4):
            col_base = 4 * tile_col
            for dr, dc in tile_order:
                out[pos] = mat[row_base + dr][col_base + dc]
                pos += 1
        # leftover columns (cols % 4) stay in natural order, 4 rows at a time
        for col in range(cols - cols % 4, cols):
            for dr in range(4):
                out[pos] = mat[row_base + dr][col]
                pos += 1
    return out
def convert_to_x4_q15_weights(weights):
    """Reorder q15 fully-connected weights into the interleaved "x4" layout
    used by the optimized CMSIS-NN/NNoM kernels.

    weights: 4-D array of shape (rows, h, w, c).
    Returns a flat 1-D reordered copy; rows beyond a multiple of 4 keep
    their original order (same as the upstream CMSIS script).
    """
    rows, h, w, c = weights.shape
    cols = h * w * c
    mat = np.reshape(weights, (rows, cols))
    out = np.reshape(np.copy(mat), (rows * cols))
    # (row-offset, col-offset) visit order inside each 4x2 tile
    tile_order = ((0, 0), (0, 1), (1, 0), (1, 1),
                  (2, 0), (2, 1), (3, 0), (3, 1))
    pos = 0
    for tile_row in range(rows // 4):
        row_base = 4 * tile_row
        for tile_col in range(cols // 2):
            col_base = 2 * tile_col
            for dr, dc in tile_order:
                out[pos] = mat[row_base + dr][col_base + dc]
                pos += 1
        # a leftover odd column stays in natural order, 4 rows at a time
        for col in range(cols - cols % 2, cols):
            for dr in range(4):
                out[pos] = mat[row_base + dr][col]
                pos += 1
    return out
def convert_q7_q15_weights(weights):
    """Reorder fully-connected weights for the mixed q7-weight / q15-activation
    optimized kernel (column-pair interleave within each group of 4 rows).

    weights: 4-D array of shape (rows, h, w, c).
    Returns a flat 1-D reordered copy; rows beyond a multiple of 4 keep
    their original order (same as the upstream CMSIS script).
    """
    rows, h, w, c = weights.shape
    cols = h * w * c
    mat = np.reshape(weights, (rows, cols))
    out = np.reshape(np.copy(mat), (rows * cols))
    # (row-offset, col-offset) visit order inside each 4x2 tile
    tile_order = ((0, 0), (1, 0), (0, 1), (1, 1),
                  (2, 0), (3, 0), (2, 1), (3, 1))
    pos = 0
    for tile_row in range(rows // 4):
        row_base = 4 * tile_row
        for tile_col in range(cols // 2):
            col_base = 2 * tile_col
            for dr, dc in tile_order:
                out[pos] = mat[row_base + dr][col_base + dc]
                pos += 1
        # a leftover odd column stays in natural order, 4 rows at a time
        for col in range(cols - cols % 2, cols):
            for dr in range(4):
                out[pos] = mat[row_base + dr][col]
                pos += 1
    return out
if __name__ == "__main__":
    # Generate a random q7 weight matrix and emit it, plus the three
    # reordered variants, as C macros into a reference header.
    # input dimensions
    vec_dim = 127
    row_dim = 127
    weight = np.zeros((row_dim,vec_dim), dtype=int)
    # generate random inputs
    for i in range(row_dim):
        for j in range(vec_dim):
            weight[i][j] = np.random.randint(256)-128  # uniform in [-128, 127]
    # conversion helpers expect a 4-D (rows, h, w, c) array
    weight = np.reshape(weight, (row_dim, vec_dim, 1, 1))
    # NOTE(review): output path assumes the script runs from its own directory — confirm
    outfile = open("../Ref_Implementations/fully_connected_testing_weights.h", "w")
    # original row-major weights
    outfile.write("#define IP2_WEIGHT {")
    weight.tofile(outfile,sep=",",format="%d")
    outfile.write("}\n\n")
    # x4 interleaved layout for the q7 optimized kernel
    new_weight = convert_to_x4_q7_weights(weight)
    outfile.write("#define IP4_WEIGHT {")
    new_weight.tofile(outfile,sep=",",format="%d")
    outfile.write("}\n\n")
    # layout for the mixed q7-weight / q15-activation kernel
    new_weight = convert_q7_q15_weights(weight)
    outfile.write("#define IP4_q7_q15_WEIGHT {")
    new_weight.tofile(outfile,sep=",",format="%d")
    outfile.write("}\n\n")
    # x4 interleaved layout for the q15 optimized kernel
    new_weight = convert_to_x4_q15_weights(weight)
    outfile.write("#define IP4_WEIGHT_Q15 {")
    new_weight.tofile(outfile,sep=",",format="%d")
    outfile.write("}\n\n")
    outfile.close()

View File

@ -0,0 +1,561 @@
'''
Copyright (c) 2018-2020
Jianjia Ma
majianjia@live.com
SPDX-License-Identifier: Apache-2.0
Change Logs:
Date Author Notes
2020-05-22 Jianjia Ma The first version
'''
from tensorflow.keras.layers import *
import numpy as np
def convert_tensor_name(t):
    """Map a TF tensor name (e.g. 'conv2d/kernel:0') to a C identifier
    prefixed with 'tensor_'."""
    sanitized = t.name.replace('/', '_').replace(':', '_')
    return 'tensor_' + sanitized
def to_cstyle(data, integer=True):
    """Convert an array-like to a C brace-initializer string, e.g.
    [1, 2, 3] -> '{1, 2, 3}' (nested input is flattened).

    Not for very large arrays: np.array2string summarises them with '...'.

    FIX: the original used `dtype=np.int`, an alias removed in NumPy 1.24;
    the builtin `int` is the exact equivalent.
    """
    if integer:
        data = np.array(data, dtype=int).flatten()
    else:
        data = np.array(data).flatten()
    s = np.array2string(data, separator=',')
    # strip newlines/alignment spaces, then re-space after commas
    s = s.replace("\n", "").replace("\r", "").replace(' ', '')
    s = s.replace(',', ', ')
    # tuples render with parentheses; normalise them to brackets first
    s = s.replace('(', '[').replace(')', ']')
    return s.replace('[', '{').replace(']', '}')
def tensor_shape(tensor, is_io_tensor=False):
    """Return the tensor shape as a plain list, dropping the batch dimension
    (leading None, or always for IO tensors) and mapping unknown dims to 1."""
    # TF1 exposes .shape.as_list(); TF2 goes through .get_shape()
    try:
        dims = tensor.shape.as_list()
    except Exception:
        dims = tensor.get_shape().as_list()
    if dims[0] is None or is_io_tensor:
        dims = dims[1:]
    # unknown dims (e.g. RNN timestamp = None) fall back to 1
    return [d if d is not None else 1 for d in dims]
def gen_base_config(layer):
    """Return the C initializer for the common nnom layer base config."""
    return f'{{.name = "{layer.name}"}}'
def gen_values(var_name, var, size='', dtype='const int8_t'):
    """Render one C array-variable definition line, e.g.
    'const int8_t w[4] = {...};\\n'."""
    line = '<dtype> <var_name>[<size>] = <var>;\n'
    # same substitution order as before, in case a value contains a placeholder
    for placeholder, value in (('<var_name>', var_name), ('<var>', var),
                               ('<size>', size), ('<dtype>', dtype)):
        line = line.replace(placeholder, value)
    return line
# generate tensor by the tensor config
def gen_tensor(tensor, dec_bits, tensor_value='NULL', per_axis=False, is_io_tensor=False):
    """Generate C source defining a tensor derived from a Keras tensor:
    its dim/dec/offset arrays plus the nnom_tensor_t struct.

    tensor:       Keras/TF tensor (name and shape are taken from it)
    dec_bits:     decimal-bit q-format; a ready-made C string or a list of ints
    tensor_value: C expression for the data pointer (default 'NULL')
    per_axis:     emit NNOM_QTYPE_PER_AXIS instead of PER_TENSOR
    is_io_tensor: drop the batch dimension unconditionally (see tensor_shape)
    """
    config = '''
const nnom_shape_data_t <tensor_name>_dim[] = <dim>;
const nnom_qformat_param_t <tensor_name>_dec[] = <q_dec>;
const nnom_qformat_param_t <tensor_name>_offset[] = <q_offset>;
const nnom_tensor_t <tensor_name> = {
.p_data = (void*)<value>,
.dim = (nnom_shape_data_t*)<tensor_name>_dim,
.q_dec = (nnom_qformat_param_t*)<tensor_name>_dec,
.q_offset = (nnom_qformat_param_t*)<tensor_name>_offset,
.qtype = <qtype>,
.num_dim = <num_dim>,
.bitwidth = <bitwidth>
};
'''
    # inconsistance of TF1 and TF2
    shape = tensor_shape(tensor, is_io_tensor)
    config = config.replace('<tensor_name>', convert_tensor_name(tensor))#.name.replace('/','_').split(':')[0]) #conv2d/kernel:0
    config = config.replace('<bitwidth>', '8')
    config = config.replace('<value>', tensor_value)
    config = config.replace('<dim>', to_cstyle(shape))
    config = config.replace('<num_dim>', str(len(shape)))
    # dec_bits may already be a formatted C string, or a list to convert
    if(type(dec_bits) == str):
        config = config.replace('<q_dec>', dec_bits)
        config = config.replace('<q_offset>', to_cstyle([0]))
    else:
        config = config.replace('<q_dec>', to_cstyle(dec_bits))
        config = config.replace('<q_offset>', to_cstyle([0]))
    if(per_axis):
        config = config.replace('<qtype>', 'NNOM_QTYPE_PER_AXIS')
    else:
        config = config.replace('<qtype>', 'NNOM_QTYPE_PER_TENSOR')
    return config
# create tensor by directly setting up the value
def gen_create_tensor(tensor_name, shape, dec_bits, tensor_value='NULL', per_axis=False):
    """Like gen_tensor(), but the C name and shape are passed in directly
    instead of being derived from a Keras tensor object.
    """
    config = '''
const nnom_shape_data_t <tensor_name>_dim[] = <dim>;
const nnom_qformat_param_t <tensor_name>_dec[] = <q_dec>;
const nnom_qformat_param_t <tensor_name>_offset[] = <q_offset>;
const nnom_tensor_t <tensor_name> = {
.p_data = (void*)<value>,
.dim = (nnom_shape_data_t*)<tensor_name>_dim,
.q_dec = (nnom_qformat_param_t*)<tensor_name>_dec,
.q_offset = (nnom_qformat_param_t*)<tensor_name>_offset,
.qtype = <qtype>,
.num_dim = <num_dim>,
.bitwidth = <bitwidth>
};
'''
    config = config.replace('<tensor_name>', tensor_name)
    config = config.replace('<bitwidth>', '8')
    config = config.replace('<value>', tensor_value)
    config = config.replace('<dim>', to_cstyle(shape))
    config = config.replace('<num_dim>', str(len(shape)))
    # dec_bits may already be a formatted C string, or a list to convert
    if(type(dec_bits) == str):
        config = config.replace('<q_dec>', dec_bits)
        config = config.replace('<q_offset>', to_cstyle([0]))
    else:
        config = config.replace('<q_dec>', to_cstyle(dec_bits))
        config = config.replace('<q_offset>', to_cstyle([0]))
    if(per_axis):
        config = config.replace('<qtype>', 'NNOM_QTYPE_PER_AXIS')
    else:
        config = config.replace('<qtype>', 'NNOM_QTYPE_PER_TENSOR')
    return config
def gen_conv2d_config(layer, output_shifts, bias_shifts):
    """Emit the C config (shift arrays + nnom_conv2d_config_t) for a Keras
    Conv2D-family layer.

    output_shifts / bias_shifts: ready-made C initializer strings.
    """
    c = '''
const nnom_qformat_param_t <layer_name>_output_shift[] = <output_shift_values>;
const nnom_qformat_param_t <layer_name>_bias_shift[] = <bias_shift_values>;
const nnom_conv2d_config_t <layer_name>_config = {
.super = <base_config>,
.qtype = <qtype>,
.weight = (nnom_tensor_t*)&<weight>,
.bias = (nnom_tensor_t*)&<bias>,
.output_shift = (nnom_qformat_param_t *)&<layer_name>_output_shift,
.bias_shift = (nnom_qformat_param_t *)&<layer_name>_bias_shift,
.filter_size = <filter_size>,
.kernel_size = <kernel_size>,
.stride_size = <stride_size>,
.padding_size = <padding_size>,
.dilation_size = <dilation_size>,
.padding_type = <padding_type>
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<qtype>', "NNOM_QTYPE_PER_TENSOR")
    # weights[0] = kernel, weights[1] = bias
    c = c.replace('<weight>',convert_tensor_name(layer.weights[0]))
    c = c.replace('<bias>',convert_tensor_name(layer.weights[1]))
    c = c.replace('<output_shift_values>', output_shifts)
    c = c.replace('<bias_shift_values>', bias_shifts)
    # when `filters` is None, fall back to depth_multiplier (presumably the
    # DepthwiseConv2D case — confirm against the caller)
    c = c.replace('<filter_size>', str(layer.filters) if layer.filters is not None else str(layer.depth_multiplier)) # output channel
    c = c.replace('<kernel_size>', to_cstyle(layer.kernel_size))
    c = c.replace('<stride_size>', to_cstyle(layer.strides))
    c = c.replace('<padding_size>', '{0, 0}') # not using it with keras, defined by padding type instead
    c = c.replace('<dilation_size>', to_cstyle(layer.dilation_rate))
    c = c.replace('<padding_type>', 'PADDING_'+layer.padding.upper())
    return c
def gen_conv2d_trans_config(layer, output_shifts, bias_shifts):
    """Emit the C config (shift arrays + nnom_conv2d_trans_config_t) for a
    Keras Conv2DTranspose layer.

    output_shifts / bias_shifts: ready-made C initializer strings.
    """
    c = '''
const nnom_qformat_param_t <layer_name>_output_shift[] = <output_shift_values>;
const nnom_qformat_param_t <layer_name>_bias_shift[] = <bias_shift_values>;
const nnom_conv2d_trans_config_t <layer_name>_config = {
.super = <base_config>,
.qtype = <qtype>,
.weight = (nnom_tensor_t*)&<weight>,
.bias = (nnom_tensor_t*)&<bias>,
.output_shift = (nnom_qformat_param_t *)&<layer_name>_output_shift,
.bias_shift = (nnom_qformat_param_t *)&<layer_name>_bias_shift,
.filter_size = <filter_size>,
.kernel_size = <kernel_size>,
.stride_size = <stride_size>,
.padding_size = <padding_size>,
.dilation_size = <dilation_size>,
.padding_type = <padding_type>
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<qtype>', "NNOM_QTYPE_PER_TENSOR")
    # weights[0] = kernel, weights[1] = bias
    c = c.replace('<weight>',convert_tensor_name(layer.weights[0]))
    c = c.replace('<bias>',convert_tensor_name(layer.weights[1]))
    c = c.replace('<output_shift_values>', output_shifts)
    c = c.replace('<bias_shift_values>', bias_shifts)
    c = c.replace('<filter_size>', str(layer.filters)) # output channel
    c = c.replace('<kernel_size>', to_cstyle(layer.kernel_size))
    c = c.replace('<stride_size>', to_cstyle(layer.strides))
    c = c.replace('<padding_size>', '{0, 0}') # not using it with keras, defined by padding type instead
    c = c.replace('<dilation_size>', to_cstyle(layer.dilation_rate))
    c = c.replace('<padding_type>', 'PADDING_'+layer.padding.upper())
    return c
def gen_dense_config(layer, output_shifts, bias_shift):
    """Emit the C config (shift arrays + nnom_dense_config_t) for a Keras
    Dense layer.

    output_shifts / bias_shift: ready-made C initializer strings.
    """
    c = '''
const nnom_qformat_param_t <layer_name>_output_shift[] = <output_shift_values>;
const nnom_qformat_param_t <layer_name>_bias_shift[] = <bias_shift_values>;
const nnom_dense_config_t <layer_name>_config = {
.super = <base_config>,
.qtype = <qtype>,
.weight = (nnom_tensor_t*)&<weight>,
.bias = (nnom_tensor_t*)&<bias>,
.output_shift = (nnom_qformat_param_t *)&<layer_name>_output_shift,
.bias_shift = (nnom_qformat_param_t *)&<layer_name>_bias_shift
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<qtype>', "NNOM_QTYPE_PER_TENSOR")
    # weights[0] = kernel, weights[1] = bias
    c = c.replace('<weight>', convert_tensor_name(layer.weights[0]))
    c = c.replace('<bias>', convert_tensor_name(layer.weights[1]))
    c = c.replace('<output_shift_values>', output_shifts)
    c = c.replace('<bias_shift_values>', bias_shift)
    return c
def gen_io_config(layer, tensor_name):
    """Emit the C nnom_io_config_t for an input/output layer bound to the
    tensor named `tensor_name`."""
    template = '''
const nnom_io_config_t <layer_name>_config = {
.super = <base_config>,
.tensor = (nnom_tensor_t*)&<tensor>
};
'''
    for placeholder, value in (('<layer_name>', layer.name),
                               ('<base_config>', gen_base_config(layer)),
                               ('<tensor>', tensor_name)):
        template = template.replace(placeholder, value)
    return template
def gen_output_config(previous_layer, dec_bits, output_num, value_name='nnom_output_data'): #cheat at the moments
    """Emit the C tensor + nnom_io_config_t for model output `output_num`,
    using the shape of `previous_layer`'s output.

    dec_bits:   q-format decimal bits as a string (wrapped in braces here)
    value_name: C symbol the output tensor points to
    """
    c = '''
const nnom_shape_data_t <tensor_name>_dim[] = <dim>;
const nnom_qformat_param_t <tensor_name>_dec[] = <q_dec>;
const nnom_qformat_param_t <tensor_name>_offset[] = <q_offset>;
const nnom_tensor_t <tensor_name> = {
.p_data = (void*)<value>,
.dim = (nnom_shape_data_t*)<tensor_name>_dim,
.q_dec = (nnom_qformat_param_t*)<tensor_name>_dec,
.q_offset = (nnom_qformat_param_t*)<tensor_name>_offset,
.qtype = <qtype>,
.num_dim = <num_dim>,
.bitwidth = 8
};
const nnom_io_config_t <layer_name>_config = {
.super = <base_config>,
.tensor = (nnom_tensor_t*)&<tensor_name>
};
'''
    shape = tensor_shape(previous_layer.output, is_io_tensor=True)
    c = c.replace('<tensor_name>', 'tensor_output'+str(output_num))
    c = c.replace('<layer_name>', 'output'+str(output_num))
    c = c.replace('<base_config>', '{.name = "output'+str(output_num)+'"}') # cheating at the moment.
    c = c.replace('<value>', value_name)
    c = c.replace('<qtype>', 'NNOM_QTYPE_PER_TENSOR')
    c = c.replace('<num_dim>', str(len(shape)))
    c = c.replace('<dim>', to_cstyle(shape))
    c = c.replace('<q_dec>', '{'+dec_bits+'}')
    c = c.replace('<q_offset>', to_cstyle([0]))
    return c
def gen_pooling_config(layer, output_shifts='0'):
    """Emit the C nnom_pool_config_t for a Keras pooling layer
    (Max/AveragePooling 1D/2D)."""
    c = '''
const nnom_pool_config_t <layer_name>_config = {
.super = <base_config>,
.padding_type = <padding_type>,
.output_shift = <output_shift>,
.kernel_size = <kernel_size>,
.stride_size = <stride_size>,
.num_dim = <num_dim>
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<padding_type>', 'PADDING_'+layer.padding.upper())
    c = c.replace('<kernel_size>', to_cstyle(layer.pool_size))
    c = c.replace('<stride_size>', to_cstyle(layer.strides))
    c = c.replace('<num_dim>', str(len(layer.pool_size)))
    c = c.replace('<output_shift>', output_shifts) # not used at the moment
    return c
def gen_gl_pooling_config(layer, output_shifts='0'):
    """Emit the C nnom_global_pool_config_t for a Keras global pooling layer."""
    template = '''
const nnom_global_pool_config_t <layer_name>_config = {
.super = <base_config>,
.output_shift = <output_shift>,
};
'''
    for placeholder, value in (('<layer_name>', layer.name),
                               ('<base_config>', gen_base_config(layer)),
                               ('<output_shift>', output_shifts)):
        template = template.replace(placeholder, value)
    return template
def gen_matrix_config(layer, output_shift_name='0'):
    """Emit the C nnom_matrix_config_t for element-wise layers (Add/Mult/Sub)."""
    template = '''
const nnom_matrix_config_t <layer_name>_config = {
.super = <base_config>,
.output_shift = <output_shift>
};
'''
    # output_shift is not used at the moment
    for placeholder, value in (('<layer_name>', layer.name),
                               ('<base_config>', gen_base_config(layer)),
                               ('<output_shift>', output_shift_name)):
        template = template.replace(placeholder, value)
    return template
def gen_zero_padding_config(layer):
    """Emit the C nnom_zero_padding_config_t for a Keras ZeroPadding layer.

    2-D layers expose padding as ((top, bottom), (left, right)), which
    sum(..., ()) flattens; 1-D layers raise here, so the except branch
    prepends a (0, 0) pair to mimic the 2-D shape.
    """
    c = '''
const nnom_zero_padding_config_t <layer_name>_config = {
.super = <base_config>,
.pad = <padding>
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    try:
        c = c.replace('<padding>', to_cstyle(sum(layer.padding, ())))
    except:
        pad = ((0, 0), layer.padding)
        c = c.replace('<padding>', to_cstyle(sum(pad, ())))
    return c
def gen_cropping_config(layer):
    """Emit the C nnom_cropping_config_t for a Keras Cropping layer.

    2-D layers expose cropping as ((top, bottom), (left, right)), flattened
    by sum(..., ()); 1-D layers raise here, so the except branch prepends a
    (0, 0) pair to mimic the 2-D shape.
    """
    c = '''
const nnom_cropping_config_t <layer_name>_config = {
.super = <base_config>,
.pad = <padding>
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    try:
        c = c.replace('<padding>', to_cstyle(sum(layer.cropping, ()))) #((top_crop, bottom_crop), (left_crop, right_crop))
    except:
        pad = ((0, 0), layer.cropping)
        c = c.replace('<padding>', to_cstyle(sum(pad, ())))
    return c
def gen_upsampling_config(layer):
    """Emit the C nnom_upsample_config_t for a Keras UpSampling layer."""
    template = '''
const nnom_upsample_config_t <layer_name>_config = {
.super = <base_config>,
.kernel = <kernel>
};
'''
    for placeholder, value in (('<layer_name>', layer.name),
                               ('<base_config>', gen_base_config(layer)),
                               ('<kernel>', to_cstyle(layer.size))):
        template = template.replace(placeholder, value)
    return template
def gen_softmax_config(layer):
    """Emit the C nnom_softmax_config_t for a softmax layer."""
    template = '''
const nnom_softmax_config_t <layer_name>_config = {
.super = <base_config>
};
'''
    for placeholder, value in (('<layer_name>', layer.name),
                               ('<base_config>', gen_base_config(layer))):
        template = template.replace(placeholder, value)
    return template
def gen_flatten_config(layer):
    """Emit the C nnom_flatten_config_t for a Flatten layer."""
    template = '''
const nnom_flatten_config_t <layer_name>_config = {
.super = <base_config>
};
'''
    for placeholder, value in (('<layer_name>', layer.name),
                               ('<base_config>', gen_base_config(layer))):
        template = template.replace(placeholder, value)
    return template
def gen_reshape_config(layer):
    """Emit the C target-shape array + nnom_reshape_config_t for a Keras
    Reshape layer (batch dimension dropped from output_shape)."""
    c = '''
const nnom_shape_data_t <layer_name>_targeted_shape[] = <shape>;
const nnom_reshape_config_t <layer_name>_config = {
.super = <base_config>,
.dim = (nnom_shape_data_t*)<layer_name>_targeted_shape,
.num_dim = <num_dim>
};
'''
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<shape>', to_cstyle(layer.output_shape[1:]))
    c = c.replace('<num_dim>', str(len(layer.output_shape[1:])))
    return c
def gen_concat_config(layer):
    """Generate the C `nnom_concat_config_t` definition for a Concatenate
    layer, including its merge axis."""
    template = '''
const nnom_concat_config_t <layer_name>_config = {
    .super = <base_config>,
    .axis = <axis>
};
'''
    for placeholder, value in (
        ('<layer_name>', layer.name),
        ('<base_config>', gen_base_config(layer)),
        ('<axis>', str(layer.axis)),
    ):
        template = template.replace(placeholder, value)
    return template
def gen_lambda_config(layer, run_func_name='NULL', build_func_name='NULL', free_func_name='NULL', parameters_name='NULL'):
    """Generate the C `nnom_lambda_config_t` definition for a user Lambda
    layer.

    The four callback/parameter identifiers default to 'NULL' and are emitted
    verbatim into the generated C source.
    """
    template = '''
const nnom_lambda_config_t <layer_name>_config = {
    .super = <base_config>,
    .run_func_name = <run_func_name>,
    .build_func_name = <build_func_name>,
    .free_func_name = <free_func_name>,
    .parameters = <parameters_name>
};
'''
    for placeholder, value in (
        ('<layer_name>', layer.name),
        ('<base_config>', gen_base_config(layer)),
        ('<run_func_name>', run_func_name),
        ('<build_func_name>', build_func_name),
        ('<free_func_name>', free_func_name),
        ('<parameters_name>', parameters_name),
    ):
        template = template.replace(placeholder, value)
    return template
def gen_rnn_config(layer):
    """Generate the C `nnom_rnn_config_t` definition for an RNN wrapper layer
    (SimpleRNN/LSTM/GRU): sequence return, statefulness, direction."""
    template = '''
const nnom_rnn_config_t <layer_name>_config = {
    .super = <base_config>,
    .return_sequence = <return_sequence>,
    .stateful = <stateful>,
    .go_backwards = <go_backwards>
};
'''
    def c_bool(flag):
        # render a Python truth value as a C boolean literal
        return 'true' if flag else 'false'
    for placeholder, value in (
        ('<layer_name>', layer.name),
        ('<base_config>', gen_base_config(layer)),
        ('<stateful>', c_bool(layer.stateful)),
        ('<go_backwards>', c_bool(layer.go_backwards)),
        ('<return_sequence>', c_bool(layer.return_sequences)),
    ):
        template = template.replace(placeholder, value)
    return template
def gen_simple_cell_config(layer, q_list):
    """Generate the C `nnom_simple_cell_config_t` definition for a SimpleRNN
    cell (weights, recurrent weights, bias and their Q-format shifts).

    :param layer: the Keras SimpleRNN layer (or a bare cell — see below)
    :param q_list: decimal-bit list: [q_h (output), q_iw (input*W), q_hw (hidden*Wr)]
    :return: C source text for the config struct
    """
    c = '''
const nnom_simple_cell_config_t <layer_name>_simple_cell_config = {
    .super = <base_config>,
    .weights = (nnom_tensor_t*)&<weights>,
    .recurrent_weights = (nnom_tensor_t*)&<recurrent_weights>,
    .bias = (nnom_tensor_t*)&<bias>,
    .q_dec_iw = <q_dec_iw>,
    .q_dec_hw = <q_dec_hw>,
    .q_dec_h = <q_dec_h>,
    .act_type = <act_type>,
    .units = <units>
};
'''
    try:
        cell_cfg = layer.get_config()['cell']['config']
    except (KeyError, TypeError):
        # the layer IS the cell, not an RNN wrapper holding a cell;
        # was a bare `except:` which could hide unrelated failures
        cell_cfg = layer.get_config()
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<weights>', convert_tensor_name(layer.weights[0]))
    c = c.replace('<recurrent_weights>', convert_tensor_name(layer.weights[1]))
    c = c.replace('<bias>', convert_tensor_name(layer.weights[2]))
    c = c.replace('<q_dec_iw>', str(q_list[1]))  # the qfmt of input x weight
    c = c.replace('<q_dec_hw>', str(q_list[2]))  # q of hidden x recurrent weight
    c = c.replace('<q_dec_h>', str(q_list[0]))   # output, if act != relu, should be 7 (consider delete it.)
    c = c.replace('<act_type>', 'ACT_' + cell_cfg['activation'].upper())
    c = c.replace('<units>', str(cell_cfg['units']))
    return c
def gen_lstm_cell_config(layer, q_list):
    """Generate the C `nnom_lstm_cell_config_t` definition for an LSTM cell.

    :param layer: the Keras LSTM layer (or a bare cell — see below)
    :param q_list: decimal-bit list: [q_h (output/hidden), q_c (cell state),
                   q_z (input*W + hidden*Wr + bias)]
    :return: C source text for the config struct
    """
    c = '''
const nnom_lstm_cell_config_t <layer_name>_lstm_cell_config = {
    .super = <base_config>,
    .weights = (nnom_tensor_t*)&<weights>,
    .recurrent_weights = (nnom_tensor_t*)&<recurrent_weights>,
    .bias = (nnom_tensor_t*)&<bias>,
    .q_dec_z = <q_dec_z>,
    .q_dec_h = <q_dec_h>,
    .q_dec_c = <q_dec_c>,
    .units = <units>
};
'''
    try:
        cell_cfg = layer.get_config()['cell']['config']
    except (KeyError, TypeError):
        # the layer IS the cell, not an RNN wrapper holding a cell;
        # was a bare `except:` which could hide unrelated failures
        cell_cfg = layer.get_config()
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<weights>', convert_tensor_name(layer.weights[0]))
    c = c.replace('<recurrent_weights>', convert_tensor_name(layer.weights[1]))
    c = c.replace('<bias>', convert_tensor_name(layer.weights[2]))
    c = c.replace('<q_dec_h>', str(q_list[0]))  # output and memory state, (should be q0.7. consider delete it)
    c = c.replace('<q_dec_c>', str(q_list[1]))  # cell state
    c = c.replace('<q_dec_z>', str(q_list[2]))  # input*weight + hidden*weight + bias
    c = c.replace('<units>', str(cell_cfg['units']))
    return c
def gen_gru_cell_config(layer, q_list):
    """Generate the C `nnom_gru_cell_config_t` definition for a GRU cell.

    :param layer: the Keras GRU layer (or a bare cell — see below)
    :param q_list: decimal-bit list: [q_h, q_z]
    :return: C source text for the config struct
    """
    c = '''
const nnom_gru_cell_config_t <layer_name>_gru_cell_config = {
    .super = <base_config>,
    .weights = (nnom_tensor_t*)&<weights>,
    .recurrent_weights = (nnom_tensor_t*)&<recurrent_weights>,
    .bias = (nnom_tensor_t*)&<bias>,
    .q_dec_z = <q_dec_z>,
    .q_dec_h = <q_dec_h>,
    .units = <units>
};
'''
    try:
        cell_cfg = layer.get_config()['cell']['config']
    except (KeyError, TypeError):
        # the layer IS the cell, not an RNN wrapper holding a cell;
        # was a bare `except:` which could hide unrelated failures
        cell_cfg = layer.get_config()
    c = c.replace('<layer_name>', layer.name)
    c = c.replace('<base_config>', gen_base_config(layer))
    c = c.replace('<weights>', convert_tensor_name(layer.weights[0]))
    c = c.replace('<recurrent_weights>', convert_tensor_name(layer.weights[1]))
    c = c.replace('<bias>', convert_tensor_name(layer.weights[2]))
    c = c.replace('<q_dec_h>', str(q_list[0]))  #
    c = c.replace('<q_dec_z>', str(q_list[1]))  #
    c = c.replace('<units>', str(cell_cfg['units']))
    return c
# Developer self-test: only runs when this generator module is executed
# directly. Loads a model from a hard-coded relative path and dumps one
# config per supported layer type into "test.h".
if __name__ == "__main__":
    # test only
    from tensorflow.keras.models import load_model
    # NOTE(review): assumes a trained model file one directory up — confirm
    # the path before running this script directly.
    model = load_model("../model.h5")
    print(gen_tensor(model.layers[1].weights[0], dec_bits=(1, 2, 3, 4, 5)))
    print(gen_tensor(model.layers[1].weights[1], dec_bits=(1, 2, 3, 4, 5)))
    print(gen_conv2d_config(model.layers[1], (1,2,3), 3))
    with open("test.h", 'w') as fp:
        # fp.write(gen_tensor(model.layers[1].weights[0], dec_bits=(1, 2, 3, 4, 5)))
        # fp.write(gen_tensor(model.layers[1].weights[1], dec_bits=(1, 2, 3, 4, 5)))
        # fp.write(gen_conv2d_config(model.layers[1], (1,2,3,)))
        fp.write('#include "nnom.h"\n')
        # test all: emit a config struct for every supported layer type,
        # using dummy shift/q values as arguments.
        for layer in model.layers:
            if(type(layer) in [Conv2D, Conv1D]):
                for w in layer.weights:
                    fp.write(gen_tensor(w, [3]))
                # NOTE(review): `{0}` is a Python set literal passed where the
                # other call above passes a tuple — looks like placeholder test
                # data; confirm the intended argument type.
                fp.write(gen_conv2d_config(layer, {0}, 2))
            elif(type(layer) in [Dense]):
                for w in layer.weights:
                    fp.write(gen_tensor(w, [3]))
                fp.write(gen_dense_config(layer, 2, 2))
            elif(type(layer) in [Input]):
                # NOTE(review): keras `Input` is a factory function, instances
                # in model.layers are `InputLayer` — this branch may never
                # match; verify.
                fp.write(gen_io_config(layer, [9,1,1]))
            elif(type(layer) in [MaxPooling2D, GlobalMaxPooling2D, AveragePooling2D, GlobalAveragePooling2D]):
                fp.write(gen_pooling_config(layer))
            elif(type(layer) in [Multiply, Add, Subtract]):
                fp.write(gen_matrix_config(layer))
            elif(type(layer) in [ZeroPadding2D, ZeroPadding1D]):
                fp.write(gen_zero_padding_config(layer))
            elif(type(layer) in [Cropping2D, Cropping1D]):
                fp.write(gen_cropping_config(layer))
            elif(type(layer) in [Softmax]):
                fp.write(gen_softmax_config(layer))
            elif(type(layer) in [Flatten]):
                fp.write(gen_flatten_config(layer))
            elif(type(layer) in [Concatenate]):
                fp.write(gen_concat_config(layer))
            elif(type(layer) in [Lambda]):
                fp.write(gen_lambda_config(layer))
            elif(type(layer) in [UpSampling2D, UpSampling1D]):
                fp.write(gen_upsampling_config(layer))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,845 @@
'''
Copyright (c) 2018-2020
Jianjia Ma
majianjia@live.com
SPDX-License-Identifier: Apache-2.0
Change Logs:
Date Author Notes
2019-02-05 Jianjia Ma The first version
This file provides:
-> fake_quantisation layers which simulate the output quantisation on fixed-point NN models.
-> weights/bias quantisation of Convolution and Dense Layer. "weight.h" file generations
-> export "testing set" binary data file.
-> print output ranges of each layer.
Currently, this script does not support RNN (type) layers.
'''
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.models import Model
from sklearn import metrics
from .fully_connected_opt_weight_generation import *
import time
import warnings
"""
This generates the test set data as a bin file.
The bin file can be used to validate the implementation on the MCU.
"""
def generate_test_bin(x, y, name='test_data_with_label.bin'):
    '''
    Export a testing set to a binary file that can be replayed on the MCU
    (e.g. streamed over Y-modem) to validate the NNoM implementation.

    File layout: repeated chunks of [128 x int8 label][128 x input blocks];
    the tail label chunk is zero-padded to the 128 boundary.

    :param x: input data; quantised here to int8 with a power-of-two scale
    :param y: labels, either one-hot encoded (2-D) or plain class indices (1-D)
    :param name: output file name
    :return: None
    '''
    # quantize input x
    min_value = np.min(x)
    max_value = np.max(x)
    abs_max = max(abs(min_value), abs(max_value))
    # guard log2(0) when the input is all zeros
    int_bits = int(np.ceil(np.log2(abs_max))) if abs_max > 0 else 0
    dec_bits = 7 - int_bits
    # clip before casting: e.g. 1.0 * 2**7 == 128 would silently wrap to -128
    # under a plain .astype(np.int8)
    x = np.clip(np.round(x * 2 ** dec_bits), -128, 127).astype(np.int8)
    # get label
    if(len(y.shape) > 1):  # one-hot encoded labels
        test_label = np.argwhere(y == 1).astype(np.int8)  # test data
        test_label = test_label[:, 1]
    else:
        # cast plain labels to int8 too, so label blocks have the same width
        # as in the one-hot branch (native int dtype would change the format)
        test_label = y.astype(np.int8)
    # get data
    dat = x.astype(dtype="byte")  # test data
    batch_size = dat.shape[0]  # total pieces of data
    dat = dat.flatten()  # flatten to get the total size.
    block_size = int(dat.size / batch_size)  # this must be integer but... just to confirm
    # write (label x 128) (data_block x 128)
    label_batch = 128  # the Y-modem example uses 128 batch
    with open(name, 'wb') as f:
        start = 0
        while start <= (test_label.size - label_batch):
            test_label[start: start + label_batch].tofile(f)
            dat[block_size * start: block_size * (start + label_batch)].tofile(f)
            start += label_batch
        # the rest data: zero-pad labels to a full batch, then dump remaining data
        if (start < test_label.size):
            rest_len = test_label.size - start
            new_labls = test_label[start:]
            new_labls = np.pad(new_labls, (0, label_batch - rest_len), mode='constant')
            new_labls.tofile(f)
            dat[block_size * start:].tofile(f)
    print("binary test file generated:", name)
    print("test data length:", test_label.size)
    return
def is_shift_layer(layer):
    '''Return True for layers which can change the output encoding (shift).'''
    # FIXME: add more which will change the output shift
    name = layer.name
    shifting_keywords = ('input', 'conv2d', 'conv1d', 'dense', 'softmax',
                         'sigmoid', 'tanh', 'subtract', 'multiply')
    if any(key in name for key in shifting_keywords):
        return True
    # 'zero_padding' contains the substring 'add', so exclude it explicitly
    if 'add' in name and 'zero' not in name:
        return True
    if 'activation' in name and \
            layer.get_config()['activation'] in ('softmax', 'sigmoid', 'tanh'):
        return True
    return False
def is_shift_fixed(layer):
    '''Return True for layers whose output shift is pinned to a fixed value.'''
    # FIXME: add more which will change the output shift
    name = layer.name
    for keyword in ('softmax', 'sigmoid', 'tanh'):
        if keyword in name:
            return True
    if 'activation' in name:
        return layer.get_config()['activation'] in ('softmax', 'sigmoid', 'tanh')
    return False
def fuse_bn_to_conv(layer):
    """Fold a directly-following BatchNormalization layer into `layer`'s
    weights, in place.

    The conv weights are scaled by gamma/sqrt(var + eps) and the bias is
    rewritten as gamma*(b - mean)/sqrt(var + eps) + beta, so the BN layer can
    be dropped from the generated MCU model. Only runs when the first
    outbound node of `layer` is a batch-normalization layer.

    NOTE(review): after `layer.set_weights(...)` the in-memory Keras model no
    longer matches the trained one (the original comment below says as much) —
    callers must treat the model as consumed.
    """
    # try to fuse BN layer to convolutional
    if ('conv' in layer.name) and \
            ('batch_normalization' in layer._outbound_nodes[0].outbound_layer.name):
        print("fusing batch normalization to", layer.name)
        bn_layer = layer._outbound_nodes[0].outbound_layer
        c_w = layer.get_weights()[0]  # conv kernel
        c_b = layer.get_weights()[1]  # conv bias
        print('original weight max', c_w.max(), 'min', c_w.min())
        print('original bias max', c_b.max(), 'min', c_b.min())
        # BN weights order: gamma, beta, moving mean, moving variance
        bn_gamma = bn_layer.get_weights()[0]
        bn_beta = bn_layer.get_weights()[1]
        bn_mean = bn_layer.get_weights()[2]
        bn_variance = bn_layer.get_weights()[3]
        if ('conv2d' in layer.name):
            epsilon = 1e-3  # default epsilon for tf.slim.batch_norm
            # scale every kernel element by gamma/sqrt(var + eps) along the
            # channel axis (axis 2 for depthwise kernels, axis 3 otherwise)
            for l in range(c_w.shape[3]):
                for k in range(c_w.shape[2]):
                    for j in range(c_w.shape[1]):
                        for i in range(c_w.shape[0]):
                            if "depthwise" in layer.name:  # depthwise batchnorm params are ordered differently
                                c_w[i][j][k][l] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)
                            else:
                                c_w[i][j][k][l] *= bn_gamma[l] / np.sqrt(bn_variance[l] + epsilon)
            if "depthwise" in layer.name:
                depth_dim = c_w.shape[2]
            else:
                depth_dim = c_w.shape[3]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
        # conv1d
        else:
            epsilon = 1e-3  # default epsilon for tf.slim.batch_norm
            for k in range(c_w.shape[2]):
                for j in range(c_w.shape[1]):
                    for i in range(c_w.shape[0]):
                        if "depthwise" in layer.name:  # depthwise batchnorm params are ordered differently
                            c_w[i][j][k] *= bn_gamma[j] / np.sqrt(bn_variance[j] + epsilon)
                        else:
                            c_w[i][j][k] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)
            if "depthwise" in layer.name:
                depth_dim = c_w.shape[1]
            else:
                depth_dim = c_w.shape[2]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
        print('fused weight max', c_w.max(), 'min', c_w.min())
        print('fused bias max', c_b.max(), 'min', c_b.min())
        # write the weights back to the layer
        # after that, the model will be destroyed.. need a better way to pass the new weight
        layer.set_weights([c_w, c_b])
def generate_weights(model, name='weights.h', format='hwc', shift_list=None):
    """Quantise every layer's weights/bias to int8 and write them to a C
    header file as #define'd initialiser lists plus *_SHIFT macros.

    :param model: the (trained) Keras model; BN layers are fused into their
                  preceding conv layers as a side effect (model is consumed)
    :param name: output header file path (truncated, then appended to)
    :param format: 'hwc' (default) or 'chw' weight memory layout
    :param shift_list: per-layer output shifts from layers_output_ranges();
                       when None, bias shifts are clamped to the kernel shift
    """
    # Quantize weights to 8-bits using (min,max) and write to file
    f = open(name, 'w')
    f.write('#include "nnom.h"\n\n')
    f.close()
    for curr_idx, layer in enumerate(model.layers):
        if (not layer.weights):
            continue
        # before merging bn layer, check if the bn is "legally" after Conv
        # NOTE(review): this mixes `layer.inbound_nodes` here with
        # `layer._inbound_nodes` in the raise below (and `_outbound_nodes` in
        # fuse_bn_to_conv) — which attribute exists depends on the TF/Keras
        # version; confirm against the pinned version.
        if('batch_normalization' in layer.name) and \
            ('conv' not in layer.inbound_nodes[0].inbound_layers.name):
            raise Exception('Currently only support batch_normalization after conv', layer.name,
                            layer._inbound_nodes[0].inbound_layers[0].name)
        # try to fuse BN layer to convolutional
        if ('conv' in layer.name) and \
            ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
            fuse_bn_to_conv(layer)
        # generate weights and bias now
        weight_dec_shift = 0
        print('weights for layer', layer.name)
        for var in layer.weights:
            var_name = str(var.name)
            if("kernel" in var_name ):
                var_values = layer.get_weights()[0] # weight
                print("  weight:", var_name)
            elif("bias" in var_name):
                var_values = layer.get_weights()[1] # bias
                print("  bias: ",var_name)
            else:
                # skip anything that is neither kernel nor bias (e.g. BN stats)
                continue
            print("  original shape: ", var_values.shape)
            # pick the largest power-of-two scale that avoids saturation
            min_value = np.min(var_values)
            max_value = np.max(var_values)
            int_bits = int(np.ceil(np.log2(max(abs(min_value), abs(max_value)))))
            dec_bits = 7 - int_bits
            print("  dec bit", dec_bits)
            bSameAsKernel = False
            if(is_shift_layer(layer)):
                bSameAsKernel = False
                inp = layer.input.name.replace(':','/').split('/')[0]
                input_encoding = shift_list[inp]
                if ("kernel" in var_name):
                    weight_dec_shift = dec_bits
                else:
                    # bias left-shift would be negative -> fall back to
                    # clamping the bias shift to the kernel shift
                    shift = input_encoding+weight_dec_shift-dec_bits
                    if(shift < 0):
                        bSameAsKernel = True
            if(shift_list is None or bSameAsKernel):
                # check if bias shift > weight shift, then reduce bias shift to weight shift
                if ("kernel" in var_name):
                    weight_dec_shift = dec_bits
                else:
                    if(dec_bits > weight_dec_shift):
                        dec_bits = weight_dec_shift
                print("  new dec bit", dec_bits)
            # convert to [-128,128) or int8
            var_values = np.round(var_values * 2 ** dec_bits)
            var_name = var_name.replace('/', '_')
            var_name = var_name.replace(':', '_')
            with open(name, 'a') as f:
                f.write('#define ' + var_name.upper() + ' {')
            # CHW format
            if ('chw' in format):
                if "dense" in var_name and "kernel" in var_name:
                    # dense kernels get the CMSIS-NN x4 interleaved reorder
                    transposed_wts = np.transpose(var_values)
                    transposed_wts = convert_to_x4_q7_weights(
                        np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # all other kernels, bias stay the same
                else:
                    transposed_wts = var_values
            # HWC format
            else:
                if (len(var_values.shape) == 3):  # 1D convolution layer weights
                    transposed_wts = np.transpose(var_values, (2, 0, 1))
                elif (len(var_values.shape) == 4):  # 2D convolution layer weights
                    transposed_wts = np.transpose(var_values, (3, 0, 1, 2))
                else:  # fully connected layer weights or biases of any layer
                    # test, use opt weight reorder
                    if "dense" in var_name and "kernel" in var_name:
                        transposed_wts = np.transpose(var_values)
                        transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts ,(transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                    else:
                        transposed_wts = np.transpose(var_values)
            print("  reshape to:",transposed_wts.shape)
            with open(name, 'a') as f:
                transposed_wts.tofile(f, sep=", ", format="%d")
                f.write('}\n\n')
                if ("bias" in var_name):
                    f.write('#define ' + var_name.upper() + '_SHIFT ' + '(' + str(dec_bits) + ')\n\n\n')
                if ("kernel" in var_name ):
                    f.write('#define ' + var_name.upper() + '_SHIFT ' + '(' + str(dec_bits) + ')\n\n')
            """
            # for checking the quantised and dequantised range.
            with K.tf.Session() as session:
                # convert back original range but quantized to 8-bits or 256 levels
                var_values = var_values / (2 ** dec_bits)
                var_values = session.run(K.tf.assign(var, var_values))
                print('  '+var_name + ' number of wts/bias: ' + str(var_values.shape) + \
                      ' dec bits: ' + str(dec_bits) + \
                      ' max: (' + str(np.max(var_values)) + ',' + str(max_value) + ')' + \
                      ' min: (' + str(np.min(var_values)) + ',' + str(min_value) + ')')
            """
def layers_output_ranges(model, x_test, quantize_method='max_min', calibrate_size=1000):
    """Calibrate the output Q-format (decimal bit count) of every layer.

    Runs up to `calibrate_size` samples through the model, records each
    layer's output range to derive its int8 decimal bits (optionally refined
    with the KLD/saturation method), then walks the graph backwards to force
    all inputs of merging layers (add/sub/concat...) onto the smallest shift.

    NOTE: shuffles `x_test` in place via np.random.shuffle.

    :param model: the (trained) Keras model
    :param x_test: calibration data; first axis is the sample axis
    :param quantize_method: 'max_min' (no saturation) or 'kld'
    :param calibrate_size: maximum number of samples used for calibration
    :return: dict mapping layer name -> output decimal bits
    """
    # limit the test data size
    np.random.shuffle(x_test)
    if(x_test.shape[0] > calibrate_size):
        x_test = x_test[:calibrate_size]  # fix: was hard-coded to 1000, ignoring calibrate_size
    # test, show the output ranges
    shift_list = {}
    # FIXME: only support one input
    if(type(model.layers[0]) != InputLayer):
        L = [model.input] + model.layers
    else:
        L = model.layers
    last_layer = None
    for layer in L: # layer loop
        if("input" in layer.name):
            features = x_test
        else:
            # batch_normalization will need to be handled differently, since we are fusing the weight to its predecessor.
            # sigmoid and tanh are different, their shift is fixed to 7
            if(is_shift_layer(layer) or
               ('batch_normalization' in layer.name)):
                layer_model = Model(inputs=model.input, outputs=layer.output)
                features = layer_model.predict(x_test)
            else:
                # leave the features not changed, so this layer shift will be the same
                # as its inputs
                pass
        # calculate no saturation shift
        max_val = features.max()
        min_val = features.min()
        int_bits = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
        dec_bits = 7 - int_bits
        # saturation shift, using KLD method
        # Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
        if('kld' in quantize_method and not is_shift_fixed(layer) and "input" not in layer.name and "dense" not in layer.name): # test, also do not use kld in input layer
            import scipy.stats
            abs_max = max(abs(max_val), abs(min_val))
            small_var = 1e-5
            bins = np.arange(-abs_max, abs_max, abs_max/2048*2)
            q_bins = np.arange(-abs_max, abs_max, abs_max/256*2)
            flat_hist = np.histogram(features.flatten(), bins=bins)[0]
            kl_loss = []
            kl_shifts = []
            for shift in range(4):
                t = 2 ** (dec_bits + shift) # 2-based threshold
                act = np.round(features.flatten() * t)
                act = act / t
                act = np.clip(act, -128/t, 127/t)
                act = np.histogram(act, bins=q_bins)[0]
                act_hist = np.zeros(2047)
                chunk = int(2048/256)
                for i in range(int(255)):
                    none_zero = np.count_nonzero(flat_hist[i*chunk:(i+1)*chunk])
                    if none_zero == 0:
                        continue
                    for j in range(chunk):
                        act_hist[i*chunk+j] = act[i]/none_zero if flat_hist[i*chunk+j] != 0 else 0
                # NOTE(review): flat_hist is an integer count array, so
                # assigning small_var (1e-5) truncates to 0 — presumably meant
                # to avoid zero bins in the entropy; confirm upstream intent.
                flat_hist[flat_hist==0] = small_var
                act_hist[act_hist==0] = small_var
                kl = scipy.stats.entropy(flat_hist, act_hist)
                kl_loss.append(kl)
                kl_shifts.append(dec_bits + shift)
            new_dec = kl_shifts[np.argmin(kl_loss)] # set the dec_bit to the KLD results
            print("KLD loss", kl_loss)
            print("KLD shift", kl_shifts)
            if(new_dec != dec_bits):
                print(layer.name,"is using KLD method, original shift",dec_bits, "KLD results", new_dec)
                dec_bits = new_dec
        print( layer.name, "max value:", max_val, "min value:", min_val,"dec bit", dec_bits)
        # record the shift (strip the tensor suffix when iterating model.input)
        if(type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
            shift_list[layer.name.split(':')[0]] = dec_bits
        else:
            shift_list[layer.name] = dec_bits
        if ('batch_normalization' in layer.name):
            shift_list[last_layer.name] = dec_bits # use the bn layer shift to update the last layer.
        last_layer = layer
    # second pass: force all inputs of merging layers onto one (smallest) shift
    LM = {}
    for layer in model.layers:
        LM[layer.name] = layer
    L = [l for l in model.layers[1:]]
    L.reverse()
    def update_previous_layer_shift(layer, Q):
        # propagate Q backwards until a shift-capable layer is reached
        if(type(layer.input) == list):
            for inp in layer.input:
                iname = inp.name.split('/')[0]
                if('input' in iname):
                    continue
                shift_list[iname] = Q  # fix: was the closure variable Qmin (same value at runtime, clearer intent)
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], Q)
        else:
            iname = layer.input.name.split('/')[0]
            if('input' in iname):
                return
            shift_list[iname] = Q
            if(not is_shift_layer(LM[iname])):
                update_previous_layer_shift(LM[iname], Q)
    for layer in L:
        if(type(layer.input) == list):
            # find the smallest shift among all inputs of the merging layer
            iname = layer.input[0].name.split('/')[0]
            Qmin = shift_list[iname]
            for inp in layer.input:
                iname = inp.name.split('/')[0]
                if(shift_list[iname] < Qmin):
                    Qmin = shift_list[iname]
            # apply it to every input branch (unused bFlag removed)
            for inp in layer.input:
                iname = inp.name.split('/')[0]
                shift_list[iname] = Qmin
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], Qmin)
            print('set shift', Qmin, 'for the input of', layer.name, ':', [inp.name.split('/')[0] for inp in layer.input])
            if(not is_shift_layer(layer) or Qmin < shift_list[layer.name]): # update current layer's shift only when we cannot change the shift
                shift_list[layer.name] = Qmin
    print("shift list", shift_list)
    return shift_list
def generate_model(model, x_test, name='weights.h', format='hwc', quantize_method='max_min'):
shift_list = layers_output_ranges(model, x_test, quantize_method=quantize_method)
generate_weights(model, name=name, format=format, shift_list=shift_list)
if(type(model.layers[0]) != InputLayer):
L = [model.input] + model.layers
else:
L = model.layers
with open(name,'a') as fp:
fp.write('\n/* output enconding for each layer */\n')
for layer in L:
if(type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
iname = layer.name.split(':')[0]
else:
iname = layer.name
fp.write('#define %s_OUTPUT_SHIFT %s\n'%(iname.upper(), shift_list[iname]))
fp.write('\n/* bias shift and output shift for each layer */\n')
for layer in model.layers:
if(is_shift_layer(layer)):
iname = layer.name.upper()
if(len(layer.weights) == 2 and
'kernel' in layer.weights[0].name and
'bias' in layer.weights[1].name):
kname = layer.weights[0].name.upper().replace('/', '_').replace(':', '_')
bname = layer.weights[1].name.upper().replace('/', '_').replace(':', '_')
inp = layer.input.name.replace(':','/').split('/')[0].upper()
fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_SHIFT+{2}_SHIFT-{0}_OUTPUT_SHIFT)\n'.format(
iname, inp, kname))
fp.write('#define {0}_BIAS_LSHIFT ({1}_OUTPUT_SHIFT+{2}_SHIFT-{3}_SHIFT)\n'.format(
iname, inp, kname, bname))
fp.write('#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
fp.write('#if {0}_BIAS_LSHIFT < 0\n#error {0}_BIAS_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
# add, sub
elif ('add' in layer.name or
'subtract' in layer.name):
# only consider the first, they have been set to same in out_put_range()
inp = layer.input[0].name.replace(':','/').split('/')[0].upper()
fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_SHIFT-{0}_OUTPUT_SHIFT)\n'.format(
iname, inp))
fp.write('#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
# mult is different, Q3.4 * Q3.4 = Q6.8. if mult out is Q4.3, then shift (Q.4+q.4)-Q.3=5. Am I right?
elif ('multiply' in layer.name ):
inp = layer.input[0].name.replace(':','/').split('/')[0].upper()
fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_SHIFT*2-{0}_OUTPUT_SHIFT)\n'.format(
iname, inp))
fp.write('#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
fp.write('\n/* weights for each layer */\n')
LI = {}
ID = 0
def is_skipable_layer(layer):
# FIXME: add more that could be skiped
if('lambda' in layer.name or
'dropout' in layer.name or
'batch_normalization' in layer.name or
('flatten' in layer.name and 'chw' not in format)): # flatten layer can be skipped in HWC but have to present in CHW
return True
return False
for id,layer in enumerate(L):
if(is_skipable_layer(layer)):
inp = layer.input.name.replace(':','/').split('/')[0]
LI[layer.name] = (LI[inp][0], layer)
else:
if(type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
LI[layer.name.split(':')[0]] = (ID, layer)
else:
LI[layer.name] = (ID, layer)
ID += 1
if ('input' in layer.name or not layer.weights):
continue
for var in layer.weights:
var_name = str(var.name).replace('/', '_').replace(':', '_')
if("kernel" in var_name):
fp.write('static const int8_t %s_weights[] = %s;\n'%(layer.name, var_name.upper()))
fp.write('static const nnom_weight_t %s_w = { (const void*)%s_weights, %s_OUTPUT_RSHIFT};\n'%(layer.name,layer.name, layer.name.upper()))
elif("bias" in var_name):
fp.write('static const int8_t %s_bias[] = %s;\n'%(layer.name, var_name.upper()))
fp.write('static const nnom_bias_t %s_b = { (const void*)%s_bias, %s_BIAS_LSHIFT};\n'%(layer.name,layer.name, layer.name.upper()))
fp.write('\n/* nnom model */\n')
# FIXME: now only support one input and one output
sz = 1
for d in model.input.shape[1:]:
sz = sz*d
fp.write('static int8_t nnom_input_data[%d];\n'%(sz))
sz = 1
for d in model.output.shape[1:]:
sz = sz*d
fp.write('static int8_t nnom_output_data[%d];\n'%(sz))
fp.write('static nnom_model_t* nnom_model_create(void)\n{\n')
fp.write('\tstatic nnom_model_t model;\n')
if(ID>32):
fp.write('\tnnom_layer_t ** layer = malloc(sizeof(nnom_layer_t *)*%d);\n'%(ID+1))
fp.write('\tif(NULL == layer) return NULL;\n')
else:
fp.write('\tnnom_layer_t* layer[%d];\n'%(ID+1))
fp.write('\n\tnew_model(&model);\n\n')
for layer in L:
if(is_skipable_layer(layer)):
continue
#FIXME: need a better solution to seperate the input 'tensor' from other layers
if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
id,_ = LI[layer.name.split(':')[0]]
else:
id,_ = LI[layer.name]
if('input' in layer.name):
try:
inshape = layer.input_shape[0][1:] # new changes in tf2?
except:
inshape = layer.shape[1:]
if (len(inshape) == 1): # 1-D input
fp.write('\tlayer[%d] = Input(shape(%d,1,1), nnom_input_data);\n' % (id, inshape[0]))
elif (len(inshape) == 2): # 1-D input
fp.write('\tlayer[%d] = Input(shape(1,%d,%d), nnom_input_data);\n' % (id, inshape[0], inshape[1]))
else:
fp.write('\tlayer[%d] = Input(shape%s, nnom_input_data);\n' % (id, inshape))
# convlutional
elif('conv1d' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if('depthwise' in layer.name):
fp.write('\tlayer[{0}] = model.hook(DW_Conv2D({1}, kernel(1,{2}), stride(1,{3}), dilation(1,{4}), PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
id, 1, cfg['kernel_size'][0], cfg['strides'][0], cfg['dilation_rate'][0], cfg['padding'].upper(),
layer.name, LI[inp][0]))
else:
fp.write('\tlayer[{0}] = model.hook(Conv2D({1}, kernel(1,{2}), stride(1,{3}), dilation(1,{4}), PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
id, cfg['filters'], cfg['kernel_size'][0], cfg['strides'][0], cfg['dilation_rate'][0], cfg['padding'].upper(),
layer.name, LI[inp][0]))
elif('conv2d' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if ('depthwise' in layer.name):
fp.write('\tlayer[{0}] = model.hook(DW_Conv2D({1}, kernel{2}, stride{3}, dilation{4}, PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
id, 1, cfg['kernel_size'], cfg['strides'], cfg['dilation_rate'], cfg['padding'].upper(),
layer.name, LI[inp][0]))
else:
fp.write('\tlayer[{0}] = model.hook(Conv2D({1}, kernel{2}, stride{3}, dilation{4}, PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
id, cfg['filters'], cfg['kernel_size'], cfg['strides'], cfg['dilation_rate'], cfg['padding'].upper(),
layer.name, LI[inp][0]))
# activations
elif('activation' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if(cfg['activation'] == 'relu'):
fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
if(cfg['activation'] == 'tanh'):
fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
if(cfg['activation'] == 'sigmoid'):
fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
elif(cfg['activation'] == 'softmax'):
fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
elif('re_lu' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
# pooling
elif('max_pooling' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if ('global' in layer.name):
fp.write('\tlayer[%s] = model.hook(GlobalMaxPool(), layer[%s]);\n' % (id, LI[inp][0]))
elif('2d' in layer.name):
fp.write('\tlayer[%s] = model.hook(MaxPool(kernel%s, stride%s, PADDING_%s), layer[%d]);\n'%(
id, cfg['pool_size'], cfg['strides'], cfg['padding'].upper(), LI[inp][0]))
elif('1d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(MaxPool(kernel(1,{1}), stride(1,{2}), PADDING_{3}), layer[{4}]);\n'.format(
id, cfg['pool_size'][0], cfg['strides'][0], cfg['padding'].upper(), LI[inp][0]))
elif('average_pooling' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if ('global' in layer.name):
# a global avg pool before softmax can be replace by sumpool in MCU (recommend)
if(layer == model.layers[-2] and 'Softmax' in model.layers[-1].output.name):
print(layer.name, 'has been replaced by GlobalSumPool()')
fp.write('\tlayer[%s] = model.hook(GlobalSumPool(), layer[%s]);\n' % (id, LI[inp][0]))
else:
fp.write('\tlayer[%s] = model.hook(GlobalAvgPool(), layer[%s]);\n' % (id, LI[inp][0]))
elif('2d' in layer.name):
fp.write('\tlayer[%s] = model.hook(AvgPool(kernel%s, stride%s, PADDING_%s), layer[%d]);\n'%(
id, cfg['pool_size'], cfg['strides'], cfg['padding'].upper(), LI[inp][0]))
elif('1d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(AvgPool(kernel(1,{1}), stride(1,{2}), PADDING_{3}), layer[{4}]);\n'.format(
id, cfg['pool_size'][0], cfg['strides'][0], cfg['padding'].upper(), LI[inp][0]))
elif ('up_sampling' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if('2d' in layer.name):
fp.write('\tlayer[%s] = model.hook(UpSample(kernel%s), layer[%d]);\n'%(id, cfg['size'], LI[inp][0]))
elif('1d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(UpSample(kernel(1,{1})), layer[{2}]);\n'.format(
id, cfg['size'][0], LI[inp][0]))
# zero padding
elif ('zero_padding' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if('2d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(ZeroPadding(border({1},{2},{3},{4})), layer[{5}]);\n'.format(
id, cfg['padding'][0][0], cfg['padding'][0][1], cfg['padding'][1][0],cfg['padding'][1][1], LI[inp][0]))
elif('1d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(ZeroPadding(border(0,0,{1},{2})), layer[{3}]);\n'.format(
id, cfg['padding'][0], cfg['padding'][1], LI[inp][0]))
# Cropping
elif ('cropping' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
if('2d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(Cropping(border({1},{2},{3},{4})), layer[{5}]);\n'.format(
id, cfg['cropping'][0][0], cfg['cropping'][0][1], cfg['cropping'][1][0],cfg['cropping'][1][1], LI[inp][0]))
elif('1d' in layer.name):
fp.write('\tlayer[{0}] = model.hook(Cropping(border(0,0,{1},{2})), layer[{3}]);\n'.format(
id, cfg['cropping'][0], cfg['cropping'][1], LI[inp][0]))
# others
elif('flatten' in layer.name): # flatten is needed in CHW backend but not needed in HWC
inp = layer.input.name.replace(':', '/').split('/')[0]
fp.write('\tlayer[%s] = model.hook(Flatten(), layer[%s]);\n'%(id, LI[inp][0]))
elif('concatenate' in layer.name):
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
inX = ''
for inp in inps:
inX += ' ,layer[%d]'%(LI[inp][0])
cfg = layer.get_config()
fp.write('\tlayer[%s] = model.mergex(Concat(%s), %s%s);\n'%(
id, cfg['axis'], len(inps), inX))
elif('add' in layer.name):
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
inX = ''
for inp in inps:
inX += ' ,layer[%d]'%(LI[inp][0])
fp.write('\tlayer[%s] = model.mergex(Add(%s_OUTPUT_RSHIFT), %s%s);\n'%(
id, layer.name.upper(), len(inps), inX))
elif('subtract' in layer.name):
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
inX = ''
for inp in inps:
inX += ' ,layer[%d]'%(LI[inp][0])
fp.write('\tlayer[%s] = model.mergex(Sub(%s_OUTPUT_RSHIFT), %s%s);\n'%(
id, layer.name.upper(), len(inps), inX))
elif('multiply' in layer.name):
warnings.warn("Warning mutiply is under testing")
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
inX = ''
for inp in inps:
inX += ' ,layer[%d]'%(LI[inp][0])
fp.write('\tlayer[%s] = model.mergex(Mult(%s_OUTPUT_RSHIFT), %s%s);\n'%(
id, layer.name.upper(), len(inps), inX))
elif('dense' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
cfg = layer.get_config()
fp.write('\tlayer[{0}] = model.hook(Dense({1}, &{2}_w, &{2}_b), layer[{3}]);\n'.format(
id, cfg['units'], layer.name, LI[inp][0]))
elif('softmax' in layer.name):
inp = layer.input.name.replace(':','/').split('/')[0]
fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
else:
raise Exception('unsupported layer', layer.name, layer)
"""
# temporary fixed for activations attached into layers in construction
def is_activation_attached(layer):
if(("Softmax" in layer.output.name and "softmax" not in layer.name)or
("Relu" in layer.output.name and "re_lu" not in layer.name) or
("Sigmoid" in layer.output.name and "sigmoid" not in layer.name) or
("Tanh" in layer.output.name and "tanh" not in layer.name)):
return True
return False
if "input" not in layer.name and is_activation_attached(layer):
inp = layer.output.name.replace(':', '/').split('/')[0]
cfg = layer.get_config()
if(cfg['activation'] == 'relu'):
fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
if(cfg['activation'] == 'tanh'):
fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
if(cfg['activation'] == 'sigmoid'):
fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
elif(cfg['activation'] == 'softmax'):
fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
"""
# FIXME, test later.
if('softmax' in layer.name
or ('activation' in layer.name and layer.get_config()['activation'] == 'softmax')):
fp.write('\tlayer[%s] = model.hook(Output(shape(%s,1,1), nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1], id))
elif len(layer.output.shape) == 4:
fp.write('\tlayer[%s] = model.hook(Output(shape%s, nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1:], id))
elif len(layer.output.shape) == 3:
fp.write('\tlayer[%s] = model.hook(Output(shape(1,%s,%s), nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1], layer.output.shape[2], id))
elif len(layer.output.shape) == 2:
fp.write('\tlayer[%s] = model.hook(Output(shape(%s,1,1), nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1], id))
else:
raise Exception('unsupported output shape of the last layer', layer.name, layer)
fp.write('\tmodel_compile(&model, layer[0], layer[%s]);\n'%(id+1))
if(ID>32):
fp.write('\tfree(layer);\n')
fp.write('\treturn &model;\n}\n')
with open('.shift_list','w') as fp:
fp.write(str(shift_list))
def evaluate_model(model, x_test, y_test, running_time=False, to_file='evaluation.txt'):
    """Evaluate a trained Keras model and write a small text report.

    Prints the test loss and top-1 accuracy, a confusion matrix when the
    labels are one-hot encoded, optionally an average per-sample prediction
    time, then prints the per-variable decimal-bit budget used by the Q7
    quantisation (7 minus the integer bits needed by the value range).

    Args:
        model: compiled Keras model.
        x_test, y_test: evaluation data; y_test may be one-hot encoded.
        running_time: if True, time 10 full predict passes over x_test.
        to_file: path of the report file that is (over)written.

    Returns:
        The result of model.evaluate() (loss and metric values).
    """
    # Score trained model.
    scores = model.evaluate(x_test, y_test, verbose=2)
    print('Test loss:', scores[0])
    print('Top 1:', scores[1])
    # one-hot labels -> also compute and print a confusion matrix
    if(len(y_test.shape)>1):
        predictions = model.predict(x_test)
        matrix = metrics.confusion_matrix(y_test.argmax(axis=1), predictions.argmax(axis=1))
        print(matrix)
    run_time = 0
    if running_time:
        # average wall-clock time of 10 full predict passes, reported in us/sample
        T = time.time()
        for i in range(10):
            model.predict(x_test)
        T = time.time() - T
        run_time = round((T / 10 / x_test.shape[0] * 1000 * 1000), 2)
        print("Runing time:",run_time , "us" )
    #
    with open(to_file, 'w') as f:
        f.write("Runing time: "+ str(run_time) + "us" + "\n")
        f.write('Test loss:'+ str(scores[0]) + "\n")
        f.write('Top 1:'+ str(scores[1])+ "\n")
        if (len(y_test.shape) > 1):
            # dump the confusion matrix rows as comma-separated lines
            for row in matrix:
                row.tofile(f, sep=',')
                f.write("\n")
    # try to check the weight and bias dec ranges
    for layer in model.layers:
        if (not layer.weights):
            continue
        for var in layer.weights:
            var_name = str(var.name)
            if ("kernel" in var_name):
                var_values = layer.get_weights()[0] # weight
            else:
                var_values = layer.get_weights()[1] # bias
            min_value = np.min(var_values)
            max_value = np.max(var_values)
            # integer bits required by the largest magnitude; the rest of the
            # 8-bit word is available for decimal bits
            intt = int(np.ceil(np.log2(max(abs(min_value), abs(max_value)))))
            dec = 7 - intt
            print(var_name, "Dec num:", dec)
    return scores
def f2q(d, Q):
    """Quantise a floating point value (or array) into Qm.n fixed point.

    The value is scaled by 2**Q and rounded to the nearest integer.
    """
    scale = 2 ** Q
    return np.round(d * scale)
def q2f(d, Q):
    """De-quantise a Qm.n fixed point value (or array) back to floating point.

    Treats the input as a plain integer and divides by 2**Q.
    """
    return d / (2 ** Q)
def show_weights(w, name):
    """Plot a weight tensor against its Q7 round-trip quantised version.

    Left subplot: raw values (unsorted and sorted); middle: sorted raw values
    vs their de-quantised counterparts for the auto-selected Q format;
    right: histogram of the raw values.
    """
    # flatten to 1-D
    sz = 1
    for s in w.shape:
        sz = sz*s
    aL = w.reshape(sz,)
    MIN,MAX=min(aL),max(aL)
    # choose decimal bits so the largest magnitude fits in an 8-bit word
    Q = int(np.ceil(np.log2(max(abs(MIN),abs(MAX)))))
    Q = 7-Q
    # round-trip through fixed point to visualise the quantisation error
    qL = f2q(aL,Q)
    qL = q2f(qL,Q)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.title(name)
    plt.plot(aL)
    plt.grid()
    aL.sort()  # NOTE: in-place sort mutates the flattened view
    plt.plot(aL,'r')
    plt.grid()
    plt.subplot(132)
    plt.title('Q%s'%(Q))
    qL.sort()
    plt.plot(aL,'r')
    plt.plot(qL,'g')
    plt.grid()
    plt.subplot(133)
    plt.hist(aL,100)
    plt.title('hist')
    plt.grid()
    plt.show()
def compare(a,b,name):
    """Plot two equally sized arrays side by side for visual comparison.

    Both arrays are flattened; pairs are sorted by the first array's values
    so the reference curve is monotonic. Used to compare a TF/Keras output
    (`a`) against the NNoM fixed-point output (`b`).
    """
    # flatten both inputs to 1-D
    sz = 1
    for s in a.shape:
        sz = sz*s
    aL = a.reshape(sz,)
    bL = b.reshape(sz,)
    assert(len(aL) == len(bL))
    # sort pairs by the reference values so the curves are comparable
    Z = list(zip(aL,bL))
    Z.sort(key=lambda x: x[0])
    aL1,bL1=zip(*Z)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.plot(aL)
    plt.plot(aL1,'r')
    plt.grid()
    plt.title('tf-%s'%(name))
    plt.subplot(133)
    plt.plot(bL1,'g')
    plt.plot(aL1,'r')
    plt.grid()
    plt.title('compare')
    plt.subplot(132)
    bL1=list(bL1)
    bL1.sort()
    plt.plot(bL)
    plt.plot(bL1,'g')
    plt.grid()
    plt.title('nn-%s'%(name))
    plt.show()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
// Number of elements described by a 3D shape (h * w * c); 0 for a NULL shape.
size_t shape_size(nnom_3d_shape_t *s)
{
    if (s != NULL)
        return s->h * s->w * s->c;
    return 0;
}
// Build a 3D shape value from height, width and channel counts.
nnom_3d_shape_t shape(size_t h, size_t w, size_t c)
{
    nnom_3d_shape_t result;
    result.h = h;
    result.w = w;
    result.c = c;
    return result;
}
// Convenience constructor for a 2D kernel shape (channel fixed to 1).
nnom_3d_shape_t kernel(size_t h, size_t w)
{
    return shape(h, w, 1);
}
// Convenience constructor for a 2D stride shape (channel fixed to 1).
nnom_3d_shape_t stride(size_t h, size_t w)
{
    return shape(h, w, 1);
}
// Convenience constructor for a 2D dilation shape (channel fixed to 1).
nnom_3d_shape_t dilation(size_t h, size_t w)
{
    return shape(h, w, 1);
}
// Build a border (padding) descriptor from the four edge sizes.
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right)
{
    nnom_border_t result;
    result.top = top;
    result.bottom = bottom;
    result.left = left;
    result.right = right;
    return result;
}
// This function has to be used when assigning an io to a layer,
// because the io needs to know who its owner is.
// Returns the same io pointer for call chaining.
nnom_layer_io_t *io_init(void *owner_layer, nnom_layer_io_t *io)
{
    io->owner = (nnom_layer_t *)owner_layer;
    return io;
}
// Attach a new auxiliary IO to an already-initialised IO.
// input:  the targeted IO the new IO will be added to (its ->aux slot must be free)
// output: the new IO, initialised with the same owner layer; NULL on failure
nnom_layer_io_t *io_add_aux(nnom_layer_io_t *targeted_io)
{
    nnom_layer_io_t *new_io;
    // check if the targeted io is inited, and its aux = NULL
    if (targeted_io == NULL || targeted_io->owner == NULL || targeted_io->aux != NULL)
        return NULL;
    // create new io, init it
    new_io = nnom_mem(sizeof(nnom_layer_io_t));
    if (new_io == NULL)
        return NULL;
    // add to aux
    targeted_io->aux = new_io;
    return io_init(targeted_io->owner, new_io);
}

View File

@ -0,0 +1,245 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-02-14 Jianjia Ma Add layer.free() method.
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdarg.h>
#include "nnom.h"
#include "nnom_tensor.h"
// Total number of elements in a tensor (product of all dimensions); 0 for NULL.
size_t tensor_size(nnom_tensor_t* t)
{
    if (t == NULL)
        return 0;
    size_t size = t->dim[0];
    for (int i = 1; i < t->num_dim; i++)
        size *= t->dim[i];
    return size;
}
// Size of a tensor's data in bytes (element count * bitwidth / 8); 0 for NULL.
size_t tensor_size_byte(nnom_tensor_t* t)
{
    // BUGFIX: tensor_size() tolerates NULL but t->bitwidth dereferenced it;
    // guard here so a NULL tensor reports 0 bytes instead of crashing.
    if (t == NULL)
        return 0;
    return tensor_size(t)*t->bitwidth/8;
}
// Number of channels of a tensor. Dimensions are always described in HWC
// order, so the channel count is the last dimension in both backends.
size_t tensor_get_num_channel(nnom_tensor_t* t)
{
    // this will need to be changed to support batch.
#ifdef NNOM_USING_CHW
    // channel first
    //return t->dim[0];
    return t->dim[t->num_dim -1]; // we are always using hwc to describe even our data is in CHW
#else
    // channel last
    return t->dim[t->num_dim -1];
#endif
}
// Initialise/create a new tensor.
// type: per-axis or per-tensor quantisation; num_channel is only meaningful
// for per-axis quantisation (one q parameter per channel).
// The tensor header, its dim[] array and both q-parameter arrays are carved
// out of a single allocation, so delete_tensor() frees everything at once.
nnom_tensor_t* new_tensor(nnom_qtype_t type, uint32_t num_dim, uint32_t num_channel)
{
    nnom_tensor_t* t = NULL;
    uint32_t q_len;
    if(type == NNOM_QTYPE_PER_AXIS)
    {
        q_len = num_channel;
    }
    else if (type == NNOM_QTYPE_PER_TENSOR)
    {
        q_len = 1;
    }
    else
    {
        NNOM_LOG("ERROR: tensor type not specified\n");
        return NULL;
    }
    // single block: aligned header + aligned dim[] + q_dec[] + q_offset[]
    t = nnom_mem(nnom_alignto(sizeof(nnom_tensor_t), NNOM_ALIGN)
                 + nnom_alignto(num_dim*sizeof(nnom_shape_data_t),sizeof(nnom_qformat_param_t))
                 + q_len*sizeof(nnom_qformat_param_t)*2);
    if(t == NULL)
        return t;
    // carve the sub-arrays out of the block, directly behind the header
    t->dim = (nnom_shape_data_t*)((uint8_t*)t + sizeof(nnom_tensor_t)); // should add alignment
    t->q_dec = (nnom_qformat_param_t*)((uint8_t*)t->dim + nnom_alignto(num_dim*sizeof(nnom_shape_data_t),sizeof(nnom_qformat_param_t)));
    t->q_offset = (nnom_qformat_param_t*)((uint8_t*)t->q_dec + q_len*sizeof(nnom_qformat_param_t));
    t->num_dim = num_dim;
    t->qtype = type;
    return t;
}
// Release a tensor created by new_tensor(); safe to call with NULL.
void delete_tensor(nnom_tensor_t* t)
{
    if (t == NULL)
        return;
    nnom_free(t);
}
// Set tensor attributes by value.
// For tensors with quantisation type NNOM_QTYPE_PER_TENSOR: a single dec_bit
// and offset are stored. dim[] is copied into the tensor's own dim array.
nnom_tensor_t* tensor_set_attr_v(nnom_tensor_t* t,
    nnom_qformat_param_t dec_bit, nnom_qformat_param_t offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth)
{
    // copy dim
    t->num_dim = num_dim;
    nnom_memcpy(t->dim, dim, sizeof(nnom_shape_data_t) * num_dim);
    // bitwidth
    t->bitwidth = bitwidth;
    // copy the offset and q format
    *(t->q_dec) = dec_bit;
    *(t->q_offset) = offset;
    return t;
}
// Set tensor attributes by pointer.
// For tensors with quantisation type NNOM_QTYPE_PER_AXIS: one dec_bit/offset
// per channel is copied; per-tensor tensors copy a single parameter.
nnom_tensor_t* tensor_set_attr(nnom_tensor_t* t,
    nnom_qformat_param_t*dec_bit, nnom_qformat_param_t *offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth)
{
    size_t size;
    // copy dim
    t->num_dim = num_dim;
    nnom_memcpy(t->dim, dim, sizeof(nnom_shape_data_t) * num_dim);
    // get the q format data size (per-axis: one parameter per channel)
    if(t->qtype == NNOM_QTYPE_PER_AXIS)
        size = sizeof(nnom_qformat_param_t) * tensor_get_num_channel(t);
    else
        size = sizeof(nnom_qformat_param_t);
    // bitwidth
    t->bitwidth = bitwidth;
    // copy the offset and q format
    nnom_memcpy(t->q_dec, dec_bit, size);
    nnom_memcpy(t->q_offset, offset, size);
    return t;
}
// Copy the attributes of a tensor into another tensor.
// Both src and des must already have their QTYPE and NUM_OF_DIM set, and they
// must describe the same length. This method does NOT copy the data pointer
// (memory is assigned later, after model building).
nnom_tensor_t* tensor_cpy_attr(nnom_tensor_t* des, nnom_tensor_t* src)
{
    size_t size;
    if(src->qtype != des->qtype || src->num_dim != des->num_dim)
        return NULL;
    if(src->qtype == NNOM_QTYPE_PER_AXIS)
        size = sizeof(nnom_qformat_param_t) * tensor_get_num_channel(src);
    else
        size = sizeof(nnom_qformat_param_t);
    // bit
    des->bitwidth = src->bitwidth;
    // copy quantisation parameters
    nnom_memcpy(des->q_dec, src->q_dec, size);
    nnom_memcpy(des->q_offset, src->q_offset, size);
    // copy number of dimension
    des->num_dim = src->num_dim;
    nnom_memcpy(des->dim, src->dim, src->num_dim * sizeof(nnom_shape_data_t));
    return des;
}
// Convert a q7 tensor from HWC (channel last) to CHW (channel first).
// (The previous comment stated the opposite direction; the code reads with
// HWC strides and writes channel-plane by channel-plane, i.e. HWC -> CHW.)
// src supplies the shape: dim[0]=H, dim[1]=W, dim[2]=C; des must be same size.
void tensor_hwc2chw_q7(nnom_tensor_t* des, nnom_tensor_t* src)
{
    q7_t* p_out = des->p_data;
    q7_t* p_in = src->p_data;
    for (int c = 0; c < src->dim[2]; c++)
    {
        for (int h = 0; h < src->dim[0]; h++)
        {
            for (int w = 0; w < src->dim[1]; w++)
            {
                // read with HWC strides, write sequentially (CHW order)
                *p_out = p_in[(h * src->dim[1] + w) * src->dim[2] + c];
                p_out++;
            }
        }
    }
}
// Only supports 3D tensors.
// Convert a q7 tensor from CHW (channel first) to HWC (channel last).
// src supplies the shape: dim[0]=H, dim[1]=W, dim[2]=C; des must be same size.
void tensor_chw2hwc_q7(nnom_tensor_t* des, nnom_tensor_t* src)
{
    q7_t* p_out = des->p_data;
    q7_t* p_in = src->p_data;
    int im_size;
    int h_step;
    im_size = src->dim[0] * src->dim[1]; // H*W, size of one channel plane
    for (int h = 0; h < src->dim[0]; h++)
    {
        h_step = src->dim[1] * h;
        for (int w = 0; w < src->dim[1]; w++)
        {
            for (int c = 0; c < src->dim[2]; c++)
            {
                // read with CHW strides, write sequentially (HWC order)
                *p_out = p_in[im_size * c + h_step + w];
                p_out++;
            }
        }
    }
}
// (deprecated by the tensor_hwc2chw_q7 version)
// Convert q7 data from HWC to CHW layout (the previous comment stated the
// opposite direction). shape describes the data; p_in/p_out must not overlap.
void hwc2chw_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out)
{
    for (int c = 0; c < shape.c; c++)
    {
        for (int h = 0; h < shape.h; h++)
        {
            for (int w = 0; w < shape.w; w++)
            {
                // read with HWC strides, write sequentially (CHW order)
                *p_out = p_in[(h * shape.w + w) * shape.c + c];
                p_out++;
            }
        }
    }
}
// (deprecated by the tensor_chw2hwc_q7 version)
// Convert q7 data from CHW to HWC layout.
// shape describes the data; p_in/p_out must not overlap.
void chw2hwc_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out)
{
    int im_size = shape.w * shape.h; // size of one channel plane
    int h_step;
    for (int h = 0; h < shape.h; h++)
    {
        h_step = shape.w * h;
        for (int w = 0; w < shape.w; w++)
        {
            for (int c = 0; c < shape.c; c++)
            {
                // read with CHW strides, write sequentially (HWC order)
                *p_out = p_in[im_size * c + h_step + w];
                p_out++;
            }
        }
    }
}

View File

@ -0,0 +1,417 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_utils.h"
// Allocate and zero-initialise a prediction bookkeeping instance.
// label_num: number of classes; top_k_size: how many top-k buckets to track.
// Returns NULL when any of the three allocations fails (nothing is leaked).
static nnom_predict_t *_predict_create_instance(nnom_model_t *m, size_t label_num, size_t top_k_size)
{
    nnom_predict_t *pre;
    // allocate memory
    pre = (nnom_predict_t *)nnom_malloc(sizeof(nnom_predict_t));
    if(pre == NULL)
        return NULL;
    pre->top_k = (uint32_t *)nnom_malloc(top_k_size * sizeof(uint32_t));
    pre->confusion_mat = (uint16_t *)nnom_malloc(label_num * label_num * sizeof(uint16_t));
    if(pre->top_k == NULL || pre->confusion_mat == NULL)
    {
        // roll back partial allocations (nnom_free is assumed NULL-safe here)
        nnom_free(pre->top_k); nnom_free(pre->confusion_mat); nnom_free(pre);
        return NULL;
    }
    nnom_memset(pre->top_k, 0, top_k_size * sizeof(uint32_t));
    nnom_memset(pre->confusion_mat, 0, label_num * label_num * sizeof(uint16_t));
    // config
    pre->label_num = label_num;
    pre->top_k_size = top_k_size;
    pre->predict_count = 0;
    // run
    pre->model = m;
    pre->t_run_total = 0;	  // model running time in total
    pre->t_predict_start = 0; // when it is initial
    pre->t_predict_total = 0; // total time of the whole test
    return pre;
}
// Free a prediction instance and its internal buffers; NULL-safe.
static void _predict_delete_instance(nnom_predict_t *pre)
{
    if(pre == NULL)
        return;
    nnom_free(pre->top_k);
    nnom_free(pre->confusion_mat);
    nnom_free(pre);
}
// Create a prediction session.
// m: the compiled model; buf_prediction: pointer to the softmax output buffer
// (temporary interface - this could be extracted from the model itself);
// label_num: the number of labels (size of the softmax output);
// top_k_size: how many top-k ranks to record.
// Returns NULL when m is NULL or allocation fails.
nnom_predict_t *prediction_create(nnom_model_t *m, int8_t *buf_prediction, size_t label_num, size_t top_k_size)
{
    nnom_predict_t *pre;
    // validate the model first so we never allocate an instance only to
    // immediately delete it (the original allocated before checking m)
    if (!m)
        return NULL;
    pre = _predict_create_instance(m, label_num, top_k_size);
    if (!pre)
        return NULL;
    // set the output buffer of model to the prediction instance
    pre->buf_prediction = buf_prediction;
    // mark start time.
    pre->t_predict_start = nnom_ms_get();
    return pre;
}
// Run one prediction step after new data has been placed in the model input.
// true_label: ground-truth label index (0 .. label_num-1); the caller must
// have copied the input sample into the model's input buffer beforehand.
// Outputs the predicted label and its probability (max output / 127).
// Updates timing, top-k ranking and the confusion matrix as side effects.
nnom_status_t prediction_run(nnom_predict_t *pre, uint32_t true_label, uint32_t*predict_label, float* prob)
{
    int max_val;
    int max_index;
    uint32_t true_ranking = 0;
    uint32_t start;
    uint32_t sum = 0;
    if (!pre)
        return NN_ARGUMENT_ERROR;
    // now run model
    start = nnom_ms_get();
    model_run(pre->model);
    pre->t_run_total += nnom_ms_get() - start;
    // only draw matrix and top k when number of label > 1
    if (pre->label_num > 1)
    {
        // find how many predictions are bigger than the ground truth.
        // Ranking rules, same as tensorflow. however, predictions in MCU are more frequently to have equal probability since it is using fixed-point.
        // if ranking is 1, 2, =2(true), 4, 5, 6. the result will be top 3.
        // if ranking is 1, 2(true), =2, 4, 5, 6. the result will be top 2.
        // find the ranking of the predicted label.
        for (uint32_t j = 0; j < pre->label_num; j++)
        {
            if (j == true_label)
                continue;
            if (pre->buf_prediction[true_label] < pre->buf_prediction[j])
                true_ranking++;
            // while value[label] = value[j]. only when label > j, label is the second of j
            else if (pre->buf_prediction[true_label] == pre->buf_prediction[j] && j < true_label)
                true_ranking++;
        }
        if (true_ranking < pre->top_k_size)
            pre->top_k[true_ranking]++;
        // Find top 1 and return the current prediction.
        // If there are several maximum predictions, return the first one.
        max_val = pre->buf_prediction[0];
        max_index = 0;
        for (uint32_t j = 1; j < pre->label_num; j++)
        {
            if (pre->buf_prediction[j] > max_val)
            {
                max_val = pre->buf_prediction[j];
                max_index = j;
            }
            sum += pre->buf_prediction[j];
        }
        // result: probability is the q7 activation scaled back to [0, 1]
        if (max_val != 0)
            *prob = (float)max_val / 127.f;
        else
            *prob = 0;
        *predict_label = max_index;
        // fill confusion matrix (row = actual, column = predicted)
        pre->confusion_mat[true_label * pre->label_num + max_index] += 1;
    }
    // only one neuron as output: treat it as a binary classifier with 0.5 threshold
    else
    {
        *prob = (float)pre->buf_prediction[0] / 127.f;
        if (*prob >= 0.5f)
            *predict_label = 1;
        else
            *predict_label = 0;
    }
    // prediction count
    pre->predict_count++;
    // return the prediction
    return NN_SUCCESS;
}
// Mark the end of a prediction session and record the total elapsed time.
void prediction_end(nnom_predict_t *pre)
{
    if (!pre)
        return;
    pre->t_predict_total = nnom_ms_get() - pre->t_predict_start;
}
// Delete a prediction session created by prediction_create(); NULL-safe.
void prediction_delete(nnom_predict_t *pre)
{
    _predict_delete_instance(pre);
}
// Print the confusion matrix collected by prediction_run().
// Rows are the actual labels, columns the predicted labels; each row ends
// with the per-class accuracy in percent.
void prediction_matrix(nnom_predict_t *pre)
{
    if (!pre)
        return;
    // print titles
    NNOM_LOG("\nConfusion matrix:\n");
    NNOM_LOG("predict");
    for (int i = 0; i < pre->label_num; i++)
    {
        NNOM_LOG("%6d", i);
    }
    NNOM_LOG("\n");
    NNOM_LOG("actual\n");
    // print the matrix
    for (int i = 0; i < pre->label_num; i++)
    {
        uint32_t row_total = 0;
        NNOM_LOG(" %3d | ", i);
        for (int j = 0; j < pre->label_num; j++)
        {
            row_total += pre->confusion_mat[i * pre->label_num + j];
            NNOM_LOG("%6d", pre->confusion_mat[i * pre->label_num + j]);
        }
        // BUGFIX: guard against division by zero when a class received no samples
        if (row_total != 0)
            NNOM_LOG(" |%4d%%\n", pre->confusion_mat[i * pre->label_num + i] * 100 / row_total);
        else
            NNOM_LOG(" |%4d%%\n", 0);
    }
    NNOM_LOG("\n");
}
// Print the cumulative top-k accuracy collected by prediction_run().
// Each line reports the percentage of samples whose true label ranked
// within the top (i+1) predictions.
void prediction_top_k(nnom_predict_t *pre)
{
    uint32_t top = 0;
    if (!pre)
        return;
    for (int i = 0; i < pre->top_k_size; i++)
    {
        top += pre->top_k[i]; // cumulative: top-k includes all better ranks
        if (top != pre->predict_count)
            NNOM_LOG("Top %d Accuracy: %d.%02d%% \n", i + 1, (top * 100) / pre->predict_count,
                     ((top * 100 * 100) / pre->predict_count)%100);
        else
            NNOM_LOG("Top %d Accuracy: 100%% \n", i + 1);
    }
}
// Print a summary of the whole prediction session: frame count, timings,
// efficiency, and (for multi-label models) top-k accuracy and the
// confusion matrix.
void prediction_summary(nnom_predict_t *pre)
{
    if (!pre)
        return;
    // summary
    NNOM_LOG("\nPrediction summary:\n");
    NNOM_LOG("Test frames: %d\n", pre->predict_count);
    NNOM_LOG("Test running time: %d sec\n", pre->t_predict_total / 1000);
    NNOM_LOG("Model running time: %d ms\n", pre->t_run_total);
    if(pre->predict_count !=0)
        NNOM_LOG("Average prediction time: %d us\n", (pre->t_run_total * 1000) / pre->predict_count);
    if(pre->t_run_total != 0)
        NNOM_LOG("Average effeciency: %d.%02d ops/us\n", (int)(((uint64_t)pre->model->total_ops * pre->predict_count) / (pre->t_run_total * 1000)),
                 (int)(((uint64_t)pre->model->total_ops * pre->predict_count)*100 / (pre->t_run_total * 1000))%100);
    // BUGFIX: also guard the inner quotient - when the average run time per
    // frame rounds down to 0 ms the original divided by zero.
    if(pre->t_run_total !=0 && pre->predict_count !=0 && (pre->t_run_total / pre->predict_count) != 0)
        NNOM_LOG("Average frame rate: %d.%d Hz\n", 1000 / (pre->t_run_total / pre->predict_count),
                 (1000*10 / (pre->t_run_total / pre->predict_count))%10);
    // only valid for multiple labels
    if(pre->label_num > 1)
    {
        // print top-k
        prediction_top_k(pre);
        // print confusion matrix
        prediction_matrix(pre);
    }
}
// Stand-alone prediction API.
// Runs the model once on whatever is currently in its input buffer and
// returns the predicted label and its probability (max q7 output / 127).
// The caller must have filled the model input beforehand.
nnom_status_t nnom_predict(nnom_model_t *m, uint32_t *label, float *prob)
{
    int32_t max_val, max_index, sum;
    int8_t *output;
    if (!m)
        return NN_ARGUMENT_ERROR;
    model_run(m);
    // get the output memory (the model tail's output tensor)
    output = m->tail->out->tensor->p_data;
    // multiple neural output: argmax over the output vector
    if (tensor_size(m->tail->out->tensor) > 1)
    {
        // Top 1
        max_val = output[0];
        max_index = 0;
        sum = max_val;
        for (uint32_t i = 1; i < tensor_size(m->tail->out->tensor); i++)
        {
            if (output[i] > max_val)
            {
                max_val = output[i];
                max_index = i;
            }
            sum += output[i];
        }
        // send results
        *label = max_index;
        if(max_val !=0)
            *prob = (float)max_val/127.f;
        else
            *prob = 0;
    }
    // single neural output: binary classification with 0.5 threshold
    else
    {
        *prob = (float)output[0] / 127.f;
        if (*prob >= 0.5f)
            *label = 1;
        else
            *label = 0;
    }
    return NN_SUCCESS;
}
// Print one row of the runtime statistics table for a single layer:
// layer name (plus cell name for RNN layers), run time, MAC operation
// count (scaled to k/M/G) and efficiency in ops per microsecond.
static void layer_stat(nnom_layer_t *layer)
{
    // layer name
    if(layer->type != NNOM_RNN)
        NNOM_LOG("%-10s - ", default_layer_names[layer->type]);
    else
    {
        NNOM_LOG("%-3s/", default_layer_names[layer->type]);
        NNOM_LOG("%-6s - ", default_cell_names[((nnom_rnn_layer_t*)layer)->cell->type]);
    }
    NNOM_LOG(" %8d ", layer->stat.time);
    // MAC operation count, scaled for readability
    if(layer->stat.macc == 0)
        NNOM_LOG("        ");
    else if (layer->stat.macc < 10000)
        NNOM_LOG("%7d ", (uint32_t)layer->stat.macc);
    else if (layer->stat.macc < 1000*1000)
        NNOM_LOG("%6dk ", (uint32_t)(layer->stat.macc/1000));
    else if (layer->stat.macc < 1000*1000*1000)
        NNOM_LOG("%3d.%02dM ", (uint32_t)(layer->stat.macc/(1000*1000)), (uint32_t)(layer->stat.macc%(1000*1000)/(10*1000))); // xxx.xx M
    else
        NNOM_LOG("%3d.%02dG ", (uint32_t)(layer->stat.macc/(1000*1000*1000)), (uint32_t)(layer->stat.macc%(1000*1000*1000)/(10*1000*1000))); // xxx.xx G
    // layer efficiency (ops/us), printed as a fixed-point d.dd value
    if (layer->stat.macc != 0 && layer->stat.time != 0)
        NNOM_LOG("%d.%02d\n", (uint32_t)(layer->stat.macc / layer->stat.time), (uint32_t)((layer->stat.macc * 100) / (layer->stat.time) % 100));
    else
        NNOM_LOG("\n");
}
// Print runtime statistics for every layer in the model (following the
// shortcut list from head to tail), then a summary with total MACs,
// total prediction time, efficiency and total memory usage.
void model_stat(nnom_model_t *m)
{
    size_t total_ops = 0;
    size_t total_time = 0;
    nnom_layer_t *layer;
    uint32_t run_num = 0;
    if (!m)
        return;
    layer = m->head;
    NNOM_LOG("\nPrint running stat..\n");
    NNOM_LOG("Layer(#)        -   Time(us)     ops(MACs)   ops/us \n");
    NNOM_LOG("--------------------------------------------------------\n");
    while (layer)
    {
        run_num++;
        NNOM_LOG("#%-3d", run_num);
        total_ops += layer->stat.macc;
        total_time += layer->stat.time;
        layer_stat(layer);
        if (layer->shortcut == NULL) // tail reached
            break;
        layer = layer->shortcut;
    }
    NNOM_LOG("\nSummary:\n");
    NNOM_LOG("Total ops (MAC): %d", (uint32_t)(total_ops));
    NNOM_LOG("(%d.%02dM)\n", (uint32_t) (total_ops/(1000*1000)), (uint32_t)(total_ops%(1000*1000)/(10000)));
    NNOM_LOG("Prediction time :%dus\n", (uint32_t)total_time);
    if(total_time != 0)
        NNOM_LOG("Efficiency %d.%02d ops/us\n",
                 (uint32_t)(total_ops / total_time),
                 (uint32_t)((total_ops * 100) / (total_time) % 100));
    NNOM_LOG("Total memory:%d\n", (uint32_t)nnom_mem_stat());
}
// Print the input/output Q format (Qm.n) and output shape of every layer
// in the model, following the shortcut list from head to tail.
void model_io_format(nnom_model_t *m)
{
    nnom_layer_t *layer;
    uint32_t run_num = 0;
    if (!m)
        return;
    layer = m->head;
    NNOM_LOG("\nPrint layer input/output..\n");
    NNOM_LOG("Layer(#)        -  Input(Qnm)  Output(Qnm)   Oshape \n");
    NNOM_LOG("----------------------------------------------------------\n");
    while (layer)
    {
        run_num++;
        NNOM_LOG("#%-3d", run_num);
        if(layer->type != NNOM_RNN)
            NNOM_LOG("%-10s - ", default_layer_names[layer->type]);
        else
        {
            NNOM_LOG("%-3s/", default_layer_names[layer->type]);
            NNOM_LOG("%-6s - ", default_cell_names[((nnom_rnn_layer_t*)layer)->cell->type]);
        }
        // Qm.n where m = 7 - dec bits (8-bit word assumed for the display)
        NNOM_LOG("  %2d.%2d", 7-layer->in->tensor->q_dec[0], layer->in->tensor->q_dec[0]);
        NNOM_LOG("      %2d.%2d", 7-layer->out->tensor->q_dec[0], layer->out->tensor->q_dec[0]);
        NNOM_LOG("      (");
        // print up to the first 3 output dimensions
        for (int i = 0; i < 3; i++)
        {
            if (layer->out->tensor->num_dim > i)
                NNOM_LOG("%4d,", layer->out->tensor->dim[i]);
            else
                NNOM_LOG("     ");
        }
        NNOM_LOG(")\n");
        if (layer->shortcut == NULL) // tail reached
            break;
        layer = layer->shortcut;
    }
}

View File

@ -0,0 +1,369 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_activation.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
// Create a generic activation layer wrapping the given activation instance.
// The layer takes ownership of `act` (freed via activation_free()).
// Returns NULL when allocation fails.
nnom_layer_t *Activation(nnom_activation_t *act)
{
    nnom_activation_layer_t *layer;
    nnom_layer_io_t *in, *out;
    // apply a block memory for all the sub handles.
    size_t mem_size = sizeof(nnom_activation_layer_t) + sizeof(nnom_layer_io_t) * 2;
    layer = nnom_mem(mem_size);
    if (layer == NULL)
        return NULL;
    // distribute the memory to sub handles.
    in = (void *)((uint8_t*)layer + sizeof(nnom_activation_layer_t));
    out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
    // set type in layer parent
    layer->super.type = NNOM_ACTIVATION;
    layer->super.run = activation_run;
    layer->super.build = default_build;
    // set buf state
    in->type = NNOM_TENSOR_BUF_TEMP;
    out->type = NNOM_TENSOR_BUF_NULL; // when a layer's io is set to NULL, both will point to same mem.
    // put in & out on the layer.
    layer->super.in = io_init(layer, in);
    layer->super.out = io_init(layer, out);
    // set activation to layer
    layer->act = act;
    // set free method (the act instance must be released with the layer)
    layer->super.free = activation_free;
    return (nnom_layer_t *)layer;
}
// Create a ReLU activation layer.
nnom_layer_t *ReLU(void)
{
    nnom_layer_t *layer = Activation(act_relu());
    if (layer != NULL)
        layer->type = NNOM_RELU; // override the generic activation type
    return layer;
}
// Create a leaky-ReLU activation layer with the given negative slope.
nnom_layer_t *LeakyReLU(float alpha)
{
    nnom_layer_t *layer = Activation(act_leaky_relu(alpha));
    if (layer != NULL)
        layer->type = NNOM_LEAKY_RELU; // override the generic activation type
    return layer;
}
// Create an advanced ReLU layer (negative slope, maximum clamp and threshold).
nnom_layer_t *AdvReLU(float alpha, float max, float threshold)
{
    nnom_layer_t *layer = Activation(act_adv_relu(alpha, max, threshold));
    if (layer != NULL)
        layer->type = NNOM_ADV_RELU; // override the generic activation type
    return layer;
}
// Create a sigmoid activation layer for the given input decimal bits.
nnom_layer_t *Sigmoid(int32_t dec_bit)
{
    nnom_layer_t *layer = Activation(act_sigmoid(dec_bit));
    if (layer != NULL)
        layer->type = NNOM_SIGMOID; // override the generic activation type
    return layer;
}
// Create a tanh activation layer for the given input decimal bits.
nnom_layer_t *TanH(int32_t dec_bit)
{
    nnom_layer_t *layer = Activation(act_tanh(dec_bit));
    if (layer != NULL)
        layer->type = NNOM_TANH; // override the generic activation type
    return layer;
}
// Free an activation instance created by one of the act_*() constructors.
void act_delete(nnom_activation_t* act){
    nnom_free(act);
}
// An activation layer takes ownership of the act instance passed to it,
// so the instance must be freed when the layer is deleted.
// This is the callback installed in layer->free.
nnom_status_t activation_free(nnom_layer_t *layer)
{
    if(layer)
        act_delete(((nnom_activation_layer_t *)layer)->act);
    return NN_SUCCESS;
}
// Layer run callback: apply the layer's activation in place on its input
// tensor (input and output share memory for activation layers).
nnom_status_t activation_run(nnom_layer_t *layer)
{
    nnom_activation_layer_t *cl = (nnom_activation_layer_t *)layer;
    return act_tensor_run(cl->act, layer->in->tensor);
}
// porting
// In-place ReLU on the activation's tensor; dispatches on bitwidth and
// between the CMSIS-NN and local implementations.
static nnom_status_t relu_run(nnom_activation_t* act)
{
    if(act->tensor->bitwidth == 16)
    {
#ifdef NNOM_USING_CMSIS_NN
        arm_relu_q15(act->tensor->p_data, tensor_size(act->tensor));
#else
        local_relu_q15(act->tensor->p_data, tensor_size(act->tensor));
#endif
    }
    else
    {
#ifdef NNOM_USING_CMSIS_NN
        arm_relu_q7(act->tensor->p_data, tensor_size(act->tensor));
#else
        local_relu_q7(act->tensor->p_data, tensor_size(act->tensor));
#endif
    }
    return NN_SUCCESS;
}
// leaky relu
// In-place leaky ReLU on the activation's tensor, dispatched on bitwidth.
static nnom_status_t leaky_relu_run(nnom_activation_t* act)
{
    nnom_activation_leaky_relu_t* param = (nnom_activation_leaky_relu_t*) act;
    size_t size = tensor_size(act->tensor);
    if(act->tensor->bitwidth == 16)
        local_leaky_relu_q15(act->tensor->p_data, param->alpha, size);
    else
        local_leaky_relu_q7(act->tensor->p_data, param->alpha, size);
    return NN_SUCCESS;
}
// advanced relu: ReLU with negative slope, maximum clamp and threshold.
// The float parameters are converted to fixed point at runtime, when the
// tensor's q format is known.
static nnom_status_t adv_relu_run(nnom_activation_t* act)
{
    nnom_activation_adv_relu_t* a = (nnom_activation_adv_relu_t*) act;
    if(act->tensor->bitwidth == 16)
    {
        q15_t max = 32767;
        q15_t threshold = MIN(a->threshold * (1 << (15 - act->tensor->q_dec[0])), 32767);
        // BUGFIX: the scale was declared q7_t but 1 << (15 - dec) reaches 32768,
        // which overflows q7_t (and q15_t); use a 32-bit int for the scale.
        int32_t max_scale = (1 << (15 - act->tensor->q_dec[0]));
        if(a->max != INFINITY && a->max != 0x7fc00000)
            if(a->max * max_scale < max)
                max = a->max * max_scale;
        local_adv_relu_q15(act->tensor->p_data, a->negative_slope, max, threshold, tensor_size(act->tensor));
    }
    // 8bit
    else
    {
        q7_t max = 127;
        q7_t threshold = MIN(a->threshold * (1 << (7 - act->tensor->q_dec[0])), 127);
        // BUGFIX: 1 << 7 == 128 overflows q7_t when q_dec is 0; use a wide int.
        int32_t max_scale = (1 << (7 - act->tensor->q_dec[0]));
        if(a->max != INFINITY && a->max != 0x7fc00000) // QNAN 0x7fc00000 also represent infinity in script 0.4.1
            if(a->max * max_scale < max)
                max = a->max * max_scale;
        local_adv_relu_q7(act->tensor->p_data, a->negative_slope, max, threshold, tensor_size(act->tensor));
    }
    return NN_SUCCESS;
}
// In-place tanh on the activation's tensor.
// int_bit = word bits - decimal bits selects the lookup-table range.
static nnom_status_t tanh_run(nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    // 16 bit
    if(act->tensor->bitwidth == 16)
    {
        uint8_t int_bit = 15 - a->dec_bit;
#ifdef NNOM_USING_CMSIS_NN
        arm_nn_activations_direct_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_TANH);
#else
        local_tanh_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit);
#endif
    }
    else // 8bit
    {
        uint8_t int_bit = 7 - a->dec_bit;
        // arm version cannot handle int_bit > 3
#ifdef NNOM_USING_CMSIS_NN
        if(act->tensor->q_dec[0] <= 3)
            arm_nn_activations_direct_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_TANH);
        else
#endif
            local_tanh_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit);
    }
    return NN_SUCCESS;
}
// In-place sigmoid on the activation's tensor.
// int_bit = word bits - decimal bits selects the lookup-table range.
static nnom_status_t sigmoid_run( nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    // 16 bit
    if(act->tensor->bitwidth == 16)
    {
        uint8_t int_bit = 15 - a->dec_bit;
#ifdef NNOM_USING_CMSIS_NN
        arm_nn_activations_direct_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_SIGMOID);
#else
        local_sigmoid_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit);
#endif
    }
    else // 8bit
    {
        uint8_t int_bit = 7 - a->dec_bit;
        // arm version cannot handle int_bit > 3
#ifdef NNOM_USING_CMSIS_NN
        if(act->tensor->q_dec[0] <= 3)
            // BUGFIX: was ARM_TANH - the CMSIS-NN 8-bit path applied the wrong activation
            arm_nn_activations_direct_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_SIGMOID);
        else
#endif
            local_sigmoid_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit);
    }
    return NN_SUCCESS;
}
// In-place hard tanh (piecewise-linear tanh approximation) on the tensor.
static nnom_status_t hard_tanh_run( nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    if(act->tensor->bitwidth == 16)
        local_hard_tanh_q15(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit + 8);	// a->dec is based on 8 bit.
    else
        local_hard_tanh_q7(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit);
    return NN_SUCCESS;
}
// In-place hard sigmoid (piecewise-linear sigmoid approximation) on the tensor.
static nnom_status_t hard_sigmoid_run( nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    if(act->tensor->bitwidth == 16)
        local_hard_sigmoid_q15(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit + 8);	// a->dec is based on 8 bit.
    else
        local_hard_sigmoid_q7(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit);
    return NN_SUCCESS;
}
//
nnom_activation_t* act_relu(void)
{
nnom_activation_t* act = nnom_mem(sizeof(nnom_activation_t));
act->run = relu_run;
act->type = ACT_RELU;
return act;
}
nnom_activation_t* act_leaky_relu(float alpha)
{
nnom_activation_leaky_relu_t* act = nnom_mem(sizeof(nnom_activation_leaky_relu_t));
act->super.run = leaky_relu_run;
act->super.type = ACT_LEAKY_RELU;
act->alpha = (q7_t)(alpha*128);
return (nnom_activation_t* )act;
}
nnom_activation_t* act_adv_relu(float negative_slope, float max, float threshold)
{
nnom_activation_adv_relu_t* act = nnom_mem(sizeof(nnom_activation_adv_relu_t));
act->super.run = adv_relu_run;
act->super.type = ACT_ADV_RELU;
act->negative_slope = (q7_t)(negative_slope*128);
act->max = max;
act->threshold = threshold;
return (nnom_activation_t* )act;
}
nnom_activation_t* act_tanh(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = tanh_run;
act->super.type = ACT_TANH;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
nnom_activation_t* act_sigmoid(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = sigmoid_run;
act->super.type = ACT_SIGMOID;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
nnom_activation_t* act_hard_tanh(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = hard_tanh_run;
act->super.type = ACT_HARD_TANH;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
nnom_activation_t* act_hard_sigmoid(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = hard_sigmoid_run;
act->super.type = ACT_HARD_SIGMOID;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
// Return the decimal bit the layer's output will have after the given
// activation. The saturating activations squash their output into a
// fixed range, so the result is always Q0.7; rectifier-family activations
// leave the q-format untouched and the incoming dec_bit passes through.
int32_t act_get_dec_bit(nnom_activation_type_t type, int32_t dec_bit)
{
	if (type == ACT_TANH || type == ACT_HARD_TANH
		|| type == ACT_SIGMOID || type == ACT_HARD_SIGMOID)
		dec_bit = 7;
	return dec_bit;
}
// Direct API: apply an activation to a tensor in place.
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor)
{
	act->tensor = tensor;   // bind the target tensor to the activation object
	return act->run(act);   // dispatch to the concrete activation's run method
}

View File

@ -0,0 +1,167 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_avgpool.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *avgpool_s(const nnom_pool_config_t * config)
{
nnom_avgpool_layer_t *cl;
if(config->num_dim == 1)
{
cl = (nnom_avgpool_layer_t *)AvgPool(kernel(1, config->kernel_size[0]),
stride(1, config->stride_size[0]),
config->padding_type);
}
else
{
cl = (nnom_avgpool_layer_t *)AvgPool(kernel(config->kernel_size[0], config->kernel_size[1]),
stride(config->stride_size[0], config->stride_size[1]),
config->padding_type);
}
if(cl)
{
cl->super.config = (void*) config;
cl->output_shift = config->output_shift; // no idea if we need it
}
return (nnom_layer_t *)cl;
}
// Average pooling reuses the MaxPool layer structure wholesale; only the
// layer type and the run/build methods differ.
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type)
{
	nnom_layer_t *layer = MaxPool(k, s, pad_type);
	if (layer == NULL)
		return NULL;

	layer->type = NNOM_AVGPOOL;
	layer->run = avgpool_run;
	layer->build = avgpool_build;
	return layer;
}
// Build the average-pooling layer.
// Output shape, stride and padding are identical to max pooling, so the
// work is delegated to maxpool_build(). When CMSIS-NN is enabled a scratch
// buffer is additionally requested for arm_avepool_q7_HWC().
// Fix: `size` was declared outside the #ifdef, producing an unused-variable
// warning whenever NNOM_USING_CMSIS_NN is disabled.
nnom_status_t avgpool_build(nnom_layer_t *layer)
{
	// avg pooling share the same output shape, stride, padding setting.
	maxpool_build(layer);

#ifdef NNOM_USING_CMSIS_NN
	// however, avg pooling require a computational buffer.
	// bufferA size: 2*dim_im_out*ch_im_in — use the larger output dimension.
	uint32_t size = layer->out->tensor->dim[1] > layer->out->tensor->dim[0] ?
					layer->out->tensor->dim[1] : layer->out->tensor->dim[0];
	layer->comp->size = 2 * size * layer->in->tensor->dim[2];
#endif
	return NN_SUCCESS;
}
// Run average pooling.
// Dispatches on data width (q7/q15), memory layout (CHW/HWC) and, for the
// 8-bit HWC case, on whether the CMSIS-NN square-pooling kernel applies.
// Tensor dim[] is HWC order: dim[0]=H, dim[1]=W, dim[2]=C.
nnom_status_t avgpool_run(nnom_layer_t *layer)
{
	nnom_avgpool_layer_t *cl = (nnom_avgpool_layer_t *)(layer);
	uint16_t out_x, out_y;

	// if global pooling: output collapsed to a single value per channel
	if(layer->out->tensor->num_dim == 1)
	{
		out_x = 1; out_y = 1;
	}
	else // normal pooling.
	{
		out_x = layer->out->tensor->dim[1]; //W
		out_y = layer->out->tensor->dim[0]; //h
	}

	// 16 bit path: local kernels only (no CMSIS-NN q15 avg pooling here)
	if(layer->in->tensor->bitwidth == 16)
	{
#ifdef NNOM_USING_CHW
		local_avepool_q15_CHW(layer->in->tensor->p_data,
				layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
				cl->kernel.w, cl->kernel.h,
				cl->pad.w, cl->pad.h,
				cl->stride.w, cl->stride.h,
				out_x, out_y,
				cl->output_shift,
				NULL,
				layer->out->tensor->p_data);
#else
		local_avepool_q15_HWC(layer->in->tensor->p_data,
				layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
				cl->kernel.w, cl->kernel.h,
				cl->pad.w, cl->pad.h,
				cl->stride.w, cl->stride.h,
				out_x, out_y,
				cl->output_shift,
				NULL,
				layer->out->tensor->p_data);
#endif
	}
	// 8bit path
	else{
#ifdef NNOM_USING_CHW
		local_avepool_q7_CHW(layer->in->tensor->p_data,
				layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
				cl->kernel.w, cl->kernel.h,
				cl->pad.w, cl->pad.h,
				cl->stride.w, cl->stride.h,
				out_x, out_y,
				cl->output_shift,
				NULL,
				layer->out->tensor->p_data);
#else //end of CHW
#ifdef NNOM_USING_CMSIS_NN
		// CMSIS-NN kernel only handles square 2D shapes and no output shift
		if (layer->in->tensor->dim[1] == layer->in->tensor->dim[0] &&
			layer->out->tensor->dim[1] == layer->out->tensor->dim[0] &&
			cl->output_shift == 0)
		{
			arm_avepool_q7_HWC(
					layer->in->tensor->p_data,
					layer->in->tensor->dim[1], layer->in->tensor->dim[2],
					cl->kernel.w, cl->pad.w, cl->stride.w,
					layer->out->tensor->dim[1],
					layer->comp->mem->blk,   // scratch buffer sized in avgpool_build()
					layer->out->tensor->p_data);
		}
		// none square 2D, or 1D
		else
#endif
		{
			// CMSIS-NN does not support none-square pooling, we have to use local implementation
			local_avepool_q7_HWC(layer->in->tensor->p_data,
					layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
					cl->kernel.w, cl->kernel.h,
					cl->pad.w, cl->pad.h,
					cl->stride.w, cl->stride.h,
					out_x, out_y,
					cl->output_shift,
					NULL,
					layer->out->tensor->p_data);
		}
#endif
	}
	return NN_SUCCESS;
}

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_baselayer.h"
// this layer copys the input to the output
nnom_layer_t *baselayer_s(const nnom_layer_config_t * config)
{
nnom_layer_t *layer = BaseLayer();
if(layer)
layer->config = (void*) config;
return layer;
}
// Construct the base (identity) layer using the default run/build methods.
nnom_layer_t *BaseLayer()
{
	nnom_io_layer_t *layer;
	nnom_layer_io_t *io_in, *io_out;

	// single allocation carrying the layer struct plus both IO handles
	size_t alloc_size = sizeof(nnom_io_layer_t) + sizeof(nnom_layer_io_t) * 2;
	layer = nnom_mem(alloc_size);
	if (layer == NULL)
		return NULL;

	// carve the IO handles out of the tail of the same allocation
	io_in = (void *)((uint8_t*)layer + sizeof(nnom_io_layer_t));
	io_out = (void *)((uint8_t*)io_in + sizeof(nnom_layer_io_t));

	// identity layer: default methods pass the input through unchanged
	layer->super.type = NNOM_BASE;
	layer->super.run = default_run;
	layer->super.build = default_build;

	// buffer states: output marked BUF_NULL, in which case default_run()
	// performs no copy at all
	io_in->type = NNOM_TENSOR_BUF_TEMP;
	io_out->type = NNOM_TENSOR_BUF_NULL;

	// attach both IOs to the layer
	layer->super.in = io_init(layer, io_in);
	layer->super.out = io_init(layer, io_out);

	return (nnom_layer_t *)layer;
}
// Default build: called when the output shape is not otherwise defined.
// Sets the output shape equal to the input shape; operates on the primary
// IO only. Cannot be used as the first layer of a model, since it pulls
// its input tensor from the previous layer's hook.
nnom_status_t default_build(nnom_layer_t *layer)
{
	// get the last layer's output as input shape
	layer->in->tensor = layer->in->hook.io->tensor;
	// output tensor
	// 1. allocate a new tensor for output
	// 2. set the same dim, qfmt to the new tensor.
	layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR,layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
	tensor_cpy_attr(layer->out->tensor, layer->in->tensor);

	// see if the trailing activation will change the q format
	if(layer->actail)
		layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);

	// now this build has passed the input tensors (shapes, formats) to the new tensors.
	return NN_SUCCESS;
}
// Default run: simply copy the input tensor's data to the output tensor.
nnom_status_t default_run(nnom_layer_t *layer)
{
	// when no dedicated output buffer exists (BUF_NULL) there is nothing
	// to move
	if (layer->out->type == NNOM_TENSOR_BUF_NULL)
		return NN_SUCCESS;

	nnom_memcpy(layer->out->tensor->p_data,
				layer->in->tensor->p_data,
				tensor_size_byte(layer->in->tensor));
	return NN_SUCCESS;
}

View File

@ -0,0 +1,223 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_concat.h"
nnom_layer_t *concat_s(const nnom_concat_config_t *config)
{
nnom_layer_t* layer = Concat(config->axis);
if(layer)
layer->config = (void*) config;
return layer;
}
// Concatenation layer constructor.
// A concat layer takes more than one input; the auxiliary inputs are
// allocated later by model.merge().
nnom_layer_t *Concat(int8_t axis)
{
	nnom_concat_layer_t *layer;
	nnom_layer_io_t *io_in, *io_out;
	size_t alloc_size;

	// one allocation for the layer struct and its two IO handles
	alloc_size = sizeof(nnom_concat_layer_t) + sizeof(nnom_layer_io_t) * 2;
	layer = nnom_mem(alloc_size);
	if (layer == NULL)
		return NULL;

	// carve the IO handles out of the same allocation
	io_in = (void *)((uint8_t*)layer + sizeof(nnom_concat_layer_t));
	io_out = (void *)((uint8_t*)io_in + sizeof(nnom_layer_io_t));

	// layer type and methods
	layer->super.type = NNOM_CONCAT;
	layer->super.run = concat_run;
	layer->super.build = concat_build;

	// both IOs need their own buffers
	io_in->type = NNOM_TENSOR_BUF_TEMP;
	io_out->type = NNOM_TENSOR_BUF_TEMP;

	// attach the IOs, then record the concat axis (may still be negative
	// or 1-based here; concat_build() normalises it)
	layer->super.in = io_init(layer, io_in);
	layer->super.out = io_init(layer, io_out);
	layer->axis = axis;

	return (nnom_layer_t *)layer;
}
// Build the concatenation layer.
// Collects the tensor of every hooked input, then allocates an output
// tensor whose size along the concat axis is the sum of the inputs' sizes.
// Every other axis must match across all inputs (NN_ARGUMENT_ERROR if not).
nnom_status_t concat_build(nnom_layer_t *layer)
{
	nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
	nnom_layer_io_t *in;
	uint32_t in_num = 0;   // counts inputs; currently informational only
	int32_t num_dim;

	// for each input module, copy the shape from the output of last layer
	in = layer->in;
	while (in != NULL)
	{
		//get the last layer's output as input shape
		in->tensor = in->hook.io->tensor;
		in = in->aux;
		in_num++;
	}

	// allocate new tensor for output, keep the same dimension length
	layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
	tensor_cpy_attr(layer->out->tensor, layer->in->tensor);

	// normalise the axis to a 0-based index.
	if (cl->axis < 0)
		cl->axis = (layer->in->tensor->num_dim + cl->axis);
	else if (cl->axis >0)
		cl->axis = cl->axis -1; // keras use axis start from 1. we are using 0, 1, 2 (check?)

	// compute each output dimension
	num_dim = layer->in->tensor->num_dim;
	for (uint32_t i = 0; i < num_dim; i ++)
	{
		// the concat axis: sum up the same axis from all inputs
		if (i == cl->axis)
		{
			layer->out->tensor->dim[i] = 0;
			// add the same axis from all input up.
			in = layer->in;
			while (in != NULL)
			{
				layer->out->tensor->dim[i] += in->tensor->dim[i];
				in = in->aux;
			}
			continue;
		}
		// check others, all other must be same shape
		in = layer->in;
		while (in != NULL && in->aux != NULL)
		{
			if (in->tensor->dim[i] != in->aux->tensor->dim[i])
				return NN_ARGUMENT_ERROR;
			in = in->aux;
		}
		// now set other axis
		layer->out->tensor->dim[i] = layer->in->tensor->dim[i];
	}
	return NN_SUCCESS;
}
#ifdef NNOM_USING_CHW
// Convert an axis index from HWC order to CHW order.
// In CHW order the channel axis moves to the front, so every HWC axis
// shifts one position right and the last (channel) axis wraps to 0.
// e.g. num_dim=3: H(0)->1, W(1)->2, C(2)->0.
static inline int chw_i(int hwc, int num_dim)
{
	int last = num_dim - 1;   // index of the channel axis in HWC order
	int idx = hwc + 1;        // shift one position right
	if (idx > last)
		idx = 0;              // channel wraps to the front
	return idx;
}
// Convert an axis index from CHW order back to HWC order (inverse of chw_i).
// CHW axis 0 is the channel, which lives at the last HWC position; every
// other axis shifts one position left.
// e.g. num_dim=3: 0->C(2), 1->H(0), 2->W(1).
// Fix: the condition was `if(chw<num_dim)`, which mapped every in-range
// axis to the channel position and broke the CHW concat block arithmetic
// in concat_run(); only CHW axis 0 must wrap.
static inline int hwc_i(int chw, int num_dim)
{
	num_dim = num_dim - 1;
	chw = chw - 1;
	if (chw < 0)          // CHW axis 0 (channel) -> last HWC axis
		chw = num_dim;
	return chw;
}
#endif
// Run the concatenation.
// A concat layer has multiple (>=2) inputs and one output. The inputs are
// copied block-by-block so their data is interleaved along the concat axis
// in the output buffer. dim[] is always stored in HWC order; the CHW path
// remaps indices through chw_i()/hwc_i().
nnom_status_t concat_run(nnom_layer_t *layer)
{
	nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
	nnom_layer_io_t *in;
	uint32_t dwidth = layer->in->tensor->bitwidth/8; // data width in byte

#ifdef NNOM_USING_CHW
	// Concatenate for CHW layout
	uint8_t *pin;
	uint8_t *pout = layer->out->tensor->p_data;
	uint32_t block_size;
	uint32_t n_block;
	uint8_t num_dim = layer->in->tensor->num_dim;

	// number of blocks to concat: product of the axes that precede the
	// concat axis in CHW order
	n_block = 1;
	for(int i= 0; i< chw_i(cl->axis, num_dim); i++)
	{
		n_block *= layer->in->tensor->dim[hwc_i(i, num_dim)];
	}

	// concat all input layers
	for(int i=0; i<n_block; i++)
	{
		in = layer->in;
		while (in != NULL)
		{
			// the block size of concat data in this layer: product of the
			// concat axis and everything after it (in CHW order)
			block_size = dwidth;
			for(int j= num_dim-1; j >= chw_i(cl->axis, num_dim); j--)
				block_size *= in->tensor->dim[hwc_i(j, num_dim)];
			// copy this input's i-th block to the running output cursor
			pin = (uint8_t *)in->tensor->p_data + i * block_size;
			nnom_memcpy(pout, pin, block_size);
			pout += block_size;
			in = in->aux;
		}
	}
#else // end of CHW concate
	// Concatenate for HWC layout
	uint8_t* pin;
	uint8_t* pout = layer->out->tensor->p_data;
	uint32_t block_size;
	uint32_t n_block;
	uint8_t num_dim = layer->in->tensor->num_dim;

	// number of blocks to concat: product of the axes before the concat axis
	n_block = 1;
	for (int i = 0; i < cl->axis; i++)
		n_block *= layer->in->tensor->dim[i];

	// concat all input layers
	for (int i = 0; i < n_block; i++)
	{
		in = layer->in;
		while (in != NULL)
		{
			// the block size of concat data in this layer: product of the
			// concat axis and everything after it
			block_size = dwidth;
			for (int j = cl->axis; j < num_dim; j++)
				block_size *= in->tensor->dim[j];
			// copy this input's i-th block to the running output cursor
			pin = (uint8_t*)in->tensor->p_data + i * block_size;
			nnom_memcpy(pout, pin, block_size);
			pout += block_size;
			in = in->aux;
		}
	}
#endif
	return NN_SUCCESS;
}

View File

@ -0,0 +1,434 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_conv2d.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
// a machine friendly api, with suffix _s for structured configuration.
// Build a Conv2D layer from a structured config (normally generated by the
// nnom converter script). The weight/bias tensors and shift arrays remain
// owned by the config; conv2d_free() will not release them in this case.
nnom_layer_t *conv2d_s(const nnom_conv2d_config_t *config)
{
	nnom_conv2d_layer_t *layer;
	nnom_buf_t *comp;
	nnom_layer_io_t *in, *out;
	size_t mem_size;

	// allocate a block memory for all the sub handles and shifts.
	mem_size = sizeof(nnom_conv2d_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
	layer = nnom_mem(mem_size);
	if (layer == NULL)
		return NULL;

	// distribute the memory to sub handles.
	in = (void *)((uint8_t*)layer + sizeof(nnom_conv2d_layer_t));
	out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
	comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));

	// set type in layer parent
	layer->super.type = NNOM_CONV_2D;
	// set buf state
	in->type = NNOM_TENSOR_BUF_TEMP;
	out->type = NNOM_TENSOR_BUF_TEMP;
	comp->type = NNOM_TENSOR_BUF_TEMP;
	// put in & out on the layer.
	layer->super.in = io_init(layer, in);
	layer->super.out = io_init(layer, out);
#ifdef NNOM_USING_CMSIS_NN
	// the scratch buffer is only needed by the CMSIS-NN kernels
	layer->super.comp = comp;
#endif
	// set run method & output shape
	layer->super.run = conv2d_run;
	layer->super.build = conv2d_build;
	layer->super.free = conv2d_free;

	// save the config
	layer->super.config = (void*) config;

	// get the private parameters
	// test: for 1d input, expend h = 1
	if(config->weight->num_dim == 3)
	{
		layer->kernel = kernel(1, config->kernel_size[0]);
		layer->stride = stride(1, config->stride_size[0]);
		layer->dilation = dilation(1, config->dilation_size[0]);
	}
	else
	{
		layer->kernel = kernel(config->kernel_size[0], config->kernel_size[1]);
		layer->stride = stride(config->stride_size[0], config->stride_size[1]);
		layer->dilation = dilation(config->dilation_size[0], config->dilation_size[1]);
	}
	layer->filter_mult = config->filter_size; // for convs, this means filter number
	layer->padding_type = config->padding_type;

	// get bias and weight tensor, this should be created by script.
	layer->weight = config->weight;
	layer->bias = config->bias;

	// get shifts
	layer->output_rshift = (nnom_qformat_param_t *)config->output_shift;
	layer->bias_lshift = (nnom_qformat_param_t *)config->bias_shift;

	// padding (half the dilated kernel extent for SAME)
	if (layer->padding_type == PADDING_SAME)
	{
		layer->pad.h = layer->dilation.h * (layer->kernel.h - 1) / 2;
		layer->pad.w = layer->dilation.w * (layer->kernel.w - 1) / 2;
		layer->pad.c = (1 - 1) / 2; // always 0; kept for symmetry with h/w
	}
	return (nnom_layer_t *)layer;
}
// Conv2D — legacy (non-structured) constructor.
// filters: number of output channels; k/s/d: kernel, stride and dilation
// shapes; pad_type: SAME or VALID; w/b: legacy weight/bias structs holding
// the quantised data pointers and their shifts.
// Fix: the bias-config section set `layer->weight->qtype` a second time
// (copy-paste) instead of `layer->bias->qtype`.
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
					 const nnom_weight_t *w, const nnom_bias_t *b)
{
	nnom_conv2d_layer_t *layer;
	nnom_buf_t *comp;
	nnom_layer_io_t *in, *out;

	// apply a block memory for all the sub handles.
	size_t mem_size = sizeof(nnom_conv2d_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
	layer = nnom_mem(mem_size);
	if (layer == NULL)
		return NULL;

	// distribute the memory to sub handles.
	in = (void *)((uint8_t*)layer + sizeof(nnom_conv2d_layer_t));
	out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
	comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));

	// set type in layer parent
	layer->super.type = NNOM_CONV_2D;
	// set buf state
	in->type = NNOM_TENSOR_BUF_TEMP;
	out->type = NNOM_TENSOR_BUF_TEMP;
	comp->type = NNOM_TENSOR_BUF_TEMP;
	// put in & out on the layer.
	layer->super.in = io_init(layer, in);
	layer->super.out = io_init(layer, out);
#ifdef NNOM_USING_CMSIS_NN
	layer->super.comp = comp;
#endif
	// set run method & output shape
	layer->super.run = conv2d_run;
	layer->super.build = conv2d_build;
	// NOTE(review): super.free is not set here (conv2d_s() does set it), so
	// the tensors created below are never released — confirm whether
	// conv2d_free should be attached for this legacy API as well.

	// get the private parameters
	layer->kernel = k;
	layer->stride = s;
	layer->dilation = d;
	layer->filter_mult = filters; // for convs, this means filter number
	layer->padding_type = pad_type;

	// create weight and bias tensor
	layer->weight = new_tensor(NNOM_QTYPE_PER_TENSOR, 4, filters);
	layer->bias = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, filters);

	// configure weight tensor manually to support new tensor based backends.
	// needs to be very careful
	{
		// config weight
		nnom_shape_data_t dim[4] = {k.h, k.w, k.c, filters};
		*(layer->weight->q_offset) = 0; // we have no support of offset here
		*(layer->weight->q_dec) = 0;	// not using it
		layer->weight->p_data = (void*)w->p_value;
		layer->weight->bitwidth = 8;
		layer->weight->qtype = NNOM_QTYPE_PER_TENSOR;
		nnom_memcpy(layer->weight->dim, dim, layer->weight->num_dim * sizeof(nnom_shape_data_t));

		// config bias
		dim[0] = filters;
		*(layer->bias->q_offset) = 0; // we have no support of offset here
		*(layer->bias->q_dec) = 0;	  // not using it
		layer->bias->p_data = (void*) b->p_value;
		layer->bias->bitwidth = 8;
		layer->bias->qtype = NNOM_QTYPE_PER_TENSOR; // fix: was layer->weight->qtype
		nnom_memcpy(layer->bias->dim, dim, layer->bias->num_dim * sizeof(nnom_shape_data_t));

		// output shift and bias shift
		layer->output_rshift = (nnom_qformat_param_t *)&w->shift;
		layer->bias_lshift = (nnom_qformat_param_t *)&b->shift;
	}
	return (nnom_layer_t *)layer;
}
// Compute one spatial output dimension of a convolution.
// Follows keras's implementation:
// https://github.com/keras-team/keras/blob/7a39b6c62d43c25472b2c2476bd2a8983ae4f682/keras/utils/conv_utils.py#L85
uint32_t conv_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation)
{
	uint32_t span, len;

	if (input_length == 0)
		return 0;
	// effective kernel extent once dilation gaps are included
	span = (filter_size - 1) * dilation + 1;
	// SAME keeps the spatial size (before striding); VALID shrinks it
	len = (padding == PADDING_SAME) ? input_length : input_length - span + 1;
	// ceiling division by the stride
	return (len + stride - 1) / stride;
}
// Build the Conv2D layer: derive the output tensor (shape and q-format)
// from the input tensor and the layer's kernel/stride/dilation/padding,
// fill in the SAME padding amounts, request the CMSIS-NN scratch buffer
// and record the MACC cost estimate.
nnom_status_t conv2d_build(nnom_layer_t *layer)
{
	nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;

	// get the tensor from last layer's output
	layer->in->tensor = layer->in->hook.io->tensor;

	// create new tensor for the output
	layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, cl->filter_mult);
	// copy then change later.
	tensor_cpy_attr(layer->out->tensor, layer->in->tensor);

	// calculate the output tensor q format, only support per tensor quantise now
	layer->out->tensor->q_dec[0] = layer->in->tensor->q_dec[0] + cl->weight->q_dec[0] - cl->output_rshift[0]; // need some modification for 16bit.
	// see if the activation will change the q format
	if(layer->actail)
		layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);

	// now we set up the tensor shape, always HWC format
	layer->out->tensor->dim[0] = conv_output_length(layer->in->tensor->dim[0], cl->kernel.h, cl->padding_type, cl->stride.h, cl->dilation.h);
	layer->out->tensor->dim[1] = conv_output_length(layer->in->tensor->dim[1], cl->kernel.w, cl->padding_type, cl->stride.w, cl->dilation.w);
	layer->out->tensor->dim[2] = cl->filter_mult; // output channels = number of filters

	// fill padding: half the dilated kernel extent for SAME
	if (cl->padding_type == PADDING_SAME)
	{
		cl->pad.w = cl->dilation.w * (cl->kernel.w - 1) / 2;
		cl->pad.h = cl->dilation.h * (cl->kernel.h - 1) / 2;
		cl->pad.c = 0;
	}
#ifdef NNOM_USING_CMSIS_NN
	// bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
	// NOTE(review): the code allocates twice that (extra factor 2) —
	// presumably headroom for the q15 kernels; confirm before changing.
	layer->comp->size = 2 * 2 * layer->in->tensor->dim[2] * cl->kernel.w * cl->kernel.h;
#endif
	// computational cost: K x K x Cin x Hout x Wout x Cout
	layer->stat.macc = cl->kernel.w * cl->kernel.h * layer->in->tensor->dim[2] * tensor_size(layer->out->tensor);
	return NN_SUCCESS;
}
// Free resources owned by the Conv2D layer.
// The weight/bias tensors belong to the layer only when it was built via
// the legacy Conv2D() API (no structured config attached); with conv2d_s()
// they belong to the caller's config and must not be deleted here.
nnom_status_t conv2d_free(nnom_layer_t *layer)
{
	nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;

	if (layer->config != NULL)
		return NN_SUCCESS;

	delete_tensor(cl->weight);
	delete_tensor(cl->bias);
	return NN_SUCCESS;
}
// Run 2D convolution.
// Dispatches between the CHW/HWC layouts, the CMSIS-NN accelerated kernels
// (when enabled and when dilation == 1 with per-tensor quantisation:
// RGB, 1x1-fast, square-fast, non-square, basic variants) and the local C
// implementations, which additionally handle dilation and per-channel
// quantisation.
// Fix: the CMSIS 16-bit "none opt basic" branch called
// local_convolve_HWC_q7_nonsquare() — the 8-bit kernel — on q15 data;
// it now uses local_convolve_HWC_q15_nonsquare(), matching the
// non-CMSIS 16-bit fallback at the bottom.
nnom_status_t conv2d_run(nnom_layer_t *layer)
{
	nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;

#ifdef NNOM_USING_CHW
	// CHW format: local kernels only (CMSIS-NN works on HWC)
	if(layer->in->tensor->bitwidth == 16)
		local_convolve_CHW_q15_nonsquare(
				layer->in->tensor->p_data,
				layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
				cl->weight->p_data, layer->out->tensor->dim[2],
				cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
				cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
				layer->out->tensor->p_data,
				layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
	else
		local_convolve_CHW_q7_nonsquare(
				layer->in->tensor->p_data,
				layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
				cl->weight->p_data, layer->out->tensor->dim[2],
				cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
				cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
				layer->out->tensor->p_data,
				layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
	return NN_SUCCESS;
#else
	// HWC format
#ifdef NNOM_USING_CMSIS_NN
	// current CMSIS-NN supports neither dilation nor per-channel quantisation
	if(cl->dilation.w == 1 && cl->dilation.h == 1 && cl->weight->qtype == NNOM_QTYPE_PER_TENSOR)
	{
		// 8 bit cmsis nn
		if(layer->in->tensor->bitwidth == 8)
		{
			// RGB special case: ch_im_in = 3, square input
			if (layer->in->tensor->dim[2] == 3 && layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
				// squared kernel/pad/stride
				if((cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
					return (nnom_status_t)arm_convolve_HWC_q7_RGB(
						layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
						cl->weight->p_data,
						layer->out->tensor->dim[2],
						cl->kernel.w, cl->pad.w, cl->stride.w,
						cl->bias->p_data, cl->bias_lshift[0],
						cl->output_rshift[0], layer->out->tensor->p_data, layer->out->tensor->dim[1],
						(q15_t *)(layer->comp->mem->blk), NULL);
			// check if we can use the optimized kernels:
			// ch_im_in is multiple of 4 and ch_im_out is multiple of 2
			if ((layer->in->tensor->dim[2] % 4 == 0) && (layer->out->tensor->dim[2] % 2 == 0))
			{
				// squared shapes
				if((layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
					&& (layer->out->tensor->dim[0] == layer->out->tensor->dim[1])
					&& (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
				{
					// 1x1 fast
					if (cl->kernel.w == 1 && cl->kernel.h == 1 && cl->stride.w == 1 && cl->stride.h == 1 && cl->pad.w == 0 && cl->pad.h == 0)
						return (nnom_status_t)arm_convolve_1x1_HWC_q7_fast_nonsquare(
							layer->in->tensor->p_data,
							layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
							cl->weight->p_data,
							layer->out->tensor->dim[2],
							cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
							cl->bias->p_data, cl->bias_lshift[0],
							cl->output_rshift[0], layer->out->tensor->p_data, layer->out->tensor->dim[1], layer->out->tensor->dim[0],
							(q15_t *)(layer->comp->mem->blk), NULL);
					// opt square shape
					else
						return (nnom_status_t)arm_convolve_HWC_q7_fast(
							layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
							cl->weight->p_data,
							layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
							cl->bias->p_data, cl->bias_lshift[0],
							cl->output_rshift[0], layer->out->tensor->p_data,
							layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
				}
				// opt none square shape
				else
					return (nnom_status_t)arm_convolve_HWC_q7_fast_nonsquare(
						layer->in->tensor->p_data,
						layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
						cl->weight->p_data, layer->out->tensor->dim[2],
						cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
						cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
						layer->out->tensor->p_data,
						layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
			}
			// none optimized
			else
			{
				// none opt square shape
				if ((layer->in->tensor->dim[0] == layer->in->tensor->dim[1] &&
					 layer->out->tensor->dim[0] == layer->out->tensor->dim[1]) &&
					(cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
					return (nnom_status_t)arm_convolve_HWC_q7_basic(
						layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
						cl->weight->p_data,
						layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
						cl->bias->p_data, cl->bias_lshift[0],
						cl->output_rshift[0], layer->out->tensor->p_data,
						layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
				// none opt none square shape
				else
					return (nnom_status_t)arm_convolve_HWC_q7_basic_nonsquare(
						layer->in->tensor->p_data,
						layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
						cl->weight->p_data, layer->out->tensor->dim[2],
						cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
						cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
						layer->out->tensor->p_data,
						layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
			} //end of cmsis-nn none-opt
		} //end of 8 bit cmsis-nn
		else if (layer->in->tensor->bitwidth == 16)
		{
			// fast opt: ch_im_in and ch_im_out both multiples of 2
			if ((layer->in->tensor->dim[2] % 2 == 0) && (layer->out->tensor->dim[2] % 2 == 0))
			{
				if((layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
					&& (layer->out->tensor->dim[0] == layer->out->tensor->dim[1])
					&& (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
					return (nnom_status_t)arm_convolve_HWC_q15_fast(
						layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
						cl->weight->p_data,
						layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
						cl->bias->p_data, cl->bias_lshift[0],
						cl->output_rshift[0], layer->out->tensor->p_data,
						layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
				else
					return (nnom_status_t)arm_convolve_HWC_q15_fast_nonsquare(
						layer->in->tensor->p_data,
						layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
						cl->weight->p_data, layer->out->tensor->dim[2],
						cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
						cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
						layer->out->tensor->p_data,
						layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
			}
			// none opt basic
			else
			{
				// FIX: was local_convolve_HWC_q7_nonsquare(), i.e. the 8-bit
				// kernel running on 16-bit data; use the q15 variant as the
				// non-CMSIS fallback below does.
				local_convolve_HWC_q15_nonsquare(
					layer->in->tensor->p_data,
					layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
					cl->weight->p_data, layer->out->tensor->dim[2],
					cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
					cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
					layer->out->tensor->p_data,
					layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
				return NN_SUCCESS;
			}
		} // end of 16 bit cmsis-nn
	} // end of dilation == 1
	else
#endif // NNOM_USING_CMSIS_NN
	{
		// local fallback: handles dilation and per-channel quantisation
		if(layer->in->tensor->bitwidth == 16)
			local_convolve_HWC_q15_nonsquare(
					layer->in->tensor->p_data,
					layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
					cl->weight->p_data, layer->out->tensor->dim[2],
					cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
					cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
					layer->out->tensor->p_data,
					layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
		else
			local_convolve_HWC_q7_nonsquare(
					layer->in->tensor->p_data,
					layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
					cl->weight->p_data, layer->out->tensor->dim[2],
					cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
					cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
					layer->out->tensor->p_data,
					layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
		return NN_SUCCESS;
	}
#endif // end of CHW/HWC
	return NN_SUCCESS;
}

Some files were not shown because too many files have changed in this diff Show More