forked from xuos/xiuos
Merge branch 'prepare_for_master' of https://git.trustie.net/xuos/xiuos into prepare_for_master
This commit is contained in:
commit 5ba501eae7
@@ -1,7 +1,7 @@
 [submodule "Ubiquitous/RT_Thread/rt-thread"]
 	path = Ubiquitous/RT_Thread/rt-thread
 	url = https://code.gitlink.org.cn/chunyexixiaoyu/rt-thread.git
-[submodule "Ubiquitous/RT_Thread/bsp/k210/kendryte-sdk/kendryte-sdk-source"]
+[submodule "Ubiquitous/RT_Thread/aiit_board/k210/kendryte-sdk/kendryte-sdk-source"]
 	path = Ubiquitous/RT_Thread/aiit_board/k210/kendryte-sdk/kendryte-sdk-source
 	url = https://code.gitlink.org.cn/chunyexixiaoyu/kendryte-sdk-source.git
 [submodule "Ubiquitous/Nuttx/apps"]
@@ -7,7 +7,7 @@ ifeq ($(CONFIG_ADD_NUTTX_FETURES),y)
 endif

-ifeq ($(CONFIG_ADD_XIUOS_FETURES),y)
+ifeq ($(CONFIG_ADD_XIZI_FETURES),y)
 SRC_DIR := general_functions app_test

 SRC_FILES := main.c framework_init.c
@@ -12,7 +12,7 @@ menu "test app"
 bool "Config test adc"
 default n
 if USER_TEST_ADC
-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES
 config ADC_DEV_DRIVER
 string "Set ADC dev path"
 default "/dev/adc1_dev"
@@ -23,7 +23,7 @@ menu "test app"
 bool "Config test dac"
 default n
 if USER_TEST_DAC
-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES
 config DAC_DEV_DRIVER
 string "Set DAC dev path"
 default "/dev/dac_dev"
@@ -31,7 +31,7 @@ void test_adc()

 adc_fd = PrivOpen(ADC_DEV_DRIVER, O_RDWR);
 if (adc_fd < 0) {
-KPrintf("open adc fd error %d\n", adc_fd);
+printf("open adc fd error %d\n", adc_fd);
 return;
 }
@@ -39,7 +39,7 @@ void test_adc()
 ioctl_cfg.ioctl_driver_type = ADC_TYPE;
 ioctl_cfg.args = &adc_channel;
 if (0 != PrivIoctl(adc_fd, OPE_CFG, &ioctl_cfg)) {
-KPrintf("ioctl adc fd error %d\n", adc_fd);
+printf("ioctl adc fd error %d\n", adc_fd);
 PrivClose(adc_fd);
 return;
 }
@@ -42,7 +42,7 @@ Modification:
 1. support spi flash open, read and write function
 *************************************************/

-#include <xiuos.h>
+#include <xizi.h>
 #include <device.h>
 #include <flash_spi.h>
 #include <user_api.h>
@@ -18,7 +18,7 @@
 * @date 2021-05-29
 */
 #include <transform.h>
-#include <xiuos.h>
+#include <xizi.h>
 #include "board.h"
 #include "sys_arch.h"
 #include <lwip/sockets.h>
@@ -18,7 +18,7 @@
 * @date 2021-05-29
 */
 #include <transform.h>
-#include <xiuos.h>
+#include <xizi.h>
 #include "board.h"
 #include "sys_arch.h"
 #include "lwip/udp.h"
@@ -10,7 +10,7 @@
 * See the Mulan PSL v2 for more details.
 */

-#include <xiuos.h>
+#include <xizi.h>
 #include <stdio.h>
 #include <cstdlib>
 using namespace std;
@@ -5,7 +5,7 @@ ifeq ($(CONFIG_ADD_NUTTX_FETURES),y)
 include $(APPDIR)/Application.mk
 endif

-ifeq ($(CONFIG_ADD_XIUOS_FETURES),y)
+ifeq ($(CONFIG_ADD_XIZI_FETURES),y)
 SRC_FILES := double_list.c single_list.c
 include $(KERNEL_ROOT)/compiler.mk
 endif
@@ -9,7 +9,8 @@ menu "knowing app"
 source "$APP_DIR/Applications/knowing_app/iris_ml_demo/Kconfig"
 source "$APP_DIR/Applications/knowing_app/k210_fft_test/Kconfig"
 source "$APP_DIR/Applications/knowing_app/image_processing/Kconfig"
 source "$APP_DIR/Applications/knowing_app/cmsis_5_demo/Kconfig"
+source "$APP_DIR/Applications/knowing_app/nnom_demo/Kconfig"

 endif
 endmenu
@@ -1,6 +1,6 @@
 menuconfig USING_CMSIS_5_DEMOAPP
 bool "CMSIS-5 demo app"
-depends on USING_USING_CMSIS_5_NN
+depends on USING_CMSIS_5_NN
 default n

 if USING_CMSIS_5_DEMOAPP
@@ -13,6 +13,6 @@ path = [
 cwd + '/demo'
 ]

-group = DefineGroup('CMSISNN-cifar10', src, depend = ['USING_CMSIS_5_DEMOAPP'], CPPPATH = path)
+group = DefineGroup('CMSISNN-cifar10', src, depend = ['USING_CMSIS_5_NN_DEMOAPP'], CPPPATH = path)

 Return('group')
@@ -1,4 +1,4 @@
-unsigned char mnist_model[] = {
+const unsigned char mnist_model[] = {
 0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x14, 0x00, 0x20, 0x00,
 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00,
 0x18, 0x00, 0x1c, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
@@ -0,0 +1,14 @@
menuconfig USING_NNOM_DEMOAPP
    bool "NNOM demo app"
    depends on USING_NNOM
    default n

if USING_NNOM_DEMOAPP

config USING_NNOM_MNIST_DEMOAPP
    bool "Using NNOM mnist demo app"
    default n

endif
@@ -0,0 +1,14 @@
import os
Import('RTT_ROOT')
from building import *

cwd = GetCurrentDir()
objs = []
list = os.listdir(cwd)

for d in list:
    path = os.path.join(cwd, d)
    if os.path.isfile(os.path.join(path, 'SConscript')):
        objs = objs + SConscript(os.path.join(path, 'SConscript'))

Return('objs')
@@ -0,0 +1,16 @@
# NNoM Mnist-simple Example

This example is from [NNoM](https://github.com/majianjia/nnom) / [mnist-simple](https://github.com/majianjia/nnom/tree/master/examples/mnist-simple) and can be deployed on Arm and RISC-V CPUs. CMSIS-NN can be used to accelerate it on Arm Cortex-M CPUs.

## Requirements

- NNoM in Framework/knowing/nnom
- To use the CMSIS-NN backend, select in menuconfig "APP_Framework->Framework->support knowing framework->NNoM->Select NNoM Backend"

## To run this demo

- Run the demo by typing the command

```
mnist_nnom num
```
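For example, assuming index 3 of the ten images embedded in image.h:

```
mnist_nnom 3
```

This prints the ASCII-rendered digit followed by the truth label, the predicted label, and the probability, as implemented in mnist.c later in this commit.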
@@ -0,0 +1,10 @@
import os
from building import *

cwd = GetCurrentDir()
src = Glob('*.c')
path = [cwd]

group = DefineGroup('NNOM mnist application', src, depend = ['USING_NNOM_MNIST_DEMOAPP'], CPPPATH = path)

Return('group')
@@ -0,0 +1,36 @@
#define IMG0 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 126, 126, 126, 126, 127, 64, 56, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 81, 126, 126, 126, 126, 126, 126, 126, 126, 109, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 28, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 89, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 126, 126, 127, 126, 114, 23, 0, 31, 89, 126, 126, 126, 127, 126, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 100, 126, 126, 126, 111, 26, 0, 0, 0, 28, 116, 126, 126, 126, 126, 107, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 83, 0, 0, 0, 0, 0, 37, 116, 126, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 37, 0, 0, 0, 0, 0, 0, 84, 126, 126, 126, 126, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 126, 126, 126, 126, 13, 0, 0, 0, 0, 0, 0, 84, 126, 126, 126, 126, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 126, 126, 126, 127, 13, 0, 0, 0, 0, 0, 0, 84, 126, 127, 126, 126, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 126, 126, 126, 126, 13, 0, 0, 0, 0, 0, 0, 61, 126, 126, 126, 126, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 68, 0, 0, 0, 0, 0, 0, 30, 126, 126, 126, 126, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 126, 112, 29, 5, 0, 0, 5, 69, 112, 126, 126, 126, 126, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 110, 126, 126, 126, 126, 126, 88, 70, 70, 89, 126, 126, 126, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 126, 126, 127, 126, 126, 126, 126, 127, 126, 126, 126, 126, 127, 126, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 111, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 77, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 114, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 42, 118, 126, 126, 126, 126, 126, 126, 126, 124, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 56, 56, 64, 126, 126, 126, 70, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG0_LABLE 0
#define IMG1 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 127, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 113, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 107, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 127, 127, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 98, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 127, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 127, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 98, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 127, 127, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 117, 126, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 126, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG1_LABLE 1
#define IMG2 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 83, 87, 87, 47, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 78, 122, 121, 113, 113, 127, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 117, 106, 64, 16, 0, 0, 67, 112, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 120, 75, 7, 0, 0, 0, 0, 0, 93, 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 124, 60, 0, 0, 0, 0, 0, 0, 0, 29, 120, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 111, 30, 0, 0, 0, 0, 0, 0, 0, 0, 12, 121, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 122, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 54, 121, 114, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 28, 70, 112, 127, 125, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 101, 126, 126, 126, 127, 117, 63, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 127, 127, 127, 127, 127, 127, 127, 127, 100, 53, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 40, 27, 6, 6, 6, 29, 69, 92, 117, 127, 122, 78, 34, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 45, 94, 121, 126, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 53, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG2_LABLE 2
#define IMG3 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 61, 120, 108, 108, 96, 37, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 127, 127, 127, 127, 127, 127, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 58, 70, 70, 115, 127, 127, 121, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 105, 127, 127, 119, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 102, 127, 127, 117, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 116, 127, 127, 71, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 127, 127, 127, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 82, 127, 127, 124, 85, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 22, 110, 127, 127, 98, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 76, 127, 127, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 73, 127, 124, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 110, 127, 111, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 127, 127, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 102, 127, 127, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 69, 123, 127, 120, 63, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 44, 119, 127, 127, 127, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 69, 103, 52, 70, 125, 127, 127, 100, 63, 21, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 127, 127, 127, 127, 127, 127, 94, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 98, 127, 127, 127, 115, 88, 38, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 110, 47, 19, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG3_LABLE 3
#define IMG4 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 103, 126, 0, 0, 0, 0, 0, 57, 126, 111, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 123, 126, 126, 63, 0, 0, 0, 0, 56, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 55, 0, 0, 0, 0, 56, 126, 126, 99, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 19, 117, 126, 126, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 0, 73, 126, 126, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 126, 126, 126, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 0, 0, 0, 0, 0, 0, 42, 126, 126, 126, 81, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 100, 126, 126, 126, 0, 0, 0, 0, 0, 0, 9, 104, 126, 126, 126, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 126, 126, 126, 14, 38, 84, 84, 84, 54, 9, 90, 126, 126, 126, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 107, 44, 126, 126, 126, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 120, 126, 126, 126, 127, 126, 126, 126, 126, 127, 126, 126, 126, 126, 127, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 126, 126, 126, 126, 88, 83, 75, 53, 126, 126, 126, 126, 126, 126, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 27, 27, 27, 28, 3, 0, 0, 0, 28, 27, 27, 93, 126, 126, 116, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 126, 126, 126, 101, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 94, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 127, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 126, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 126, 126, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 126, 126, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 79, 126, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG4_LABLE 4
#define IMG5 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 9, 66, 95, 78, 83, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 12, 64, 95, 126, 126, 126, 126, 126, 123, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 92, 126, 126, 126, 126, 126, 126, 115, 96, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 36, 114, 126, 126, 126, 126, 126, 102, 67, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 126, 126, 126, 126, 100, 96, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 126, 126, 115, 69, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 94, 126, 126, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 126, 126, 126, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 82, 126, 126, 105, 54, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 124, 126, 126, 126, 126, 121, 94, 60, 61, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 98, 126, 126, 126, 126, 126, 126, 126, 127, 126, 80, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 54, 54, 54, 54, 105, 114, 119, 126, 126, 126, 126, 87, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 51, 109, 126, 126, 126, 112, 58, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 50, 102, 119, 126, 126, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 126, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 48, 117, 126, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 102, 102, 9, 20, 43, 76, 106, 126, 126, 114, 64, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 126, 126, 110, 117, 126, 126, 126, 126, 91, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 104, 126, 126, 126, 126, 126, 126, 115, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 65, 65, 123, 95, 65, 28, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG5_LABLE 5
#define IMG6 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 56, 88, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 97, 121, 126, 103, 121, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 100, 126, 99, 27, 12, 99, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 53, 126, 121, 96, 14, 0, 0, 14, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 126, 126, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 72, 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 126, 121, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 123, 126, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 53, 126, 121, 96, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 126, 126, 107, 70, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 126, 127, 126, 126, 126, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 121, 126, 126, 88, 93, 126, 118, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 126, 126, 28, 3, 6, 93, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 117, 126, 118, 0, 0, 0, 84, 126, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 126, 126, 56, 0, 0, 0, 84, 126, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 126, 126, 87, 0, 0, 0, 84, 126, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 101, 126, 126, 81, 28, 9, 98, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 126, 126, 126, 116, 104, 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 49, 111, 126, 126, 126, 116, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 126, 110, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG6_LABLE 6
#define IMG7 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 52, 122, 126, 126, 127, 126, 78, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 98, 126, 126, 126, 126, 114, 103, 126, 115, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 123, 126, 118, 105, 44, 26, 28, 123, 126, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 66, 4, 0, 0, 0, 0, 82, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 121, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 126, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 106, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 89, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 126, 126, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, 126, 126, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 110, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 126, 126, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 126, 104, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 126, 124, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 64, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG7_LABLE 7
#define IMG8 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 107, 126, 127, 106, 66, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 81, 126, 126, 126, 126, 126, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 126, 127, 65, 31, 51, 96, 126, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 126, 86, 5, 0, 0, 35, 126, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 127, 126, 0, 0, 0, 0, 25, 126, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71, 126, 85, 0, 0, 0, 0, 46, 126, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, 127, 45, 0, 0, 0, 20, 107, 126, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 121, 116, 15, 0, 0, 20, 121, 126, 106, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 117, 15, 0, 0, 86, 126, 127, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 126, 126, 86, 10, 91, 126, 126, 126, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 127, 126, 127, 126, 127, 86, 127, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 106, 126, 126, 126, 65, 5, 65, 126, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 126, 127, 25, 0, 0, 25, 126, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 126, 86, 5, 0, 0, 25, 126, 111, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 126, 0, 0, 0, 0, 25, 126, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 126, 126, 0, 0, 0, 0, 46, 126, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 127, 126, 0, 0, 5, 86, 127, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 126, 126, 20, 20, 86, 126, 106, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 117, 126, 127, 126, 127, 86, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 126, 126, 106, 45, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG8_LABLE 8
#define IMG9 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 59, 109, 120, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 49, 116, 121, 127, 127, 127, 120, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 80, 127, 127, 114, 89, 96, 127, 127, 83, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 78, 127, 127, 70, 20, 0, 19, 117, 127, 127, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 127, 127, 82, 1, 0, 0, 0, 36, 127, 127, 110, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 127, 123, 25, 0, 0, 0, 0, 18, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 127, 120, 0, 0, 0, 0, 0, 83, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 127, 90, 0, 0, 0, 0, 0, 15, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 124, 127, 75, 0, 0, 0, 0, 0, 14, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 102, 127, 126, 103, 36, 22, 0, 0, 10, 112, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 119, 127, 127, 127, 115, 93, 93, 86, 83, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 121, 127, 127, 127, 127, 127, 127, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 55, 84, 124, 124, 127, 127, 127, 127, 127, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 76, 93, 127, 127, 127, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 75, 127, 127, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 127, 127, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 127, 127, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 127, 127, 92, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 122, 127, 127, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 127, 127, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
#define IMG9_LABLE 9
#define TOTAL_IMAGE 10
static const int8_t img[10][784] = {IMG0,IMG1,IMG2,IMG3,IMG4,IMG5,IMG6,IMG7,IMG8,IMG9};
static const int8_t label[10] = {IMG0_LABLE,IMG1_LABLE,IMG2_LABLE,IMG3_LABLE,IMG4_LABLE,IMG5_LABLE,IMG6_LABLE,IMG7_LABLE,IMG8_LABLE,IMG9_LABLE};
@@ -0,0 +1,77 @@
/*
 * Copyright (c) 2018-2020, Jianjia Ma
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-03-29     Jianjia Ma   first implementation
 */

#include <stdio.h>
#include <stdlib.h>   /* for atoi() */
#include <string.h>   /* for memcpy() */
#include <transform.h>

#include "nnom.h"
#include "image.h"
#include "weights.h"

nnom_model_t *model;

const char codeLib[] = "@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ";

void print_img(int8_t *buf)
{
    for (int y = 0; y < 28; y++)
    {
        for (int x = 0; x < 28; x++)
        {
            /* map the pixel value to one of the 70 glyphs in codeLib */
            int index = 69 / 127.0 * (127 - buf[y * 28 + x]);
            if (index > 69) index = 69;
            if (index < 0) index = 0;
            /* print each pixel twice to keep the aspect ratio on the console */
            printf("%c", codeLib[index]);
            printf("%c", codeLib[index]);
        }
        printf("\n");
    }
}

// Do simple test using image in "image.h" with model created previously.
void mnist_nnom(int argc, char **argv)
{
    model = nnom_model_create();

    uint32_t tick, time;
    uint32_t predic_label;
    float prob;
    int32_t index;

    /* validate argc before touching argv[1] */
    if (argc != 2 || (index = atoi(argv[1])) >= TOTAL_IMAGE)
    {
        printf("Please input image number within %d\n", TOTAL_IMAGE - 1);
        return;
    }

    printf("\nprediction start.. \n");
#ifdef __RT_THREAD_H__
    tick = rt_tick_get();
#endif

    memcpy(nnom_input_data, (int8_t *)&img[index][0], 784);
    nnom_predict(model, &predic_label, &prob);

#ifdef __RT_THREAD_H__
    time = rt_tick_get() - tick;
#endif
    // print original image to console
    print_img((int8_t *)&img[index][0]);

#ifdef __RT_THREAD_H__
    printf("Time: %d tick\n", time);
#endif
    printf("Truth label: %d\n", label[index]);
    printf("Predicted label: %d\n", predic_label);
    printf("Probability: %d%%\n", (int)(prob * 100));
}

#ifdef __RT_THREAD_H__
MSH_CMD_EXPORT(mnist_nnom, nnom mnist demo and image number should be followed);
#endif
@@ -0,0 +1,166 @@
'''
Copyright (c) 2018-2020
Jianjia Ma
majianjia@live.com
SPDX-License-Identifier: Apache-2.0
Change Logs:
Date           Author       Notes
2019-02-12     Jianjia Ma   The first version
'''

import matplotlib.pyplot as plt
import sys
import os
nnscript = os.path.abspath('../../../Framework/knowing/nnom/scripts')
sys.path.append(nnscript)

from tensorflow.keras import *
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import *
from tensorflow.keras.models import load_model, save_model
import tensorflow as tf
import numpy as np
from nnom import *

model_name = 'mnist_simple_trained_model.h5'

def image_to_cfile(data, label, num_of_image, file='image.h'):
    with open(file, 'w') as f:
        for i in range(num_of_image):
            selected = np.random.randint(0, 1000)  # select 10 out of 1000.
            f.write('#define IMG%d {' % (i))
            np.round(data[selected]).flatten().tofile(f, sep=", ", format="%d")  # data already scaled to 0~127
            f.write('} \n')
            f.write('#define IMG%d_LABLE' % (i))
            f.write(' %d \n \n' % label[selected])
        f.write('#define TOTAL_IMAGE %d \n \n' % (num_of_image))

        f.write('static const int8_t img[%d][%d] = {' % (num_of_image, data[0].flatten().shape[0]))
        f.write('IMG0')
        for i in range(num_of_image - 1):
            f.write(',IMG%d' % (i + 1))
        f.write('};\n\n')

        f.write('static const int8_t label[%d] = {' % (num_of_image))
        f.write('IMG0_LABLE')
        for i in range(num_of_image - 1):
            f.write(',IMG%d_LABLE' % (i + 1))
        f.write('};\n\n')


def train(x_train, y_train, x_test, y_test, batch_size=64, epochs=100):
    inputs = Input(shape=x_train.shape[1:])
    x = Conv2D(12, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
    x = ReLU()(x)
    x = MaxPool2D((2, 2), strides=(2, 2), padding="same")(x)

    x = Conv2D(24, kernel_size=(3, 3), strides=(1, 1), padding="same")(x)
    x = ReLU()(x)
    x = MaxPool2D((2, 2), strides=(2, 2), padding="same")(x)

    x = Conv2D(48, kernel_size=(3, 3), strides=(1, 1), padding="same")(x)
    x = ReLU()(x)
    x = Dropout(0.2)(x)
    x = MaxPool2D((2, 2), strides=(2, 2), padding="same")(x)

    x = Flatten()(x)
    x = Dense(96)(x)
    x = Dropout(0.2)(x)

    x = ReLU()(x)
    x = Dense(10)(x)
    predictions = Softmax()(x)

    model = Model(inputs=inputs, outputs=predictions)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=2,
                        validation_data=(x_test, y_test),
                        shuffle=True)

    # free the session to avoid nesting naming while we load the best model after.
    save_model(model, model_name)
    del model
    tf.keras.backend.clear_session()
    return history


if __name__ == "__main__":
    epochs = 2
    num_classes = 10

    # The data, split between train and test sets:
    (x_train, y_train_num), (x_test, y_test_num) = mnist.load_data()

    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices.
    y_train = tf.keras.utils.to_categorical(y_train_num, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test_num, num_classes)

    # reshape to 4-d because the model is built for 4-d input
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2], 1)
    print('x_train shape:', x_train.shape)

    # normalize the range 0~255 -> 0~1
    x_test = x_test / 255
    x_train = x_train / 255
    print("data range", x_test.min(), x_test.max())

    # select a few images, rescale to 0~127 and write them to image.h
    image_to_cfile(x_test * 127, y_test_num, 10, file='image.h')

    # train model, save the best accuracy model
    history = train(x_train, y_train, x_test, y_test, batch_size=64, epochs=epochs)

    # reload best model
    model = load_model(model_name)

    # evaluate
    evaluate_model(model, x_test, y_test)

    # save weights
    generate_model(model, np.vstack((x_train, x_test)), name="weights.h")

    # plot
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    plt.plot(range(0, epochs), acc, color='red', label='Training acc')
    plt.plot(range(0, epochs), val_acc, color='green', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
File diff suppressed because one or more lines are too long
@@ -51,7 +51,7 @@ ifeq ($(CONFIG_ADD_NUTTX_FETURES),y)

 endif

-ifeq ($(CONFIG_ADD_XIUOS_FETURES),y)
+ifeq ($(CONFIG_ADD_XIZI_FETURES),y)
 SRC_FILES :=

 ifeq ($(CONFIG_APPLICATION_SENSOR_HCHO_TB600B_WQ_HCHO1OS), y)
@@ -18,7 +18,7 @@
 * @date 2021.12.10
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
 #include <sensor.h>
@@ -18,7 +18,7 @@
 * @date 2021.04.23
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
 #include <sensor.h>
@@ -18,7 +18,7 @@
 * @date 2021.12.15
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
 #include <sensor.h>
@@ -18,7 +18,7 @@
 * @date 2021.04.23
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
@@ -18,7 +18,7 @@
 * @date 2021.12.14
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
 #include <sensor.h>
@@ -18,7 +18,7 @@
 * @date 2021.04.23
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
@@ -18,7 +18,7 @@
 * @date 2021.04.23
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
@@ -18,7 +18,7 @@
 * @date 2021.04.23
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
@@ -18,7 +18,7 @@
 * @date 2021.04.23
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
@@ -18,7 +18,7 @@
 * @date 2021.12.15
 */

-#ifdef ADD_XIUOS_FETURES
+#ifdef ADD_XIZI_FETURES
 # include <user_api.h>
 #endif
 #include <sensor.h>
@@ -5,10 +5,10 @@ menu "Framework"
 default y
 choice
 prompt "select os features"
-default ADD_XIUOS_FETURES
+default ADD_XIZI_FETURES

-config ADD_XIUOS_FETURES
-bool "add xiuos fetures"
+config ADD_XIZI_FETURES
+bool "add xizi fetures"

 config ADD_NUTTX_FETURES
 bool "add nuttx fetures"
@@ -2,7 +2,7 @@ config ADAPTER_4G_EC200T
 string "EC200T adapter name"
 default "ec200t"

-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES
 config ADAPTER_EC200T_PWRKEY
 int "EC200T PWRKEY pin number"
 default "97"
@@ -2,7 +2,7 @@ config ADAPTER_BLUETOOTH_HC08
 string "HC08 adapter name"
 default "hc08"

-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES
 config ADAPTER_HC08_RECV_BUFFER_SIZE
 int "HC08 recv data buffer size"
 default "128"
@@ -2,7 +2,7 @@ config ADAPTER_ETHERNET_HFA21
 string "HFA21 ETHERNET adapter name"
 default "hfa21_ethernet"

-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES

 config ADAPTER_HFA21_DRIVER_EXTUART
 bool "Using extra uart to support ethernet"
@@ -333,7 +333,7 @@ static int Hfa21EthernetConnect(struct Adapter *adapter, enum NetRoleType net_ro
 {
 int ret = 0;
 char hfa21_ethernet_cmd[128];
-char net_role_string[6] = {0};
+char net_role_string[7] = {0};

 /*Step1 : enter AT mode*/
 Hfa21EthernetInitAtCmd(adapter->agent);
@@ -2,7 +2,7 @@ config ADAPTER_LORA_SX1278
 string "SX1278 adapter name"
 default "sx1278"

-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES
 config ADAPTER_SX1278_DRIVER
 string "SX1278 device spi driver path"
 default "/dev/spi2_lora"
@@ -2,7 +2,7 @@ config ADAPTER_NBIOT_BC28
 string "BC28 adapter name"
 default "bc28"

-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES
 config ADAPTER_BC28_RESETPIN
 int "BC28 RESET pin number"
 default "100"
@@ -2,7 +2,7 @@ config ADAPTER_WIFI_HFA21
 string "HFA21 WIFI adapter name"
 default "hfa21_wifi"

-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES

 config ADAPTER_HFA21_DRIVER_EXTUART
 bool "Using extra uart to support wifi"
@@ -17,7 +17,7 @@ choice
 endchoice


-if ADD_XIUOS_FETURES
+if ADD_XIZI_FETURES

 config ADAPTER_E18_DRIVER_EXTUART
 bool "Using extra uart to support zigbee"
@@ -17,8 +17,9 @@
 * @author AIIT XUOS Lab
 * @date 2021.12.15
 */

 #ifdef USING_CONTROL_PLC_OPCUA
 #include "../interoperability/opcua/open62541.h"
 #endif
 #include "plc.h"
@@ -1,13 +1,14 @@
 menuconfig SUPPORT_KNOWING_FRAMEWORK
 bool "support knowing framework"
 default n
 select TRANSFORM_LAYER_ATTRIUBUTE

 if SUPPORT_KNOWING_FRAMEWORK
 source "$APP_DIR/Framework/knowing/tensorflow-lite/Kconfig"
 source "$APP_DIR/Framework/knowing/filter/Kconfig"
 source "$APP_DIR/Framework/knowing/ota/Kconfig"
 source "$APP_DIR/Framework/knowing/image_processing/Kconfig"
 source "$APP_DIR/Framework/knowing/cmsis_5/Kconfig"
 source "$APP_DIR/Framework/knowing/kpu/Kconfig"
+source "$APP_DIR/Framework/knowing/nnom/Kconfig"
 endif
@@ -4,11 +4,11 @@ menuconfig USING_CMSIS_5

 if USING_CMSIS_5

-menuconfig USING_USING_CMSIS_5_NN
+menuconfig USING_CMSIS_5_NN
 bool "CMSIS-5 NN"
 default n

-if USING_USING_CMSIS_5_NN
+if USING_CMSIS_5_NN

 config USING_CMSIS_5_NN_ACTIVATION
 bool "CMSIS-5 NN ACTIVATION"
@@ -27,7 +27,8 @@
 * Target Processor: Cortex-M
 *
 * -------------------------------------------------------------------- */

 #include <inttypes.h>
 #include "../../../Core/Include/cmsis_gcc.h"
 #include "arm_nnsupportfunctions.h"

 /**
@@ -27,7 +27,8 @@
 * Target Processor: Cortex-M
 *
 * -------------------------------------------------------------------- */

 #include <inttypes.h>
 #include "../../../Core/Include/cmsis_gcc.h"
 #include "arm_nnsupportfunctions.h"

 /**
@@ -28,7 +28,8 @@
 * Target Processor: Cortex-M
 *
 * -------------------------------------------------------------------- */

 #include <inttypes.h>
 #include "../../../Core/Include/cmsis_gcc.h"
 #include "arm_nnsupportfunctions.h"

 /**
@@ -8,7 +8,7 @@ CPPPATH = []

 CPPPATH += [os.path.join(cwd, 'Core/Include')]

-if GetDepend('USING_USING_CMSIS_5_NN'):
+if GetDepend('USING_CMSIS_5_NN'):
     CPPPATH += [os.path.join(cwd, 'DSP/Include')]
     CPPPATH += [os.path.join(cwd, 'NN/Include')]
     CPPDEFINES += ['__FPU_PRESENT=1']
@@ -46,8 +46,8 @@ void k210_detect(char *json_file_path)
 printf("open ov2640 fail !!");
 return;
 }
-_ioctl_set_dvp_reso set_dvp_reso = {detect_params.sensor_output_size[1], detect_params.sensor_output_size[0]};
-ioctl(g_fd, IOCTRL_CAMERA_SET_DVP_RESO, &set_dvp_reso);
+_ioctl_set_reso set_dvp_reso = {detect_params.sensor_output_size[1], detect_params.sensor_output_size[0]};
+ioctl(g_fd, IOCTRL_CAMERA_OUT_SIZE_RESO, &set_dvp_reso);
 showbuffer =
 (unsigned char *)rt_malloc_align(detect_params.sensor_output_size[0] * detect_params.sensor_output_size[1] * 2, 64);
 if (NULL == showbuffer) {
@@ -199,6 +199,8 @@ static void *thread_detect_entry(void *parameter)
 /* display result */

 for (int cnt = 0; cnt < detect_info.obj_number; cnt++) {
+detect_info.obj[cnt].y1 += (detect_params.sensor_output_size[0] - detect_params.net_input_size[0])/2;
+detect_info.obj[cnt].y2 += (detect_params.sensor_output_size[0] - detect_params.net_input_size[0])/2;
 draw_edge((uint32_t *)showbuffer, &detect_info, cnt, 0xF800, (uint16_t)detect_params.sensor_output_size[1],
 (uint16_t)detect_params.sensor_output_size[0]);
 printf("%d: (%d, %d, %d, %d) cls: %s conf: %f\t", cnt, detect_info.obj[cnt].x1, detect_info.obj[cnt].y1,
@@ -0,0 +1,46 @@
menuconfig USING_NNOM
    bool "NNOM"
    default n

if USING_NNOM

config NNOM_USING_STATIC_MEMORY
    bool "Using static memory"
    default n
    help
        A buffer must be set with "nnom_set_static_buf()" before creating a model.

config NNOM_TRUNCATE
    bool "Using NNOM Truncate"
    default n
    help
        Disabled: backend ops round to the nearest integer (default). Enabled: they floor (truncate).

choice
    prompt "Select NNOM Format"
    default NNOM_USING_HWC

config NNOM_USING_HWC
    bool "Using HWC Format"

config NNOM_USING_CHW
    bool "Using CHW Format"
    help
        CHW is incompatible with CMSIS-NN and must be used with hardware accelerators such as the KPU in the K210 chip.
endchoice

choice
    prompt "Select NNOM Backend"
    default USING_NNOM_NORMAL

config NNOM_USING_LOCAL
    bool "Using NNOM local backend"

config NNOM_USING_CMSIS_NN
    bool "Using CMSIS-NN backend"
    select USING_CMSIS_5
    select USING_CMSIS_5_NN
endchoice

endif
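As a sketch of what the truncate option above changes, assuming a typical fixed-point multiply followed by a right shift (shift >= 1); the helper names are illustrative, not NNoM API:

```c
#include <stdint.h>

/* Truncation simply floors the shifted result. */
static int32_t mul_shift_truncate(int32_t a, int32_t b, int shift)
{
    return (a * b) >> shift;
}

/* The default rounds to the nearest integer by adding half an LSB first. */
static int32_t mul_shift_round(int32_t a, int32_t b, int shift)
{
    return ((a * b) + (1 << (shift - 1))) >> shift;
}
```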
@@ -0,0 +1,14 @@
# Neural Network on Microcontroller (NNoM)

NNoM is a high-level inference neural network library specifically for microcontrollers, released under the Apache License 2.0.

The current version is 0.4.3. More information is available at [NNoM](https://github.com/majianjia/nnom).

## CMSIS-NN Backend

[CMSIS-NN/DSP](https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN) is an inference acceleration library for Arm Cortex-M CPUs and can be used as the NNoM backend for higher performance.

## Notes

- The CHW format is incompatible with CMSIS-NN; it must be used when using a hardware accelerator such as the KPU in the K210 chip.
- A static memory buffer must be set by calling "nnom_set_static_buf()" before creating a model.
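A minimal sketch of that static-buffer setup, assuming the `nnom_set_static_buf(buf, size)` call named above; the pool size is illustrative only:

```c
#include "nnom.h"

/* Illustrative pool size; the real requirement depends on the model. */
static uint8_t nnom_static_pool[32 * 1024];

void model_setup(void)
{
    /* Register the buffer before the model is created, as required
       when NNOM_USING_STATIC_MEMORY is enabled. */
    nnom_set_static_buf(nnom_static_pool, sizeof(nnom_static_pool));
    nnom_model_t *model = nnom_model_create();
    (void)model; /* used by the application afterwards */
}
```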
@@ -0,0 +1,18 @@
import os
from building import *

cwd = GetCurrentDir()
src = []
CPPDEFINES = []
CPPPATH = []

src += Glob('src/core/*.c')
src += Glob('src/layers/*.c')
src += Glob('src/backends/*.c')

CPPPATH += ['%s/inc' % (cwd), '%s/port' % (cwd)]

group = DefineGroup('nnom', src, depend = ['USING_NNOM'], CPPPATH = CPPPATH, LOCAL_CPPDEFINES = CPPDEFINES)

Return('group')
@@ -0,0 +1,96 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_ACTIVATION_H__
#define __NNOM_ACTIVATION_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// activation layer
typedef struct _nnom_activation_layer_t
{
	nnom_layer_t super;
	nnom_activation_t *act;
} nnom_activation_layer_t;

// activation with fixed q format (tanh and sigmoid)
typedef struct _nnom_activation_fixed_q_t
{
	nnom_activation_t super;
	uint8_t dec_bit;
} nnom_activation_fixed_q_t;

// leaky relu
typedef struct _nnom_activation_leaky_relu_t
{
	nnom_activation_t super;
	q7_t alpha;            // alpha is represented in q0.7 format (-128 = -1)
} nnom_activation_leaky_relu_t;

// advanced relu (full ReLU)
typedef struct _nnom_activation_adv_relu_t
{
	nnom_activation_t super;
	q7_t negative_slope;   // negative_slope is represented in q0.7 format (-128 = -1)
	float max;             // cap of the max value
	float threshold;       // threshold
} nnom_activation_adv_relu_t;

// method
nnom_status_t activation_run(nnom_layer_t *layer);
nnom_status_t activation_free(nnom_layer_t *layer);

// activation delete
void act_delete(nnom_activation_t *act);

// a direct api on tensor
nnom_status_t act_tensor_run(nnom_activation_t *act, nnom_tensor_t *tensor);

// Layer API
nnom_layer_t *Activation(nnom_activation_t *act);
nnom_layer_t *ReLU(void);
nnom_layer_t *LeakyReLU(float alpha);
nnom_layer_t *AdvReLU(float alpha, float max, float threshold);
nnom_layer_t *Sigmoid(int32_t dec_bit);
nnom_layer_t *TanH(int32_t dec_bit);

// Activation API.
nnom_activation_t *act_relu(void);
nnom_activation_t *act_leaky_relu(float alpha);
nnom_activation_t *act_adv_relu(float negative_slope, float max, float threshold);
nnom_activation_t *act_tanh(int32_t dec_bit);
nnom_activation_t *act_sigmoid(int32_t dec_bit);
nnom_activation_t *act_hard_tanh(int32_t dec_bit);
nnom_activation_t *act_hard_sigmoid(int32_t dec_bit);

// utils
int32_t act_get_dec_bit(nnom_activation_type_t type, int32_t dec_bit);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_ACTIVATION_H__ */
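The q0.7 comments above mean one sign bit and seven fraction bits, so -128 encodes -1.0 and a float slope is scaled by 128 before storage. A hypothetical conversion helper, not part of the NNoM API, assuming round-to-nearest:

```c
#include <math.h>
#include <stdint.h>

typedef int8_t q7_t; /* matches the q7_t used in the header above */

/* Hypothetical helper: convert a float in [-1, 1) to q0.7.
   Example: alpha = 0.3 maps to round(0.3 * 128) = 38. */
static q7_t float_to_q07(float alpha)
{
    float scaled = roundf(alpha * 128.0f);
    if (scaled > 127.0f)  scaled = 127.0f;   /* saturate at the q0.7 limits */
    if (scaled < -128.0f) scaled = -128.0f;
    return (q7_t)scaled;
}
```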
@@ -0,0 +1,47 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_AVGPOOL_H__
#define __NNOM_AVGPOOL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_maxpool.h"

// Avg Pooling
typedef nnom_maxpool_layer_t nnom_avgpool_layer_t;

// method
nnom_status_t avgpooling_build(nnom_layer_t *layer);
nnom_status_t avgpool_run(nnom_layer_t *layer);

// API
nnom_layer_t *avgpool_s(const nnom_pool_config_t *config);
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_AVGPOOL_H__ */
@@ -0,0 +1,43 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_BASELAYER_H__
#define __NNOM_BASELAYER_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_input.h"

// method
nnom_status_t default_build(nnom_layer_t *layer);
nnom_status_t default_run(nnom_layer_t *layer);

// API
nnom_layer_t *baselayer_s(const nnom_layer_config_t *config);
nnom_layer_t *BaseLayer(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_BASELAYER_H__ */
@@ -0,0 +1,55 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_CONCAT_H__
#define __NNOM_CONCAT_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// concatenate layer
typedef struct _nnom_concat_layer
{
	nnom_layer_t super;
	int8_t axis;
} nnom_concat_layer_t;

typedef struct _nnom_concat_config_t
{
	nnom_layer_config_t super;
	int8_t axis;
} nnom_concat_config_t;

// method
nnom_status_t concat_build(nnom_layer_t *layer);
nnom_status_t concat_run(nnom_layer_t *layer);

// API
nnom_layer_t *concat_s(const nnom_concat_config_t *config);
nnom_layer_t *Concat(int8_t axis);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_CONCAT_H__ */
@@ -0,0 +1,83 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_CONV2D_H__
#define __NNOM_CONV2D_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// child layers parameters
typedef struct _nnom_conv2d_layer_t
{
	nnom_layer_t super;
	nnom_3d_shape_t kernel;
	nnom_3d_shape_t stride;
	nnom_3d_shape_t pad;
	nnom_3d_shape_t dilation;
	nnom_padding_t padding_type;
	uint32_t filter_mult;   // filter size (for conv) or multiplier (for depthwise)

	nnom_tensor_t *weight;
	nnom_tensor_t *bias;

	// test
	nnom_qformat_param_t * output_rshift;
	nnom_qformat_param_t * bias_lshift;
} nnom_conv2d_layer_t;

// a machine interface for configuration
typedef struct _nnom_conv2d_config_t
{
	nnom_layer_config_t super;
	nnom_qtype_t qtype;     // quantisation type (per channel or per layer)
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	nnom_qformat_param_t *output_shift;
	nnom_qformat_param_t *bias_shift;
	uint32_t filter_size;
	int8_t kernel_size[2];
	int8_t stride_size[2];
	int8_t padding_size[2];
	int8_t dilation_size[2];
	nnom_padding_t padding_type;
} nnom_conv2d_config_t;

// method
nnom_status_t conv2d_run(nnom_layer_t *layer);
nnom_status_t conv2d_build(nnom_layer_t *layer);
nnom_status_t conv2d_free(nnom_layer_t *layer);

// utils
uint32_t conv_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation);

// API
nnom_layer_t *conv2d_s(const nnom_conv2d_config_t *config);
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
					 const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_CONV2D_H__ */
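conv_output_length() above is presumably the usual Keras-style output-size arithmetic; below is a sketch of that arithmetic under this assumption (conv_out_len is an illustrative name, not the library function itself):

static uint32_t conv_out_len(uint32_t in, uint32_t filter, nnom_padding_t pad,
                             uint32_t stride, uint32_t dilation)
{
    uint32_t dilated = (filter - 1) * dilation + 1;  // effective kernel size with dilation
    if (pad == PADDING_SAME)
        return (in + stride - 1) / stride;           // ceil(in / stride)
    return (in - dilated + stride) / stride;         // PADDING_VALID: floor((in - dilated) / stride) + 1
}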
@@ -0,0 +1,52 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-30     Jianjia Ma   The first version
 */

#ifndef __NNOM_DECONV2D_H__
#define __NNOM_DECONV2D_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_conv2d.h"

// child layers parameters
typedef nnom_conv2d_layer_t nnom_conv2d_trans_layer_t;

typedef nnom_conv2d_config_t nnom_conv2d_trans_config_t;

// method
nnom_status_t conv2d_trans_run(nnom_layer_t *layer);
nnom_status_t conv2d_trans_build(nnom_layer_t *layer);

// utils
uint32_t conv_trans_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation);

// API
nnom_layer_t *conv2d_trans_s(const nnom_conv2d_config_t *config);
nnom_layer_t *Conv2DTrans(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
						  const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_DECONV2D_H__ */
@@ -0,0 +1,48 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_CROPPING_H__
#define __NNOM_CROPPING_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_zero_padding.h"

// Cropping, same as zeropadding
typedef nnom_zero_padding_layer_t nnom_cropping_layer_t;

typedef nnom_zero_padding_config_t nnom_cropping_config_t;

// method
nnom_status_t cropping_build(nnom_layer_t *layer);
nnom_status_t cropping_run(nnom_layer_t *layer);

// API
nnom_layer_t * cropping_s(const nnom_cropping_config_t *config);
nnom_layer_t *Cropping(nnom_border_t pad);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_CROPPING_H__ */
@@ -0,0 +1,63 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_DENSE_H__
#define __NNOM_DENSE_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

typedef struct _nnom_dense_layer_t
{
	nnom_layer_t super;
	size_t output_unit;
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	nnom_qformat_param_t *output_rshift;
	nnom_qformat_param_t *bias_lshift;
} nnom_dense_layer_t;

// a machine interface for configuration
typedef struct _nnom_dense_config_t
{
	nnom_layer_config_t super;
	nnom_qtype_t qtype;     // quantisation type (per channel or per layer)
	nnom_tensor_t *weight;
	nnom_tensor_t *bias;
	nnom_qformat_param_t *output_shift;
	nnom_qformat_param_t *bias_shift;
} nnom_dense_config_t;

// method
nnom_status_t dense_free(nnom_layer_t *layer);
nnom_status_t dense_build(nnom_layer_t *layer);
nnom_status_t dense_run(nnom_layer_t *layer);

// API
nnom_layer_t *dense_s(const nnom_dense_config_t *config);
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_DENSE_H__ */
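A minimal usage sketch for the legacy Dense() API declared above; the weight and bias arrays and the shift values are placeholders, not real trained parameters:

static const int8_t fc_w[10 * 64] = {0};            // hypothetical q7 weights (10 units x 64 inputs)
static const int8_t fc_b[10]      = {0};            // hypothetical q7 bias

const nnom_weight_t w = { .p_value = fc_w, .shift = 5 }; // placeholder output right shift
const nnom_bias_t   b = { .p_value = fc_b, .shift = 3 }; // placeholder bias left shift

nnom_layer_t *fc = Dense(10, &w, &b);               // 10 output units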
@@ -0,0 +1,44 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_DW_CONV2D_H__
#define __NNOM_DW_CONV2D_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_conv2d.h"

// method
nnom_status_t dw_conv2d_build(nnom_layer_t *layer);
nnom_status_t dw_conv2d_run(nnom_layer_t *layer);

// API
nnom_layer_t *dw_conv2d_s(const nnom_conv2d_config_t *config);
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
						const nnom_weight_t *w, const nnom_bias_t *b);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_DW_CONV2D_H__ */
@@ -0,0 +1,46 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_FLATTEN_H__
#define __NNOM_FLATTEN_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// no special parameters, but we need it.
typedef struct _nnom_flatten_config_t{
	nnom_layer_config_t super;
} nnom_flatten_config_t;

// method
nnom_status_t flatten_build(nnom_layer_t *layer);
nnom_status_t flatten_run(nnom_layer_t *layer);

// API
nnom_layer_t *flatten_s(const nnom_flatten_config_t *config);
nnom_layer_t *Flatten(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_FLATTEN_H__ */
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_GLOBAL_POOL_H__
#define __NNOM_GLOBAL_POOL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_maxpool.h"

typedef struct _nnom_global_pool_config_t
{
	nnom_layer_config_t super;
	int16_t output_shift;
} nnom_global_pool_config_t;

// method
nnom_status_t global_pool_build(nnom_layer_t *layer);

// API
nnom_layer_t * global_maxpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t * global_avgpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t * global_sumpool_s(const nnom_global_pool_config_t *config);

nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_GLOBAL_POOL_H__ */
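As a sketch of what global average pooling does per channel in q7 arithmetic (assumed behaviour; the actual kernel lives in nnom_local.c, and gl_avgpool_one_ch is an illustrative name only):

static q7_t gl_avgpool_one_ch(const q7_t *x, uint32_t hw, int16_t output_shift)
{
    int32_t sum = 0;
    for (uint32_t i = 0; i < hw; i++)
        sum += x[i];                                // accumulate in 32-bit
    sum = (sum / (int32_t)hw) >> output_shift;      // mean, then rescale to the output Q format
    if (sum > 127)  sum = 127;                      // saturate back to q7
    if (sum < -128) sum = -128;
    return (q7_t)sum;
}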
@@ -0,0 +1,60 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-08-27     Jianjia Ma   The first version
 */

#ifndef __NNOM_GRU_CELL_H__
#define __NNOM_GRU_CELL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "nnom_rnn.h"
#include "nnom_activation.h"

typedef struct _nnom_gru_cell_config_t
{
	nnom_layer_config_t super;
	nnom_tensor_t *weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t *bias;
	nnom_qformat_param_t q_dec_z, q_dec_h;  // z, r, h
	uint16_t units;
} nnom_gru_cell_config_t;

typedef struct _nnom_gru_cell_t
{
	nnom_rnn_cell_t super;

	nnom_tensor_t* weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t* bias;

	// decide later.
	// z, r, h
	nnom_qformat_param_t q_dec_z, q_dec_h;
	nnom_qformat_param_t oshift_iw, oshift_hw, bias_shift;

} nnom_gru_cell_t;

// gru
nnom_rnn_cell_t *gru_cell_s(const nnom_gru_cell_config_t* config);

nnom_status_t gru_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t gru_cell_build(nnom_rnn_cell_t* cell);
nnom_status_t gru_cell_run(nnom_rnn_cell_t* cell);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_GRU_CELL_H__ */
@@ -0,0 +1,57 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_INPUT_H__
#define __NNOM_INPUT_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// IO layer
typedef struct _nnom_io_layer
{
	nnom_layer_t super;
	nnom_3d_shape_t shape;
	nnom_qformat_param_t dec_bit;
	void *buf;              // input or output
} nnom_io_layer_t;

typedef struct _nnom_io_config_t
{
	nnom_layer_config_t super;
	nnom_tensor_t *tensor;
} nnom_io_config_t;

// method
nnom_status_t input_build(nnom_layer_t *layer);
nnom_status_t input_run(nnom_layer_t *layer);

// API
nnom_layer_t *input_s(const nnom_io_config_t* config);
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_INPUT_H__ */
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_LAMBDA_H__
#define __NNOM_LAMBDA_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_input.h"

// lambda layer
typedef struct _nnom_lambda_layer_t
{
	nnom_layer_t super;
	void *parameters;       // parameters for lambda
} nnom_lambda_layer_t;

// lambda layer configuration
typedef struct _nnom_lambda_config_t
{
	nnom_layer_config_t super;
	nnom_status_t (*run_func_name)(nnom_layer_t *layer);   // run method. required
	nnom_status_t (*build_func_name)(nnom_layer_t *layer); // compute output buffer shape. can be left null, will call default_build()
	nnom_status_t (*free_func_name)(nnom_layer_t *layer);  // a callback to free private resources (comp buf not included) can be left null
	void *parameters;       // parameters for lambda
} nnom_lambda_config_t;

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_LAMBDA_H__ */
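A sketch of a user-defined lambda: the run method reads the IO tensors directly and applies a cheap power-of-two scaling. my_scale_run and scale_shift are illustrative names; Lambda() itself is declared in nnom_layers.h later in this diff.

static nnom_status_t my_scale_run(nnom_layer_t *layer)
{
    size_t len = nnom_io_length(layer->in);          // element count; sizes assumed equal via default_build()
    q7_t *in   = (q7_t *)layer->in->tensor->p_data;
    q7_t *out  = (q7_t *)layer->out->tensor->p_data;
    int shift  = *(int *)((nnom_lambda_layer_t *)layer)->parameters;
    for (size_t i = 0; i < len; i++)
        out[i] = in[i] >> shift;                     // scale down by 2^shift
    return NN_SUCCESS;
}

static int scale_shift = 1;
nnom_layer_t *lmb = Lambda(my_scale_run, NULL, NULL, &scale_shift);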
@@ -0,0 +1,64 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-08-24     Jianjia Ma   The first version
 */

#ifndef __NNOM_LSTM_CELL_H__
#define __NNOM_LSTM_CELL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "nnom_rnn.h"
#include "nnom_activation.h"

// a machine interface for configuration
typedef struct _nnom_lstm_cell_config_t
{
	nnom_layer_config_t super;
	nnom_tensor_t *weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t *bias;
	nnom_qformat_param_t q_dec_z, q_dec_h, q_dec_c;  // z = iw + hw, c = cell state; h = output and memory
	uint16_t units;
} nnom_lstm_cell_config_t;

typedef struct _nnom_lstm_cell_t
{
	nnom_rnn_cell_t super;

	nnom_tensor_t* weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t* bias;

	// experimental,
	// iw: input x weight
	// hw: hidden state x recurrent weight
	// h: hidden state (memory)
	// c: cell state
	nnom_qformat_param_t q_dec_z, q_dec_h, q_dec_c;
	nnom_qformat_param_t oshift_iw, oshift_hw, oshift_zc, bias_shift;

} nnom_lstm_cell_t;

// LSTM
nnom_rnn_cell_t *lstm_cell_s(const nnom_lstm_cell_config_t* config);

nnom_status_t lstm_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t lstm_cell_q7_q15_build(nnom_rnn_cell_t* cell);
nnom_status_t lstm_cell_q7_q15_run(nnom_rnn_cell_t* cell);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_LSTM_CELL_H__ */
@@ -0,0 +1,63 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_MATRIX_H__
#define __NNOM_MATRIX_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// the maximum number of input layers hooked to this layer
#define MAX_INPUT_LAYER 8

// matrix layer
typedef struct _nnom_matrix_layer_t
{
	nnom_layer_t super;
	int16_t oshift;         // output right shift
} nnom_matrix_layer_t;

typedef struct _nnom_matrix_config_t
{
	nnom_layer_config_t super;
	int16_t output_shift;   // output right shift
} nnom_matrix_config_t;

// methods
nnom_layer_t* _same_shape_matrix_layer(void);
nnom_status_t add_run(nnom_layer_t *layer);
nnom_status_t sub_run(nnom_layer_t *layer);
nnom_status_t mult_run(nnom_layer_t *layer);

// API
nnom_layer_t *add_s(const nnom_matrix_config_t * config);
nnom_layer_t *sub_s(const nnom_matrix_config_t * config);
nnom_layer_t *mult_s(const nnom_matrix_config_t * config);
nnom_layer_t *Add(int16_t oshift);
nnom_layer_t *Sub(int16_t oshift);
nnom_layer_t *Mult(int16_t oshift);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_MATRIX_H__ */
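The oshift parameters above are output right-shifts that rescale the elementwise result back into the output Q format. A worked q7 sketch of this (assumed semantics; the real kernels are in nnom_local.c):

static q7_t add_q7(q7_t a, q7_t b, int16_t oshift)
{
    int32_t sum = ((int32_t)a + (int32_t)b) >> oshift;  // widen, add, rescale
    if (sum > 127)  sum = 127;                          // saturate to q7
    if (sum < -128) sum = -128;
    return (q7_t)sum;
}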
@@ -0,0 +1,63 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_MAXPOOL_H__
#define __NNOM_MAXPOOL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// Max Pooling
typedef struct _nnom_maxpool_layer_t
{
	nnom_layer_t super;
	nnom_3d_shape_t kernel;
	nnom_3d_shape_t stride;
	nnom_3d_shape_t pad;
	nnom_padding_t padding_type;
	int16_t output_shift;   // reserved
} nnom_maxpool_layer_t;

// a machine interface for configuration
typedef struct _nnom_pool_config_t
{
	nnom_layer_config_t super;
	nnom_padding_t padding_type;
	int16_t output_shift;
	int8_t kernel_size[2];
	int8_t stride_size[2];
	int8_t num_dim;
} nnom_pool_config_t;

// method
nnom_status_t maxpool_build(nnom_layer_t *layer);
nnom_status_t maxpool_run(nnom_layer_t *layer);

// API
nnom_layer_t *maxpool_s(const nnom_pool_config_t * config);
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_MAXPOOL_H__ */
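Usage sketch: the kernel() and stride() helpers declared in nnom_layers.h build the nnom_3d_shape_t arguments, so a 2x2, stride-2 pooling layer reads:

nnom_layer_t *pool = MaxPool(kernel(2, 2), stride(2, 2), PADDING_VALID);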
@@ -0,0 +1,43 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_OUTPUT_H__
#define __NNOM_OUTPUT_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_input.h"

// method
nnom_status_t output_build(nnom_layer_t *layer);
nnom_status_t output_run(nnom_layer_t *layer);

// API
nnom_layer_t *output_s(const nnom_io_config_t* config);
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_OUTPUT_H__ */
@@ -0,0 +1,56 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-12-07     Jianjia Ma   The first version
 */

#ifndef __NNOM_RESHAPE_H__
#define __NNOM_RESHAPE_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

typedef struct _nnom_reshape_layer_t
{
	nnom_layer_t super;
	nnom_shape_data_t* dim;
	uint8_t num_dim;

} nnom_reshape_layer_t;

typedef struct nnom_reshape_config_t
{
	nnom_layer_config_t super;
	nnom_shape_data_t* dim;
	uint8_t num_dim;
} nnom_reshape_config_t;

// method
nnom_status_t reshape_run(nnom_layer_t *layer);
nnom_status_t reshape_build(nnom_layer_t *layer);
nnom_status_t reshape_free(nnom_layer_t *layer);

// API
nnom_layer_t *reshape_s(const nnom_reshape_config_t *config);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_RESHAPE_H__ */
@@ -0,0 +1,85 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_RNN_H__
#define __NNOM_RNN_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// a machine interface for configuration
typedef struct _nnom_rnn_config_t
{
	nnom_layer_config_t super;
	bool return_sequence;
	bool stateful;
	bool go_backwards;
} nnom_rnn_config_t;

// RNN cell base type
typedef struct _nnom_rnn_cell_t
{
	nnom_status_t (*run)(struct _nnom_rnn_cell_t* cell);    // cell runner
	nnom_status_t (*build)(struct _nnom_rnn_cell_t* cell);  // cell builder, calculates buffer size, output data size
	nnom_status_t (*free)(struct _nnom_rnn_cell_t* cell);   //
	nnom_layer_t *layer;            // pointer to its layer holder
	nnom_layer_config_t *config;    // config for the cell, even though it is a layer-type config
	nnom_rnn_cell_type_t type;

	void *in_data;      // input data
	void *out_data;     // output data
	void *in_state;     // input state data (or hidden state)
	void *out_state;    // output state data

	size_t comp_buf_size;   // the size of the temporary buffer.
	size_t state_size;      // the size of the hidden state
	uint16_t units;         // the output units
	uint16_t feature_size;  // the input feature size (vector size)

	size_t macc;            // stat of MAC count.
} nnom_rnn_cell_t;

typedef struct _nnom_rnn_layer_t
{
	nnom_layer_t super;
	nnom_rnn_cell_t *cell;
	void *state_buf;        // memory allocated to store state, size = 2 x size of state required by the cell.

	uint16_t timestamp_size;// size of timestamp (sequence length)
	bool return_sequence;   // whether to return the output for each timestamp (sequence)
	bool stateful;          // whether the states are kept after one inference
	bool go_backwards;      // whether to run backwards in time
} nnom_rnn_layer_t;

// rnn layer
nnom_layer_t *rnn_s(nnom_rnn_cell_t *cell, const nnom_rnn_config_t* config);

nnom_status_t rnn_run(nnom_layer_t* layer);
nnom_status_t rnn_build(nnom_layer_t* layer);
nnom_status_t rnn_free(nnom_layer_t* layer);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_RNN_H__ */
@@ -0,0 +1,86 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-08-20     Jianjia Ma   The first version
 */

#ifndef __NNOM_SIMPLE_CELL_H__
#define __NNOM_SIMPLE_CELL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "nnom_rnn.h"
#include "nnom_activation.h"

// This SimpleCell replicates Keras's SimpleRNNCell, as shown below:
/*
 def call(self, inputs, states, training=None):
   prev_output = states[0] if nest.is_sequence(states) else states

   h = K.dot(inputs, self.kernel)
   h = K.bias_add(h, self.bias)

   output = h + K.dot(prev_output, self.recurrent_kernel)
   output = self.activation(output)

   new_state = [output] if nest.is_sequence(states) else output
   return output, new_state
*/

// a machine interface for configuration
typedef struct _nnom_simple_cell_config_t
{
	nnom_layer_config_t super;
	nnom_tensor_t *weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t *bias;
	nnom_qformat_param_t q_dec_iw, q_dec_hw, q_dec_h;
	nnom_activation_type_t act_type;    // type of the activation
	uint16_t units;
} nnom_simple_cell_config_t;

typedef struct _nnom_simple_cell_t
{
	nnom_rnn_cell_t super;
	nnom_activation_type_t act_type;

	nnom_tensor_t* weights;
	nnom_tensor_t* recurrent_weights;
	nnom_tensor_t* bias;

	// experimental,
	// iw: input x weight
	// hw: hidden state x recurrent weight
	// h: hidden state
	nnom_qformat_param_t q_dec_iw, q_dec_hw, q_dec_h;
	nnom_qformat_param_t oshift_iw, oshift_hw, bias_shift;

} nnom_simple_cell_t;

// RNN cells
// The shape for RNN input is (batch, timestamp, feature), where batch is always 1.
//
// SimpleCell
nnom_rnn_cell_t *simple_cell_s(const nnom_simple_cell_config_t* config);

nnom_status_t simple_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t simple_cell_build(nnom_rnn_cell_t* cell);
nnom_status_t simple_cell_run(nnom_rnn_cell_t* cell);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_SIMPLE_CELL_H__ */
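A sketch of wiring this cell into the RNN wrapper from nnom_rnn.h via the structured API. The tensors and Q-format values are placeholders prepared elsewhere (normally emitted by the nnom converter):

static nnom_tensor_t w_tensor, rw_tensor, b_tensor;   // hypothetical, filled elsewhere

nnom_simple_cell_config_t cell_cfg = {
    .weights = &w_tensor,
    .recurrent_weights = &rw_tensor,
    .bias = &b_tensor,
    .q_dec_iw = 7, .q_dec_hw = 7, .q_dec_h = 7,       // placeholder Q formats
    .act_type = ACT_TANH,
    .units = 16,
};
nnom_rnn_config_t rnn_cfg = {
    .return_sequence = false,                         // keep only the last step's output
    .stateful = false,
    .go_backwards = false,
};
nnom_layer_t *rnn = rnn_s(simple_cell_s(&cell_cfg), &rnn_cfg);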
@@ -0,0 +1,47 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_SOFTMAX_H__
#define __NNOM_SOFTMAX_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

typedef struct _nnom_softmax_config_t
{
	nnom_layer_config_t super;
} nnom_softmax_config_t;

// method
nnom_status_t softmax_run(nnom_layer_t *layer);
nnom_status_t softmax_build(nnom_layer_t *layer);

// API
nnom_layer_t *softmax_s(const nnom_softmax_config_t * config);
nnom_layer_t *Softmax(void);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_SOFTMAX_H__ */
@@ -0,0 +1,46 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_SUMPOOL_H__
#define __NNOM_SUMPOOL_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

#include "layers/nnom_maxpool.h"

// Sum Pooling
typedef nnom_maxpool_layer_t nnom_sumpool_layer_t;

// method
nnom_status_t sumpool_build(nnom_layer_t *layer);
nnom_status_t sumpool_run(nnom_layer_t *layer);

// API
nnom_layer_t *sumpool_s(const nnom_pool_config_t * config);
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_SUMPOOL_H__ */
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_UPSAMPLE_H__
#define __NNOM_UPSAMPLE_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

// Up Sampling layer (UnPooling)
typedef struct _nnom_upsample_layer_t
{
	nnom_layer_t super;
	nnom_3d_shape_t kernel;
} nnom_upsample_layer_t;

typedef struct _nnom_upsample_config_t
{
	nnom_layer_config_t super;
	nnom_shape_data_t kernel[2];
} nnom_upsample_config_t;

// API
nnom_layer_t *upsample_s(const nnom_upsample_config_t *config);
nnom_layer_t *UpSample(nnom_3d_shape_t kernel);

// Methods
nnom_status_t upsample_build(nnom_layer_t *layer);
nnom_status_t upsample_run(nnom_layer_t *layer);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_UPSAMPLE_H__ */
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2020-05-03     Jianjia Ma   The first version
 */

#ifndef __NNOM_ZERO_PADDING_H__
#define __NNOM_ZERO_PADDING_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"

typedef struct _nnom_zero_padding_config_t
{
	nnom_layer_config_t super;
	nnom_border_t pad;
} nnom_zero_padding_config_t;

// zero padding
typedef struct _nnom_zero_padding_layer_t
{
	nnom_layer_t super;
	nnom_border_t pad;
} nnom_zero_padding_layer_t;

// API
nnom_layer_t *zeropadding_s(const nnom_zero_padding_config_t* config);
nnom_layer_t *ZeroPadding(nnom_border_t pad);

// method
nnom_status_t zero_padding_build(nnom_layer_t *layer);
nnom_status_t zero_padding_run(nnom_layer_t *layer);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_ZERO_PADDING_H__ */
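Usage sketch: border() from nnom_layers.h packs top/bottom/left/right into an nnom_border_t, and Cropping() is the inverse of ZeroPadding():

nnom_layer_t *pad  = ZeroPadding(border(1, 1, 1, 1)); // pad one pixel on every side
nnom_layer_t *crop = Cropping(border(1, 1, 1, 1));    // remove one pixel from every side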
@@ -0,0 +1,415 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-02-05     Jianjia Ma   The first version
 * 2019-02-10     Jianjia Ma   Compiler supports dense net connection
 */

#ifndef __NNOM_H__
#define __NNOM_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdarg.h>
#include <math.h>

#include "nnom_port.h"

#define NNOM_ALIGN (sizeof(char*))  // alignment when doing memory ops. Equal to the size of a pointer in bytes.
#define q7_t int8_t
#define q15_t int16_t
#define q31_t int32_t
#define q63_t int64_t

/* version */
#define NNOM_MAJORVERSION 0  /**< major version number */
#define NNOM_SUBVERSION 4    /**< minor version number */
#define NNOM_REVISION 3      /**< revision number */
#define NNOM_VERSION ((NNOM_MAJORVERSION * 10000) + (NNOM_SUBVERSION * 100) + NNOM_REVISION)

#ifdef ARM_NN_TRUNCATE
#define NNOM_TRUNCATE
#endif

#ifndef NNOM_TRUNCATE
#define NNOM_ROUND(out_shift) ((0x1 << out_shift) >> 1)
#else
#define NNOM_ROUND(out_shift) 0
#endif

typedef enum
{
	NN_SUCCESS = 0,          /**< No error */
	NN_ARGUMENT_ERROR = -1,  /**< One or more arguments are incorrect */
	NN_LENGTH_ERROR = -2,    /**< Length of data buffer is incorrect */
	NN_SIZE_MISMATCH = -3,   /**< Size of matrices is not compatible with the operation. */
	NN_NANINF = -4,          /**< Not-a-number (NaN) or infinity is generated */
	NN_SINGULAR = -5,        /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
	NN_TEST_FAILURE = -6,    /**< Test Failed */
	NN_NO_MEMORY = -7,
	NN_MORE_TODO = -8
} nnom_status_t;

typedef enum
{
	NNOM_INVALID = 0,
	NNOM_BASE,
	NNOM_INPUT,
	NNOM_OUTPUT,
	NNOM_CONV_2D,
	NNOM_DW_CONV_2D,
	NNOM_CONV2D_TRANS,
	NNOM_BATCHNORM,
	NNOM_DENSE,
	NNOM_ZERO_PADDING,
	NNOM_CROPPING,
	NNOM_RNN,
	NNOM_ACTIVATION,
	NNOM_RELU,
	NNOM_LEAKY_RELU,
	NNOM_ADV_RELU,
	NNOM_SIGMOID,
	NNOM_TANH,
	NNOM_SOFTMAX,
	NNOM_MAXPOOL,
	NNOM_GLOBAL_MAXPOOL,
	NNOM_AVGPOOL,
	NNOM_GLOBAL_AVGPOOL,
	NNOM_SUMPOOL,
	NNOM_GLOBAL_SUMPOOL,
	NNOM_UPSAMPLE,
	NNOM_FLATTEN,
	NNOM_RESHAPE,
	NNOM_LAMBDA,
	NNOM_CONCAT,
	NNOM_ADD,
	NNOM_SUB,
	NNOM_MULT,
	NNOM_TYPE_MAX

} nnom_layer_type_t;

#define DEFUALT_LAYER_NAMES \
	{ \
		"Unknown", \
		"Base", \
		"Input", \
		"Output", \
		"Conv2D", \
		"DW_Conv2D", \
		"Conv2DTrsp", \
		"BatchNorm", \
		"Dense", \
		"ZeroPad", \
		"Cropping", \
		"RNN", \
		"Activation", \
		"ReLU", \
		"Leaky_ReLU", \
		"Adv_ReLU", \
		"Sigmoid", \
		"Tanh", \
		"Softmax", \
		"MaxPool", \
		"GL_MaxPool", \
		"AvgPool", \
		"GL_AvgPool", \
		"SumPool", \
		"GL_SumPool", \
		"UpSample", \
		"Flatten", \
		"Reshape", \
		"Lambda", \
		"Concat", \
		"Add", \
		"Sub", \
		"Mult", \
	}
extern const char default_layer_names[][12];

// We don't count softmax as an activation here; softmax is instanced as a layer
typedef enum
{
	ACT_UNKNOWN = 0,
	ACT_RELU,
	ACT_LEAKY_RELU,
	ACT_ADV_RELU,
	ACT_TANH,
	ACT_SIGMOID,
	ACT_HARD_TANH,
	ACT_HARD_SIGMOID
} nnom_activation_type_t;

#define ACTIVATION_NAMES \
	{ \
		"Unknown", \
		"ReLU", \
		"LkyReLU", \
		"AdvReLU", \
		"TanH", \
		"Sigmoid", \
		"HrdTanH", \
		"HrdSigd", \
	}
extern const char default_activation_names[][8];

// RNN cell type
typedef enum
{
	NNOM_UNKOWN_CELL = 0,
	NNOM_SIMPLE_CELL,
	NNOM_GRU_CELL,
	NNOM_LSTM_CELL,
	NNOM_CELL_TYPE_MAX
} nnom_rnn_cell_type_t;

#define DEFUALT_CELL_NAMES \
	{ \
		"Unknown", \
		"Simple", \
		"GRU", \
		"LSTM", \
	}
extern const char default_cell_names[][8];

// parameters
typedef enum
{
	PADDING_VALID = 0,
	PADDING_SAME
} nnom_padding_t;

#define NNOM_TENSOR_BUF_NULL (0)      // this buffer is not in use
#define NNOM_TENSOR_BUF_TEMP (1)      // the memory in IO is temporarily occupied; it can be reused by other layers once the computation is done.
#define NNOM_TENSOR_BUF_RESERVED (2)  // the mem is reserved for this layer only (not to be reused by other layers).

// currently used in compiling.
#define NNOM_BUF_EMPTY (0)
#define NNOM_BUF_FILLED (1)

// basic types
#define nnom_qformat_param_t int32_t  // this should match the backend; need a better way to do it.
#define nnom_shape_data_t uint16_t

typedef struct _nnom_3d_shape_t
{
	nnom_shape_data_t h, w, c;
} nnom_3d_shape_t;

typedef struct _nnom_border_t
{
	nnom_shape_data_t top, bottom, left, right;
} nnom_border_t;

// nnom_3d_shape_axis_t type provides the axis[] format access to nnom_3d_shape_t
typedef union {
	nnom_3d_shape_t s;
	nnom_shape_data_t axis[sizeof(nnom_3d_shape_t) / sizeof(nnom_shape_data_t)];
} nnom_3d_shape_axis_t;

// tensor quantisation types
typedef enum
{
	NNOM_QTYPE_PER_TENSOR = 0,
	NNOM_QTYPE_PER_AXIS = 1
} nnom_qtype_t;

typedef struct _nnom_weights
{
	const void *p_value;
	nnom_qformat_param_t shift;
} nnom_weight_t;

typedef struct _nnom_bias
{
	const void *p_value;
	nnom_qformat_param_t shift;
} nnom_bias_t;

// experimental
typedef struct _nnom_tensor_t
{
	void* p_data;                 // value
	nnom_shape_data_t *dim;       // dimension of this tensor
	nnom_qformat_param_t *q_dec;  // number of decimal bits for Q format (scale)
	nnom_qformat_param_t *q_offset; // offset for each channel
	nnom_qtype_t qtype;           // the quantisation type
	uint8_t num_dim;              // the number of dimensions
	uint8_t bitwidth;             // the data bit width; only 8-bit is supported now
} nnom_tensor_t;

// nn wrappers
typedef struct _nnom_layer_t nnom_layer_t;
typedef struct _nnom_layer_io_t nnom_layer_io_t;
typedef struct _nnom_layer_hook_t nnom_layer_hook_t;
typedef struct _nnom_mem_block_t nnom_mem_block_t;

// activation wrapper
typedef struct _nnom_activation_t nnom_activation_t;

typedef struct _nnom_buf
{
	nnom_mem_block_t *mem;
	size_t size;
	uint8_t type;
} nnom_buf_t;

// a memory block to store pre-assigned memories during compiling, then assigned to each tensor after.
struct _nnom_mem_block_t
{
	void *blk;      // data block location
	size_t size;    // the maximum size for this block
	uint8_t owners; // how many layers own this block
	uint8_t state;  // empty? filled? for static nn, currently only used in compiling
};

typedef struct _nnom_stat_t
{
	size_t macc;    // num. of mac operations
	uint32_t time;
} nnom_layer_stat_t;

struct _nnom_layer_hook_t
{
	nnom_layer_io_t *io;        // hooked io
	nnom_layer_hook_t *next;    // next hook includes the secondary hooked layer
};

struct _nnom_layer_io_t
{
	nnom_layer_hook_t hook;     // for example: (layer->out)--hook--(layer->in)
	nnom_layer_io_t *aux;       // points to auxiliary I/O (multiple I/O layers)
	nnom_tensor_t *tensor;      // experimental
	nnom_mem_block_t *mem;      // memory block handles for compiling only. The memory is now passed by tensor; trying to remove it.
	nnom_layer_t *owner;        // which layer owns this io.
	uint8_t type;
};

// structured configuration base type
typedef struct _nnom_layer_config_t
{
	char* name; // the name of the layer in the pre-quantised model (the model trained by the user before being converted to nnom)
} nnom_layer_config_t;

// layers base
struct _nnom_layer_t
{
	nnom_layer_t *shortcut;     // shortcut points to the next layer, applied on compiling

	nnom_status_t (*run)(nnom_layer_t *layer);   // run method. required
	nnom_status_t (*build)(nnom_layer_t *layer); // compute output buffer shape. can be left null, will call default_build()
	nnom_status_t (*free)(nnom_layer_t *layer);  // a callback to free private resources (comp buf not included) can be left null
	nnom_buf_t *comp;           // computational buf
	nnom_activation_t *actail;  // I have an activation, I have a tail, wooo haaaa, act-tail!!!

	nnom_layer_config_t *config; // points to the configuration of the layers. for machine api only.
	nnom_layer_type_t type;      // layer types
	nnom_layer_io_t *in;         // IO buff, last*layer, states
	nnom_layer_io_t *out;        // IO buff, next*layer, states
	nnom_layer_stat_t stat;      // stats, timing, ops
};

// activation base
struct _nnom_activation_t
{
	nnom_status_t (*run)(struct _nnom_activation_t *act);
	nnom_tensor_t *tensor;
	nnom_activation_type_t type;
};

// local static functions when libc is not available
#ifdef NNOM_USING_STATIC_MEMORY
void nnom_set_static_buf(void* buf, size_t size);
void *nnom_malloc(size_t size);
void nnom_free(void* p);
#endif //NNOM_USING_STATIC_MEMORY

typedef struct _nnom_model nnom_model_t;

#include "nnom_tensor.h"
#include "nnom_layers.h"
#include "nnom_utils.h"

// models; I don't want to make the model class a child of the layer class yet
struct _nnom_model
{
	nnom_layer_t *head;
	nnom_layer_t *tail;

	// model constructor
	nnom_status_t (*add)(struct _nnom_model *m, nnom_layer_t *layer);  // has to pass a raw value
	nnom_layer_t *(*hook)(nnom_layer_t *curr, nnom_layer_t *last);     // create hook between 2 layers' primary IO.
	nnom_layer_t *(*merge)(nnom_layer_t *method, nnom_layer_t *in1, nnom_layer_t *in2); // an older interface for merging 2 inputs.
	nnom_layer_t *(*mergex)(nnom_layer_t *method, int num, ...);       // merge a few layers using a multiple-input method (concat, add, ...)
	nnom_layer_t *(*active)(nnom_activation_t *act, nnom_layer_t *target_layer); // add the activation to the existing layer's tail

	// callback
	nnom_status_t (*layer_callback)(nnom_model_t *m, nnom_layer_t *layer); // layer callback will be called after each layer (after actail).

	// block memory for layers
	nnom_mem_block_t blocks[NNOM_BLOCK_NUM];

	size_t total_ops;

	bool is_inited;     // is this structure initialized
	bool is_allocated;  // is this structure allocated by nnom (not by user)
};

#define NNOM_NULL_CHECK(p)                 \
	if ((p) == NULL)                       \
	{                                      \
		NNOM_LOG("Error: NULL object.\n"); \
		return NN_ARGUMENT_ERROR;          \
	}

// utils
size_t nnom_alignto(size_t value, uint32_t alignment);
size_t nnom_io_length(nnom_layer_io_t *io);
size_t nnom_hook_length(nnom_layer_hook_t *hook);

// memory (malloc + memset 0)
void *nnom_mem(size_t size);

// get how much memory has been taken
size_t nnom_mem_stat(void);

// Model APIs
// create or init a model
nnom_model_t *new_model(nnom_model_t *m);
// compile as a sequential model
nnom_status_t sequencial_compile(nnom_model_t *m);
// compile as a functional model
nnom_status_t model_compile(nnom_model_t *m, nnom_layer_t *input, nnom_layer_t *output);
// run a prediction
nnom_status_t model_run(nnom_model_t *m);
// delete model.
void model_delete(nnom_model_t *m);
// check version
nnom_status_t check_model_version(unsigned long model_version);

// callback, called after each layer has finished its calculation.
// this callback must return NN_SUCCESS for the model to continue running; otherwise, the model returns with the error code.
// this function returns NN_LENGTH_ERROR if the callback has already been set to another function.
nnom_status_t model_set_callback(nnom_model_t *m, nnom_status_t (*layer_callback)(nnom_model_t *m, nnom_layer_t *layer));
// delete callback.
void model_delete_callback(nnom_model_t *m);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_H__ */
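Two worked checks of the macros above: NNOM_VERSION packs 0.4.3 as 0*10000 + 4*100 + 3 = 403, and with rounding enabled NNOM_ROUND(out_shift) is half of the final shift step, e.g. (0x1 << 3) >> 1 == 4, which adds 0.5 LSB before a >> 3 (round-to-nearest). nnom_macro_check is only an illustrative test function:

#include <assert.h>
static void nnom_macro_check(void)
{
    assert(NNOM_VERSION == 403);
#ifndef NNOM_TRUNCATE
    assert(NNOM_ROUND(3) == 4);   // adds 0.5 LSB before the right shift
#endif
}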
@@ -0,0 +1,194 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-02-05     Jianjia Ma   The first version
 */

#ifndef __NNOM_LAYERS_H__
#define __NNOM_LAYERS_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"

// properties
nnom_3d_shape_t shape(size_t h, size_t w, size_t c);
nnom_3d_shape_t kernel(size_t h, size_t w);
nnom_3d_shape_t stride(size_t h, size_t w);
nnom_3d_shape_t dilation(size_t h, size_t w);
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right);
//nnom_qformat_t qformat(int8_t m, int8_t n);
size_t shape_size(nnom_3d_shape_t* s);

// this function adds a new IO to the current initialised IO
// input: the targeted IO that the new IO will be added to
// output: the new IO
nnom_layer_io_t* io_add_aux(nnom_layer_io_t* targeted_io);
nnom_layer_io_t *io_init(void *owner_layer, nnom_layer_io_t *io);

#define NN_CEILIF(x,y) ((x+y-1)/y)

#include "layers/nnom_activation.h"
#include "layers/nnom_concat.h"
#include "layers/nnom_conv2d.h"
#include "layers/nnom_cropping.h"
#include "layers/nnom_conv2d_trans.h"
#include "layers/nnom_dense.h"
#include "layers/nnom_dw_conv2d.h"
#include "layers/nnom_flatten.h"
#include "layers/nnom_reshape.h"
#include "layers/nnom_global_pool.h"
#include "layers/nnom_input.h"
#include "layers/nnom_lambda.h"
#include "layers/nnom_matrix.h"
#include "layers/nnom_maxpool.h"
#include "layers/nnom_avgpool.h"
#include "layers/nnom_output.h"
#include "layers/nnom_rnn.h"
#include "layers/nnom_softmax.h"
#include "layers/nnom_sumpool.h"
#include "layers/nnom_upsample.h"
#include "layers/nnom_zero_padding.h"
#include "layers/nnom_simple_cell.h"
#include "layers/nnom_lstm_cell.h"
#include "layers/nnom_gru_cell.h"

// Layer APIs ******
// (a summary of each individual layer's file)

// input/output
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf);
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf);

// Pooling
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);

// padding, cropping, upsample
nnom_layer_t *UpSample(nnom_3d_shape_t kernel);
nnom_layer_t *ZeroPadding(nnom_border_t pad);
nnom_layer_t *Cropping(nnom_border_t pad);

// Activation
nnom_layer_t *Activation(nnom_activation_t *act);
nnom_layer_t *ReLU(void);
nnom_layer_t *LeakyReLU(float alpha);
nnom_layer_t *Softmax(void);
nnom_layer_t *Sigmoid(int32_t dec_bit);  // input dec bit
nnom_layer_t *TanH(int32_t dec_bit);     // input dec bit

// Matrix
nnom_layer_t *Add(int16_t oshift);   // output shift
nnom_layer_t *Sub(int16_t oshift);   // output shift
nnom_layer_t *Mult(int16_t oshift);  // output shift

nnom_layer_t *Flatten(void);
nnom_layer_t *Concat(int8_t axis);
// -- NN Constructors --
// conv2d
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
					 const nnom_weight_t *w, const nnom_bias_t *b);

// deconv2d
nnom_layer_t *Conv2DTrans(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
						  const nnom_weight_t *w, const nnom_bias_t *b);

// depthwise_convolution
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
						const nnom_weight_t *w, const nnom_bias_t *b);

// fully connected, dense
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b);

// Lambda Layers
nnom_layer_t *Lambda(nnom_status_t (*run)(nnom_layer_t *),   // run method, required
					 nnom_status_t (*build)(nnom_layer_t *), // optional, call default_build() if left null
					 nnom_status_t (*free)(nnom_layer_t *),  // not required if no resources need to be deleted, can be left null.
					 void *parameters);                      // user private parameters for run method, left null if not needed.

// building methods
nnom_status_t default_build(nnom_layer_t* layer);
nnom_status_t input_build(nnom_layer_t* layer);

nnom_status_t conv2d_build(nnom_layer_t* layer);
nnom_status_t dw_conv2d_build(nnom_layer_t* layer);
nnom_status_t conv2d_trans_build(nnom_layer_t* layer);
nnom_status_t dense_build(nnom_layer_t* layer);
nnom_status_t rnn_build(nnom_layer_t* layer);

nnom_status_t upsample_build(nnom_layer_t* layer);
nnom_status_t zero_padding_build(nnom_layer_t* layer);
nnom_status_t cropping_build(nnom_layer_t* layer);

nnom_status_t maxpool_build(nnom_layer_t* layer);
nnom_status_t avgpool_build(nnom_layer_t* layer);
nnom_status_t sumpool_build(nnom_layer_t* layer);
nnom_status_t global_pool_build(nnom_layer_t* layer);

nnom_status_t flatten_build(nnom_layer_t* layer);
nnom_status_t reshape_build(nnom_layer_t* layer);
nnom_status_t concat_build(nnom_layer_t* layer);

// run
nnom_status_t input_run(nnom_layer_t* layer);
nnom_status_t output_run(nnom_layer_t* layer);
nnom_status_t flatten_run(nnom_layer_t* layer);
nnom_status_t reshape_run(nnom_layer_t* layer);
nnom_status_t default_run(nnom_layer_t* layer);  // simply copy data from input to output

nnom_status_t dw_conv2d_run(nnom_layer_t* layer);
nnom_status_t conv2d_run(nnom_layer_t* layer);
nnom_status_t conv2d_trans_run(nnom_layer_t* layer);
nnom_status_t dense_run(nnom_layer_t* layer);
nnom_status_t rnn_run(nnom_layer_t* layer);

nnom_status_t upsample_run(nnom_layer_t* layer);
nnom_status_t zero_padding_run(nnom_layer_t* layer);
nnom_status_t cropping_run(nnom_layer_t* layer);

nnom_status_t activation_run(nnom_layer_t* layer);
nnom_status_t softmax_run(nnom_layer_t* layer);

nnom_status_t maxpool_run(nnom_layer_t* layer);
nnom_status_t avgpool_run(nnom_layer_t* layer);
nnom_status_t sumpool_run(nnom_layer_t* layer);

nnom_status_t concat_run(nnom_layer_t* layer);
nnom_status_t add_run(nnom_layer_t* layer);
nnom_status_t sub_run(nnom_layer_t* layer);
nnom_status_t mult_run(nnom_layer_t* layer);

// Activation APIs
// Softmax is not considered as an activation in NNoM; Softmax is in the layer API.
nnom_activation_t* act_relu(void);
nnom_activation_t* act_leaky_relu(float alpha);
nnom_activation_t* act_sigmoid(int32_t dec_bit);
nnom_activation_t* act_tanh(int32_t dec_bit);

// direct API
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor);

#ifdef __cplusplus
}
#endif

#endif /* __NNOM_LAYERS_H__ */
@ -0,0 +1,974 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020
|
||||
* Jianjia Ma
|
||||
* majianjia@live.com
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Notice:
|
||||
 * Code in this file includes derivative works from CMSIS, which is released under an alternative license.
|
||||
 * Please check the LICENSE file for details.
|
||||
*
|
||||
* Change Logs:
|
||||
* Date Author Notes
|
||||
* 2019-02-05 Jianjia Ma The first version
|
||||
* 2019-03-19 Jianjia Ma Local C implementation partly from CMSIS-NN
|
||||
*/
|
||||
|
||||
#ifndef __NNOM_LOCAL_H__
|
||||
#define __NNOM_LOCAL_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#include "stdint.h"
|
||||
#include "nnom_port.h"
|
||||
|
||||
#ifdef ARM_NN_TRUNCATE
|
||||
#define NNOM_TRUNCATE
|
||||
#endif
|
||||
|
||||
// SSAT implementation with C code
|
||||
#ifndef __NNOM_SSAT
|
||||
static inline int __NNOM_SSAT(int32_t value, int32_t bit) {
|
||||
int32_t min = -(1<<(bit-1));
|
||||
int32_t max = (1<<(bit-1)) - 1;
|
||||
if (value < min)
|
||||
return min;
|
||||
else if (value > max)
|
||||
return max;
|
||||
else
|
||||
return value;
|
||||
}
|
||||
#endif
|
||||
|
||||
// USAT implementation with C code
|
||||
#ifndef __NNOM_USAT
|
||||
static inline int __NNOM_USAT(int32_t value, int32_t bit) {
|
||||
int32_t max = (1<<(bit-1)) - 1;
|
||||
if (value < 0)
|
||||
return 0;
|
||||
else if (value > max)
|
||||
return max;
|
||||
else
|
||||
return value;
|
||||
}
|
||||
#endif
|
||||
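/* Worked examples for the saturation helpers above (values as implemented):
   __NNOM_SSAT( 300, 8) ->  127   // clamped to the signed 8-bit range [-128, 127]
   __NNOM_SSAT(-300, 8) -> -128
   __NNOM_USAT( -10, 8) ->    0   // negative inputs clamp to zero
   __NNOM_USAT( 300, 8) ->  127   // note: the upper bound is (1<<(bit-1))-1, mirroring SSAT
*/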
|
||||
#define MAX(A, B) ((A) > (B) ? (A) : (B))
|
||||
#define MIN(A, B) ((A) < (B) ? (A) : (B))
|
||||
|
||||
|
||||
// The functions/tables below are partially modified from the CMSIS-NN lib
|
||||
// https://github.com/ARM-software/CMSIS_5
|
||||
//
|
||||
void local_avepool_q7_HWC(const q7_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
const uint16_t output_shift, // output right shift
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q7_t *Im_out);
|
||||
|
||||
void local_avepool_q7_CHW(const q7_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
const uint16_t output_shift, // output right shift
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q7_t *Im_out);
|
||||
|
||||
// modified from CMSIS-NN test_ref
|
||||
void local_maxpool_q7_HWC(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t * bufferA, // a buffer for local storage, NULL by now
|
||||
q7_t * Im_out);
|
||||
|
||||
void local_maxpool_q7_CHW(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t * bufferA, // a buffer for local storage, NULL by now
|
||||
q7_t * Im_out);
|
||||
|
||||
void local_sumpool_q7_HWC(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t * bufferA, // a buffer for local storage, size = 4*output_size
|
||||
q7_t * Im_out);
|
||||
|
||||
void local_sumpool_q7_CHW(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t * bufferA, // a buffer for local storage, size = 4*output_size
|
||||
q7_t * Im_out);
|
||||
|
||||
// customised up-sampling
|
||||
void local_up_sampling_q7_HWC(const q7_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t *bufferA, // NULL
|
||||
q7_t *Im_out);
|
||||
|
||||
void local_up_sampling_q7_CHW(const q7_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t *bufferA, // NULL
|
||||
q7_t *Im_out);
|
||||
|
||||
void local_convolve_HWC_q7_nonsquare(const q7_t *Im_in, // input image
|
||||
                                     const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                     const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_convolve_CHW_q7_nonsquare(const q7_t *Im_in, // input image
|
||||
                                     const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                     const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_conv_trans_HWC_q7_nonsquare(const int8_t * Im_in,
|
||||
                                     const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                     const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,// input image
|
||||
                                                     const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                                     const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_depthwise_separable_conv_CHW_q7_nonsquare(const q7_t *Im_in,// input image
|
||||
                                                     const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                                     const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_zero_padding_HWC_q7(const q7_t *Im_in, // input image
|
||||
                               const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                               const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_zero_padding_CHW_q7(const q7_t *Im_in, // input image
|
||||
                               const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                               const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_cropping_HWC_q7(const q7_t *Im_in, // input image
|
||||
                           const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                           const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_cropping_CHW_q7(const q7_t *Im_in, // input image
|
||||
                           const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                           const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q7_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_fully_connected_q7_opt(const q7_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, q7_t * pOut, // output operand
|
||||
q15_t * vec_buffer);
|
||||
|
||||
|
||||
void local_fully_connected_q7(const q7_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, q7_t * pOut, // output operand
|
||||
q15_t * vec_buffer);
|
||||
|
||||
// matrix dot
|
||||
// it takes reordered weights as input (see the dense layer for details; this is basically a dense opt without bias)
|
||||
void local_dot_q7_opt(const q7_t *pV, // pointer to vector
|
||||
const q7_t *pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
q7_t *pOut); // result buffer
|
||||
|
||||
void local_dot_q7(const q7_t *pV, // pointer to vector
|
||||
const q7_t *pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
                  q7_t *pOut); // output operand
|
||||
|
||||
|
||||
|
||||
// softmax
|
||||
void local_softmax_q7(const q7_t * vec_in, const uint32_t dim_vec, q7_t * p_out);
|
||||
|
||||
// sigmoid
|
||||
void local_sigmoid_q7(q7_t * data, uint32_t size, int16_t int_width);
|
||||
|
||||
// tanh
|
||||
void local_tanh_q7(q7_t * data, uint32_t size, int16_t int_width);
|
||||
|
||||
// relu
|
||||
void local_relu_q7(q7_t * data, uint32_t size);
|
||||
|
||||
// leaky relu
|
||||
void local_leaky_relu_q7(q7_t *data, q7_t alpha, uint32_t size);
|
||||
|
||||
// alpha in q7 format with dec_bit=7
|
||||
// max and threshold have the same Q format as the activation
|
||||
void local_adv_relu_q7(q7_t *data, q7_t alpha, q7_t max, q7_t threshold, uint32_t size);
|
||||
|
||||
// hard sigmoid,
|
||||
// y=-1 if x < -2.5
|
||||
// y=1 if x > 2.5
|
||||
// otherwise y = 0.2 * x + 0.5 (approximated as y = 0.20315 * x + 0.5)
|
||||
void local_hard_sigmoid_q7(q7_t *data, uint32_t size, int16_t dec_bit);
|
||||
|
||||
// hard tanh
|
||||
// y=-1 if x < -1
|
||||
// y=1 if x > 1
|
||||
// otherwise y = x
|
||||
void local_hard_tanh_q7(q7_t *data, uint32_t size, int16_t dec_bit);
|
||||
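/* Q-format note for the fixed-point activations above (illustrative):
   a value v with dec_bit fractional bits represents v / 2^dec_bit;
   for q7, int_width is the complementary integer-bit count (7 - dec_bit).
   e.g. with dec_bit = 5: q7 value 32 -> 1.0, q7 value -16 -> -0.5 */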
|
||||
// matrix ops
|
||||
void local_mult_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
|
||||
|
||||
// add
|
||||
void local_add_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
|
||||
|
||||
// sub
|
||||
void local_sub_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
|
||||
|
||||
// take multiple blocks (>2) as input
|
||||
void local_multiple_add_q7( q7_t *p_dst,
|
||||
const int16_t out_shift,
|
||||
uint32_t block_size,
|
||||
uint32_t num_block,
|
||||
q7_t **p_src);
|
||||
|
||||
void local_multiple_mult_q7( q7_t *p_dst,
|
||||
const int16_t out_shift,
|
||||
uint32_t block_size,
|
||||
uint32_t num_block,
|
||||
q7_t **p_src);
|
||||
|
||||
void local_multiple_sub_q7( q7_t *p_dst,
|
||||
const int16_t out_shift,
|
||||
uint32_t block_size,
|
||||
uint32_t num_block,
|
||||
q7_t **p_src);
|
||||
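/* The three local_multiple_* helpers above share one calling pattern.
   Illustrative call with hypothetical buffers: element-wise sum of three
   q7 blocks of 4 elements each into dst, with no output shift:

   q7_t a[4], b[4], c[4], dst[4];
   q7_t *srcs[3] = {a, b, c};
   local_multiple_add_q7(dst, 0, 4, 3, srcs);
*/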
|
||||
|
||||
// The tables below are credited to CMSIS
|
||||
// For more info, check the CMSIS-NN lib
|
||||
// https://github.com/ARM-software/CMSIS_5/blob/develop/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c
|
||||
static const q7_t nnom_sigmoid_table_q7[256] = {
|
||||
0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e,
|
||||
0x50, 0x52, 0x53, 0x55, 0x57, 0x59, 0x5a, 0x5c,
|
||||
0x5e, 0x5f, 0x61, 0x62, 0x63, 0x65, 0x66, 0x67,
|
||||
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
|
||||
0x71, 0x72, 0x72, 0x73, 0x74, 0x74, 0x75, 0x76,
|
||||
0x76, 0x77, 0x77, 0x78, 0x78, 0x79, 0x79, 0x7a,
|
||||
0x7a, 0x7a, 0x7b, 0x7b, 0x7b, 0x7c, 0x7c, 0x7c,
|
||||
0x7c, 0x7c, 0x7d, 0x7d, 0x7d, 0x7d, 0x7d, 0x7e,
|
||||
0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||
0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04,
|
||||
0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06,
|
||||
0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09,
|
||||
0x0a, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0e,
|
||||
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
|
||||
0x17, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, 0x21,
|
||||
0x22, 0x24, 0x26, 0x27, 0x29, 0x2b, 0x2d, 0x2e,
|
||||
0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
|
||||
};
|
||||
|
||||
|
||||
static const q7_t nnom_tanh_table_q7[256] = {
|
||||
0x00, 0x08, 0x10, 0x18, 0x1f, 0x27, 0x2e, 0x35,
|
||||
0x3b, 0x41, 0x47, 0x4c, 0x51, 0x56, 0x5a, 0x5e,
|
||||
0x61, 0x65, 0x68, 0x6a, 0x6d, 0x6f, 0x71, 0x72,
|
||||
0x74, 0x75, 0x76, 0x78, 0x78, 0x79, 0x7a, 0x7b,
|
||||
0x7b, 0x7c, 0x7c, 0x7d, 0x7d, 0x7e, 0x7e, 0x7e,
|
||||
0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81,
|
||||
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x82,
|
||||
0x82, 0x82, 0x82, 0x82, 0x83, 0x83, 0x84, 0x84,
|
||||
0x85, 0x85, 0x86, 0x87, 0x88, 0x88, 0x8a, 0x8b,
|
||||
0x8c, 0x8e, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9b,
|
||||
0x9f, 0xa2, 0xa6, 0xaa, 0xaf, 0xb4, 0xb9, 0xbf,
|
||||
0xc5, 0xcb, 0xd2, 0xd9, 0xe1, 0xe8, 0xf0, 0xf8,
|
||||
};
|
||||
|
||||
|
||||
// ------------ 16bit ops --------------------
|
||||
|
||||
void local_avepool_q15_HWC(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
const uint16_t output_shift, // output right shift
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_avepool_q15_CHW(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
const uint16_t output_shift, // output right shift
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_maxpool_q15_HWC(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_maxpool_q15_CHW(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_sumpool_q15_HWC(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
const uint16_t output_shift, // output right shift
|
||||
q7_t *bufferA, // a buffer for local storage, size = 4*output_size
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_sumpool_q15_CHW(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t padding_x, // padding sizes
|
||||
const uint16_t padding_y, // padding sizes
|
||||
const uint16_t stride_x, // stride
|
||||
const uint16_t stride_y, // stride
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
const uint16_t output_shift, // output right shift
|
||||
q7_t *bufferA, // a buffer for local storage, size = 4*output_size
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_up_sampling_q15_HWC(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_up_sampling_q15_CHW(const q15_t *Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimension x or W
|
||||
const uint16_t dim_im_in_y, // input image dimension y or H
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t dim_kernel_x, // window kernel size
|
||||
const uint16_t dim_kernel_y, // window kernel size
|
||||
const uint16_t dim_im_out_x, // output image dimension x or W
|
||||
const uint16_t dim_im_out_y, // output image dimension y or H
|
||||
q7_t *bufferA, // a buffer for local storage, NULL by now
|
||||
q15_t *Im_out);
|
||||
|
||||
void local_convolve_HWC_q15_nonsquare(const q15_t *Im_in, // input image
|
||||
                                      const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                      const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
void local_convolve_CHW_q15_nonsquare(const q15_t *Im_in, // input image
|
||||
                                      const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                      const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_conv_trans_HWC_q15_nonsquare(const int8_t * Im_in,
|
||||
                                      const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                      const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_depthwise_separable_conv_HWC_q15_nonsquare(const q15_t *Im_in,// input image
|
||||
                                                      const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                                      const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_depthwise_separable_conv_CHW_q15_nonsquare(const q15_t *Im_in,// input image
|
||||
                                                      const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                                      const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t *wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const uint16_t dilation_x, // dilation x
|
||||
const uint16_t dilation_y, // dilation y
|
||||
const q7_t *bias, // bias
|
||||
const nnom_qformat_param_t *bias_shift, // bias shifts
|
||||
const nnom_qformat_param_t *out_shift, // output shift
|
||||
const nnom_qtype_t q_type, // per channel or per tensor
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t *bufferA, //buffer space for input
|
||||
q7_t *bufferB //buffer space for output
|
||||
);
|
||||
|
||||
void local_zero_padding_HWC_q15(const q15_t *Im_in, // input image
|
||||
                                const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_zero_padding_CHW_q15(const q15_t *Im_in, // input image
|
||||
                                const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                                const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_cropping_HWC_q15(const q15_t *Im_in, // input image
|
||||
                            const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                            const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
void local_cropping_CHW_q15(const q15_t *Im_in, // input image
|
||||
                            const uint16_t dim_im_in_x,  // input image dimension x
|
||||
                            const uint16_t dim_im_in_y,  // input image dimension y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const uint16_t padding_top, // padding sizes y
|
||||
const uint16_t padding_bottom, // padding sizes y
|
||||
const uint16_t padding_left, // padding sizes x
|
||||
const uint16_t padding_right, // padding sizes x
|
||||
q15_t *Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y); // output image dimension y
|
||||
|
||||
|
||||
void local_dot_q15(const q15_t *pV, // pointer to vector
|
||||
const q15_t *pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
                   q15_t *pOut); // output operand
|
||||
|
||||
void local_dot_q15_opt(const q15_t * pV,
|
||||
const q15_t * pM,
|
||||
const uint16_t dim_vec,
|
||||
const uint16_t num_of_rows,
|
||||
const uint16_t out_shift,
|
||||
q15_t * pOut);
|
||||
|
||||
// original implementation
|
||||
// this supports no bias; it will then perform like a dot product.
|
||||
// set `bias=NULL` to use it that way
|
||||
void local_fully_connected_mat_q7_vec_q15(const q15_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, // bias
|
||||
q15_t * pOut, // output
|
||||
                                          q15_t * vec_buffer); // not used, but kept to match the interface of the ARM version
|
||||
|
||||
// works on the reordered matrix
|
||||
// this supports no bias; set bias=NULL to use it that way
|
||||
void local_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV,
|
||||
const q7_t * pM,
|
||||
const uint16_t dim_vec,
|
||||
const uint16_t num_of_rows,
|
||||
const uint16_t bias_shift,
|
||||
const uint16_t out_shift,
|
||||
const q7_t * bias,
|
||||
q15_t * pOut,
|
||||
q15_t * vec_buffer);
|
||||
|
||||
// matrix operation Q15
|
||||
void local_multiple_add_q15( q15_t *p_dst,
|
||||
const int16_t out_shift,
|
||||
uint32_t block_size,
|
||||
uint32_t num_block,
|
||||
q15_t **p_src);
|
||||
|
||||
void local_multiple_mult_q15( q15_t *p_dst,
|
||||
const int16_t out_shift,
|
||||
uint32_t block_size,
|
||||
uint32_t num_block,
|
||||
q15_t **p_src);
|
||||
|
||||
void local_multiple_sub_q15( q15_t *p_dst,
|
||||
const int16_t out_shift,
|
||||
uint32_t block_size,
|
||||
uint32_t num_block,
|
||||
q15_t **p_src);
|
||||
|
||||
void local_mult_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
|
||||
|
||||
// add
|
||||
void local_add_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
|
||||
|
||||
// sub
|
||||
void local_sub_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
|
||||
|
||||
// Convert Q7 to Q15
|
||||
void local_q7_to_q15_no_shift(const q7_t *src, q15_t *des, uint32_t size);
|
||||
void local_q7_to_q15(const q7_t *src, q15_t *des, uint32_t size);
|
||||
|
||||
// q15 shift to q7
|
||||
void local_q15_to_q7(const q15_t *src, q7_t *des, uint32_t shift, uint32_t size);
|
||||
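/* Illustrative: with shift = 8, local_q15_to_q7() maps the q15 value
   0x4000 (0.5 in q0.15) to the q7 value 0x40 (0.5 in q0.7), since 16384 >> 8 == 64. */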
|
||||
// y = 1 - x
|
||||
void local_1_minor_z_q15(q15_t *src, q15_t *des, uint16_t dec_bit, uint32_t size);
|
||||
|
||||
void local_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);
|
||||
void local_hard_sigmoid_q15(q15_t *data, uint32_t size, int16_t dec_bit);
|
||||
void local_hard_tanh_q15(q15_t *data, uint32_t size, int16_t dec_bit);
|
||||
void local_relu_q15(q15_t *data, uint32_t size);
|
||||
void local_leaky_relu_q15(q15_t *data, q7_t alpha, uint32_t size);
|
||||
void local_adv_relu_q15(q15_t *data, q7_t negative_slope, q15_t max, q15_t threshold, uint32_t size);
|
||||
void local_sigmoid_q15(q15_t * data, uint32_t size, uint16_t int_width);
|
||||
void local_tanh_q15(q15_t * data, uint32_t size, uint16_t int_width);
|
||||
|
||||
|
||||
static const q15_t nnom_sigmoid_table_q15[256] = {
|
||||
0x4000, 0x4200, 0x43ff, 0x45fc, 0x47f5, 0x49eb, 0x4bdc, 0x4dc8,
|
||||
0x4fad, 0x518a, 0x5360, 0x552c, 0x56ef, 0x58a8, 0x5a57, 0x5bfb,
|
||||
0x5d93, 0x5f20, 0x60a1, 0x6216, 0x637f, 0x64db, 0x662b, 0x676f,
|
||||
0x68a6, 0x69d2, 0x6af1, 0x6c05, 0x6d0d, 0x6e09, 0x6efb, 0x6fe2,
|
||||
0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7,
|
||||
0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f,
|
||||
0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03,
|
||||
0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d,
|
||||
0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81,
|
||||
0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17,
|
||||
0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72,
|
||||
0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa,
|
||||
0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc,
|
||||
0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0,
|
||||
0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed,
|
||||
0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4,
|
||||
0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
|
||||
0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c,
|
||||
0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e,
|
||||
0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c,
|
||||
0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d,
|
||||
0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce,
|
||||
0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152,
|
||||
0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a,
|
||||
0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388,
|
||||
0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8,
|
||||
0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a,
|
||||
0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70,
|
||||
0x0f42, 0x101e, 0x1105, 0x11f7, 0x12f3, 0x13fb, 0x150f, 0x162e,
|
||||
0x175a, 0x1891, 0x19d5, 0x1b25, 0x1c81, 0x1dea, 0x1f5f, 0x20e0,
|
||||
0x226d, 0x2405, 0x25a9, 0x2758, 0x2911, 0x2ad4, 0x2ca0, 0x2e76,
|
||||
0x3053, 0x3238, 0x3424, 0x3615, 0x380b, 0x3a04, 0x3c01, 0x3e00,
|
||||
};
|
||||
|
||||
|
||||
static const q15_t nnom_tanh_table_q15[256] = {
|
||||
0x0000, 0x07fd, 0x0feb, 0x17b9, 0x1f59, 0x26bf, 0x2ddf, 0x34ae,
|
||||
0x3b27, 0x4142, 0x46fd, 0x4c56, 0x514d, 0x55e2, 0x5a1a, 0x5df6,
|
||||
0x617c, 0x64b0, 0x6797, 0x6a37, 0x6c95, 0x6eb5, 0x709e, 0x7254,
|
||||
0x73dc, 0x753a, 0x7672, 0x7788, 0x787f, 0x795b, 0x7a1e, 0x7acb,
|
||||
0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f,
|
||||
0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48,
|
||||
0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc,
|
||||
0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7,
|
||||
0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7,
|
||||
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd,
|
||||
0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001,
|
||||
0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003,
|
||||
0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007,
|
||||
0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013,
|
||||
0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035,
|
||||
0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f,
|
||||
0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183,
|
||||
0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412,
|
||||
0x849b, 0x8535, 0x85e2, 0x86a5, 0x8781, 0x8878, 0x898e, 0x8ac6,
|
||||
0x8c24, 0x8dac, 0x8f62, 0x914b, 0x936b, 0x95c9, 0x9869, 0x9b50,
|
||||
0x9e84, 0xa20a, 0xa5e6, 0xaa1e, 0xaeb3, 0xb3aa, 0xb903, 0xbebe,
|
||||
0xc4d9, 0xcb52, 0xd221, 0xd941, 0xe0a7, 0xe847, 0xf015, 0xf803,
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __NNOM_LOCAL_H__ */
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020
|
||||
* Jianjia Ma
|
||||
* majianjia@live.com
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Change Logs:
|
||||
* Date Author Notes
|
||||
* 2019-02-05 Jianjia Ma The first version
|
||||
* 2019-02-10 Jianjia Ma Compiler supports dense net connection
|
||||
*/
|
||||
|
||||
#ifndef __NNOM_TENSOR_H__
|
||||
#define __NNOM_TENSOR_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "nnom.h"
|
||||
|
||||
|
||||
void delete_tensor(nnom_tensor_t* t);
|
||||
nnom_tensor_t* new_tensor(nnom_qtype_t type, uint32_t num_dim, uint32_t num_channel);
|
||||
// set tensor by value
|
||||
// for tensor with quantized type NNOM_QTYPE_PER_TENSOR
|
||||
nnom_tensor_t* tensor_set_attr_v(nnom_tensor_t* t,
|
||||
nnom_qformat_param_t dec_bit, nnom_qformat_param_t offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth);
|
||||
nnom_tensor_t* tensor_set_attr(nnom_tensor_t* t,
|
||||
nnom_qformat_param_t*dec_bit, nnom_qformat_param_t *offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth);
|
||||
nnom_tensor_t* tensor_cpy_attr(nnom_tensor_t* des, nnom_tensor_t* src);
|
||||
size_t tensor_get_num_channel(nnom_tensor_t* t);
|
||||
size_t tensor_size(nnom_tensor_t* t);
|
||||
size_t tensor_size_byte(nnom_tensor_t* t);
|
||||
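/* Minimal usage sketch (illustrative): create a per-tensor-quantized 8-bit
   tensor of shape (4, 4, 1) with dec_bit = 7, query its size, then free it.

   nnom_shape_data_t dim[3] = {4, 4, 1};
   nnom_tensor_t *t = new_tensor(NNOM_QTYPE_PER_TENSOR, 3, 1);
   tensor_set_attr_v(t, 7, 0, dim, 3, 8);  // dec_bit=7, offset=0, 8-bit data
   size_t n = tensor_size(t);              // 4*4*1 = 16 elements
   delete_tensor(t);
*/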
|
||||
// only supports 3d tensors
|
||||
// change format from HWC to CHW
|
||||
// arguments: destination tensor, source tensor
|
||||
void tensor_hwc2chw_q7(nnom_tensor_t* des, nnom_tensor_t* src);
|
||||
|
||||
// change format from CHW to HWC
|
||||
// arguments: destination tensor, source tensor
|
||||
void tensor_chw2hwc_q7(nnom_tensor_t* des, nnom_tensor_t* src);
|
||||
|
||||
// deprecated.
|
||||
void hwc2chw_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out);
|
||||
void chw2hwc_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*__NNOM_TENSOR_H__ */
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020
|
||||
* Jianjia Ma
|
||||
* majianjia@live.com
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Change Logs:
|
||||
* Date Author Notes
|
||||
* 2019-02-05 Jianjia Ma The first version
|
||||
*/
|
||||
|
||||
#ifndef __NNOM_UTILS_H__
|
||||
#define __NNOM_UTILS_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "nnom.h"
|
||||
|
||||
typedef struct _nnom_predict_t
|
||||
{
|
||||
	uint16_t *confusion_mat; // confusion matrix
|
||||
	uint32_t *top_k;         // stores the number of predictions at each rank k; example: Top-2 = top_k[0]+top_k[1]
|
||||
nnom_model_t *model; // the model to run
|
||||
	int8_t *buf_prediction;  // pointer to the output of the softmax layer (normally the end of the classifier).
|
||||
|
||||
// setting
|
||||
	uint32_t label_num;      // number of classes in the classification
|
||||
	uint32_t top_k_size;     // the k in top-k to record.
|
||||
|
||||
// running
|
||||
	uint32_t predict_count;  // how many predictions have been done
|
||||
|
||||
//timing
|
||||
uint32_t t_run_total; // total running time
|
||||
	uint32_t t_predict_start; // timestamp when the prediction test started
|
||||
uint32_t t_predict_total; // total time of the whole test
|
||||
} nnom_predict_t;
|
||||
|
||||
// create a prediction
|
||||
// inputs: the model, and the buf pointer to the softmax output (temporary; this could be extracted from the model)
|
||||
// the size of the softmax output (the number of labels)
|
||||
// the top-k to record.
|
||||
nnom_predict_t *prediction_create(nnom_model_t *m, int8_t *buf_prediction, size_t label_num, size_t top_k_size); // currently int8_t
|
||||
|
||||
// after new data is set in the input
|
||||
// feed data to prediction
|
||||
// input the current label (ranges from 0 to the total number of labels - 1)
|
||||
// (the current input data should be manually copied by the user into the model's input buffer.)
|
||||
// return NN_ARGUMENT_ERROR if parameter error
|
||||
nnom_status_t prediction_run(nnom_predict_t *pre, uint32_t true_label, uint32_t* predict_label, float* prob);
|
||||
|
||||
// to mark prediction finished
|
||||
void prediction_end(nnom_predict_t *pre);
|
||||
|
||||
// free all resources
|
||||
void prediction_delete(nnom_predict_t *pre);
|
||||
|
||||
// print the confusion matrix
|
||||
void prediction_matrix(nnom_predict_t *pre);
|
||||
|
||||
// print top-k
|
||||
void prediction_top_k(nnom_predict_t *pre);
|
||||
|
||||
// print the summary
|
||||
void prediction_summary(nnom_predict_t *pre);
|
||||
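/* Typical evaluation loop (illustrative; buffer and variable names are
   placeholders, not part of this API):

   nnom_predict_t *pre = prediction_create(model, output_buf, 10, 4); // 10 labels, record top-4
   for (uint32_t i = 0; i < test_set_size; i++)
   {
       uint32_t predicted; float prob;
       // copy test sample i into the model's input buffer first (user's job)
       prediction_run(pre, true_labels[i], &predicted, &prob);
   }
   prediction_end(pre);
   prediction_summary(pre);  // print the summary
   prediction_delete(pre);
*/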
|
||||
// -------------------------------
|
||||
|
||||
// standalone prediction API
|
||||
// this API tests one set of data and returns the prediction
|
||||
// return the predicted label
|
||||
// return NN_ARGUMENT_ERROR if parameter error
|
||||
nnom_status_t nnom_predict(nnom_model_t *m, uint32_t *label, float *prob);
|
||||
|
||||
void model_stat(nnom_model_t *m);
|
||||
|
||||
void model_io_format(nnom_model_t *m);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*__NNOM_UTILS_H__ */
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020
|
||||
* Jianjia Ma
|
||||
* majianjia@live.com
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Change Logs:
|
||||
* Date Author Notes
|
||||
* 2019-02-05 Jianjia Ma The first version
|
||||
*/
|
||||
|
||||
#ifndef __NNOM_PORT_H__
|
||||
#define __NNOM_PORT_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <transform.h>
|
||||
|
||||
/* use static memory */
|
||||
// must set buf using "nnom_set_static_buf()" before creating a model.
|
||||
|
||||
/* dynamic memory interfaces */
|
||||
/* when libc is not available, you shall implement the below memory interfaces (libc equivalents). */
|
||||
#ifndef NNOM_USING_STATIC_MEMORY
|
||||
#define nnom_malloc(n) malloc(n)
|
||||
#define nnom_free(p) free(p)
|
||||
#endif
|
||||
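/* Illustrative alternative (an assumption, not part of this port file):
   with NNOM_USING_STATIC_MEMORY defined, reserve a pool and hand it to
   NNoM via nnom_set_static_buf() before any model is created:

   #define NNOM_USING_STATIC_MEMORY
   static uint8_t nnom_pool[32 * 1024];                // pool size depends on the model
   nnom_set_static_buf(nnom_pool, sizeof(nnom_pool));  // must run before creating the model
*/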
|
||||
/* memory interface */
|
||||
/* when libc is not available, you shall implement your equivalent functions here */
|
||||
#define nnom_memset(p,v,s) memset(p,v,s)
|
||||
#define nnom_memcpy(dst,src,len) memcpy(dst,src,len)
|
||||
|
||||
/* runtime & debug */
|
||||
#define nnom_us_get()       0           // should return a microsecond timestamp (stubbed to 0 in this port)
|
||||
#define nnom_ms_get()       0           // should return a millisecond timestamp (stubbed to 0 in this port)
|
||||
#define NNOM_LOG(...) printf(__VA_ARGS__)
|
||||
|
||||
/* NNoM configuration */
|
||||
#define NNOM_BLOCK_NUM      (8)         // maximum number of memory blocks; increase it when the runtime log requests more.
|
||||
#define DENSE_WEIGHT_OPT    (1)         // set to 1 when fully connected layers use optimized (reordered) weights.
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
fully_connected_opt_weight_generation.py is from https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN/Scripts/NNFunctions, which is not a part of NNoM.
|
||||
|
||||
Please refer to the NNoM documentation for its usage.
|
||||
|
|
@ -0,0 +1 @@
|
|||
# package
|
|
@ -0,0 +1,153 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
'''
|
||||
This file is a part of the CMSIS-NN release
|
||||
https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN/Scripts/NNFunctions
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
|
||||
def convert_to_x4_q7_weights(weights):
|
||||
[r, h, w, c] = weights.shape
|
||||
weights = np.reshape(weights, (r, h*w*c))
|
||||
num_of_rows = r
|
||||
num_of_cols = h*w*c
|
||||
new_weights = np.copy(weights)
|
||||
new_weights = np.reshape(new_weights, (r*h*w*c))
|
||||
counter = 0
|
||||
for i in range(int(num_of_rows/4)):
|
||||
# we only need to do the re-ordering for every 4 rows
|
||||
row_base = 4*i
|
||||
for j in range(int(num_of_cols/4)):
|
||||
# for each 4 entries
|
||||
column_base = 4*j
|
||||
new_weights[counter] = weights[row_base ][column_base ]
|
||||
new_weights[counter+1] = weights[row_base+1][column_base ]
|
||||
new_weights[counter+2] = weights[row_base ][column_base+2]
|
||||
new_weights[counter+3] = weights[row_base+1][column_base+2]
|
||||
new_weights[counter+4] = weights[row_base+2][column_base ]
|
||||
new_weights[counter+5] = weights[row_base+3][column_base ]
|
||||
new_weights[counter+6] = weights[row_base+2][column_base+2]
|
||||
new_weights[counter+7] = weights[row_base+3][column_base+2]
|
||||
|
||||
new_weights[counter+8] = weights[row_base ][column_base+1]
|
||||
new_weights[counter+9] = weights[row_base+1][column_base+1]
|
||||
new_weights[counter+10] = weights[row_base ][column_base+3]
|
||||
new_weights[counter+11] = weights[row_base+1][column_base+3]
|
||||
new_weights[counter+12] = weights[row_base+2][column_base+1]
|
||||
new_weights[counter+13] = weights[row_base+3][column_base+1]
|
||||
new_weights[counter+14] = weights[row_base+2][column_base+3]
|
||||
new_weights[counter+15] = weights[row_base+3][column_base+3]
|
||||
counter = counter + 16
|
||||
# the remaining ones are in order
|
||||
for j in range((int)(num_of_cols-num_of_cols%4), int(num_of_cols)):
|
||||
new_weights[counter] = weights[row_base][j]
|
||||
new_weights[counter+1] = weights[row_base+1][j]
|
||||
new_weights[counter+2] = weights[row_base+2][j]
|
||||
new_weights[counter+3] = weights[row_base+3][j]
|
||||
counter = counter + 4
|
||||
return new_weights
|
||||
|
||||
def convert_to_x4_q15_weights(weights):
|
||||
[r, h, w, c] = weights.shape
|
||||
weights = np.reshape(weights, (r, h*w*c))
|
||||
num_of_rows = r
|
||||
num_of_cols = h*w*c
|
||||
new_weights = np.copy(weights)
|
||||
new_weights = np.reshape(new_weights, (r*h*w*c))
|
||||
counter = 0
|
||||
for i in range(int(num_of_rows/4)):
|
||||
# we only need to do the re-ordering for every 4 rows
|
||||
row_base = 4*i
|
||||
for j in range(int(num_of_cols/2)):
|
||||
# for each 2 entries
|
||||
column_base = 2*j
|
||||
new_weights[counter] = weights[row_base ][column_base ]
|
||||
new_weights[counter+1] = weights[row_base ][column_base+1]
|
||||
new_weights[counter+2] = weights[row_base+1][column_base ]
|
||||
new_weights[counter+3] = weights[row_base+1][column_base+1]
|
||||
new_weights[counter+4] = weights[row_base+2][column_base ]
|
||||
new_weights[counter+5] = weights[row_base+2][column_base+1]
|
||||
new_weights[counter+6] = weights[row_base+3][column_base ]
|
||||
new_weights[counter+7] = weights[row_base+3][column_base+1]
|
||||
|
||||
counter = counter + 8
|
||||
# the remaining ones are in order
|
||||
for j in range((int)(num_of_cols-num_of_cols%2), int(num_of_cols)):
|
||||
new_weights[counter] = weights[row_base][j]
|
||||
new_weights[counter+1] = weights[row_base+1][j]
|
||||
new_weights[counter+2] = weights[row_base+2][j]
|
||||
new_weights[counter+3] = weights[row_base+3][j]
|
||||
counter = counter + 4
|
||||
return new_weights
|
||||
|
||||
def convert_q7_q15_weights(weights):
|
||||
[r, h, w, c] = weights.shape
|
||||
weights = np.reshape(weights, (r, h*w*c))
|
||||
num_of_rows = r
|
||||
num_of_cols = h*w*c
|
||||
new_weights = np.copy(weights)
|
||||
new_weights = np.reshape(new_weights, (r*h*w*c))
|
||||
counter = 0
|
||||
for i in range(int(num_of_rows/4)):
|
||||
# we only need to do the re-ordering for every 4 rows
|
||||
row_base = 4*i
|
||||
for j in range(int(num_of_cols/2)):
|
||||
# for each 2 entries
|
||||
column_base = 2*j
|
||||
new_weights[counter] = weights[row_base ][column_base ]
|
||||
new_weights[counter+1] = weights[row_base+1][column_base ]
|
||||
new_weights[counter+2] = weights[row_base ][column_base+1]
|
||||
new_weights[counter+3] = weights[row_base+1][column_base+1]
|
||||
new_weights[counter+4] = weights[row_base+2][column_base ]
|
||||
new_weights[counter+5] = weights[row_base+3][column_base ]
|
||||
new_weights[counter+6] = weights[row_base+2][column_base+1]
|
||||
new_weights[counter+7] = weights[row_base+3][column_base+1]
|
||||
|
||||
counter = counter + 8
|
||||
# the remaining ones are in order
|
||||
for j in range((int)(num_of_cols-num_of_cols%2), int(num_of_cols)):
|
||||
new_weights[counter] = weights[row_base][j]
|
||||
new_weights[counter+1] = weights[row_base+1][j]
|
||||
new_weights[counter+2] = weights[row_base+2][j]
|
||||
new_weights[counter+3] = weights[row_base+3][j]
|
||||
counter = counter + 4
|
||||
return new_weights
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# input dimensions
|
||||
vec_dim = 127
|
||||
row_dim = 127
|
||||
|
||||
weight = np.zeros((row_dim,vec_dim), dtype=int)
|
||||
|
||||
# generate random inputs
|
||||
for i in range(row_dim):
|
||||
for j in range(vec_dim):
|
||||
weight[i][j] = np.random.randint(256)-128
|
||||
|
||||
weight = np.reshape(weight, (row_dim, vec_dim, 1, 1))
|
||||
|
||||
outfile = open("../Ref_Implementations/fully_connected_testing_weights.h", "w")
|
||||
outfile.write("#define IP2_WEIGHT {")
|
||||
weight.tofile(outfile,sep=",",format="%d")
|
||||
outfile.write("}\n\n")
|
||||
|
||||
new_weight = convert_to_x4_q7_weights(weight)
|
||||
outfile.write("#define IP4_WEIGHT {")
|
||||
new_weight.tofile(outfile,sep=",",format="%d")
|
||||
outfile.write("}\n\n")
|
||||
|
||||
new_weight = convert_q7_q15_weights(weight)
|
||||
outfile.write("#define IP4_q7_q15_WEIGHT {")
|
||||
new_weight.tofile(outfile,sep=",",format="%d")
|
||||
outfile.write("}\n\n")
|
||||
|
||||
new_weight = convert_to_x4_q15_weights(weight)
|
||||
outfile.write("#define IP4_WEIGHT_Q15 {")
|
||||
new_weight.tofile(outfile,sep=",",format="%d")
|
||||
outfile.write("}\n\n")
|
||||
|
||||
|
||||
outfile.close()
|
|
@ -0,0 +1,561 @@
|
|||
'''
|
||||
Copyright (c) 2018-2020
|
||||
Jianjia Ma
|
||||
majianjia@live.com
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
Change Logs:
|
||||
Date Author Notes
|
||||
2020-05-22 Jianjia Ma The first version
|
||||
'''
|
||||
from tensorflow.keras.layers import *
|
||||
import numpy as np
|
||||
|
||||
def convert_tensor_name(t):
|
||||
return 'tensor_'+t.name.replace('/', '_').replace(':', '_')
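# e.g. convert_tensor_name maps 'conv2d/kernel:0' to 'tensor_conv2d_kernel_0'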
|
||||
|
||||
def to_cstyle(data, integer=True):
|
||||
# Convert an array to a C-style brace initializer; not for very large arrays, since sizes above numpy's print threshold are abbreviated with '...'
|
||||
if(integer):
|
||||
data = np.array(data, dtype=int).flatten()  # np.int is deprecated/removed in recent numpy
|
||||
else:
|
||||
data = np.array(data).flatten()
|
||||
s = np.array2string(data, separator=',')
|
||||
s = s.replace("\n","").replace("\r","").replace(' ','')
|
||||
s = s.replace(',', ', ')
|
||||
s = s.replace('(', '[').replace(')', ']')
|
||||
return s.replace('[', '{').replace(']', '}')
|
||||
|
||||
def tensor_shape(tensor, is_io_tensor=False):
|
||||
# inconsistency between TF1 and TF2
|
||||
# get tensor shape without None or ?
|
||||
try:
|
||||
shape = tensor.shape.as_list() # tf1
|
||||
except:
|
||||
shape = tensor.get_shape().as_list() # tf2
|
||||
if(shape[0] == None or is_io_tensor):
|
||||
shape = shape[1:]
|
||||
else:
|
||||
shape = shape
|
||||
# for rnn input with timestep = None; needs a better implementation
|
||||
for i in range(len(shape)):
|
||||
shape[i] = shape[i] if shape[i] is not None else 1
|
||||
return shape
|
||||
|
||||
def gen_base_config(layer):
|
||||
config = '{.name = "%s"}' % (layer.name)
|
||||
return config
|
||||
|
||||
def gen_values(var_name, var, size='', dtype='const int8_t'):
|
||||
s = '<dtype> <var_name>[<size>] = <var>;\n'
|
||||
s = s.replace('<var_name>', var_name).replace('<var>', var).replace('<size>', size).replace('<dtype>', dtype)
|
||||
return s
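# e.g. gen_values('w', '{1, 2}', size='2') returns "const int8_t w[2] = {1, 2};\n"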
|
||||
|
||||
# generate tensor by the tensor config
|
||||
def gen_tensor(tensor, dec_bits, tensor_value='NULL', per_axis=False, is_io_tensor=False):
|
||||
config = '''
|
||||
const nnom_shape_data_t <tensor_name>_dim[] = <dim>;
|
||||
const nnom_qformat_param_t <tensor_name>_dec[] = <q_dec>;
|
||||
const nnom_qformat_param_t <tensor_name>_offset[] = <q_offset>;
|
||||
const nnom_tensor_t <tensor_name> = {
|
||||
.p_data = (void*)<value>,
|
||||
.dim = (nnom_shape_data_t*)<tensor_name>_dim,
|
||||
.q_dec = (nnom_qformat_param_t*)<tensor_name>_dec,
|
||||
.q_offset = (nnom_qformat_param_t*)<tensor_name>_offset,
|
||||
.qtype = <qtype>,
|
||||
.num_dim = <num_dim>,
|
||||
.bitwidth = <bitwidth>
|
||||
};
|
||||
'''
|
||||
# inconsistency between TF1 and TF2
|
||||
shape = tensor_shape(tensor, is_io_tensor)
|
||||
config = config.replace('<tensor_name>', convert_tensor_name(tensor))#.name.replace('/','_').split(':')[0]) #conv2d/kernel:0
|
||||
config = config.replace('<bitwidth>', '8')
|
||||
config = config.replace('<value>', tensor_value)
|
||||
config = config.replace('<dim>', to_cstyle(shape))
|
||||
config = config.replace('<num_dim>', str(len(shape)))
|
||||
if(type(dec_bits) == str):
|
||||
config = config.replace('<q_dec>', dec_bits)
|
||||
config = config.replace('<q_offset>', to_cstyle([0]))
|
||||
else:
|
||||
config = config.replace('<q_dec>', to_cstyle(dec_bits))
|
||||
config = config.replace('<q_offset>', to_cstyle([0]))
|
||||
if(per_axis):
|
||||
config = config.replace('<qtype>', 'NNOM_QTYPE_PER_AXIS')
|
||||
else:
|
||||
config = config.replace('<qtype>', 'NNOM_QTYPE_PER_TENSOR')
|
||||
return config
|
||||
|
||||
# create tensor by directly setting up the value
|
||||
def gen_create_tensor(tensor_name, shape, dec_bits, tensor_value='NULL', per_axis=False):
|
||||
config = '''
|
||||
const nnom_shape_data_t <tensor_name>_dim[] = <dim>;
|
||||
const nnom_qformat_param_t <tensor_name>_dec[] = <q_dec>;
|
||||
const nnom_qformat_param_t <tensor_name>_offset[] = <q_offset>;
|
||||
const nnom_tensor_t <tensor_name> = {
|
||||
.p_data = (void*)<value>,
|
||||
.dim = (nnom_shape_data_t*)<tensor_name>_dim,
|
||||
.q_dec = (nnom_qformat_param_t*)<tensor_name>_dec,
|
||||
.q_offset = (nnom_qformat_param_t*)<tensor_name>_offset,
|
||||
.qtype = <qtype>,
|
||||
.num_dim = <num_dim>,
|
||||
.bitwidth = <bitwidth>
|
||||
};
|
||||
'''
|
||||
config = config.replace('<tensor_name>', tensor_name)
|
||||
config = config.replace('<bitwidth>', '8')
|
||||
config = config.replace('<value>', tensor_value)
|
||||
config = config.replace('<dim>', to_cstyle(shape))
|
||||
config = config.replace('<num_dim>', str(len(shape)))
|
||||
if(type(dec_bits) == str):
|
||||
config = config.replace('<q_dec>', dec_bits)
|
||||
config = config.replace('<q_offset>', to_cstyle([0]))
|
||||
else:
|
||||
config = config.replace('<q_dec>', to_cstyle(dec_bits))
|
||||
config = config.replace('<q_offset>', to_cstyle([0]))
|
||||
if(per_axis):
|
||||
config = config.replace('<qtype>', 'NNOM_QTYPE_PER_AXIS')
|
||||
else:
|
||||
config = config.replace('<qtype>', 'NNOM_QTYPE_PER_TENSOR')
|
||||
return config
|
||||
|
||||
def gen_conv2d_config(layer, output_shifts, bias_shifts):
|
||||
c = '''
|
||||
const nnom_qformat_param_t <layer_name>_output_shift[] = <output_shift_values>;
|
||||
const nnom_qformat_param_t <layer_name>_bias_shift[] = <bias_shift_values>;
|
||||
const nnom_conv2d_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.qtype = <qtype>,
|
||||
.weight = (nnom_tensor_t*)&<weight>,
|
||||
.bias = (nnom_tensor_t*)&<bias>,
|
||||
.output_shift = (nnom_qformat_param_t *)&<layer_name>_output_shift,
|
||||
.bias_shift = (nnom_qformat_param_t *)&<layer_name>_bias_shift,
|
||||
.filter_size = <filter_size>,
|
||||
.kernel_size = <kernel_size>,
|
||||
.stride_size = <stride_size>,
|
||||
.padding_size = <padding_size>,
|
||||
.dilation_size = <dilation_size>,
|
||||
.padding_type = <padding_type>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<qtype>', "NNOM_QTYPE_PER_TENSOR")
|
||||
c = c.replace('<weight>',convert_tensor_name(layer.weights[0]))
|
||||
c = c.replace('<bias>',convert_tensor_name(layer.weights[1]))
|
||||
c = c.replace('<output_shift_values>', output_shifts)
|
||||
c = c.replace('<bias_shift_values>', bias_shifts)
|
||||
c = c.replace('<filter_size>', str(layer.filters) if layer.filters is not None else str(layer.depth_multiplier)) # output channel
|
||||
c = c.replace('<kernel_size>', to_cstyle(layer.kernel_size))
|
||||
c = c.replace('<stride_size>', to_cstyle(layer.strides))
|
||||
c = c.replace('<padding_size>', '{0, 0}') # not using it with keras, defined by padding type instead
|
||||
c = c.replace('<dilation_size>', to_cstyle(layer.dilation_rate))
|
||||
c = c.replace('<padding_type>', 'PADDING_'+layer.padding.upper())
|
||||
return c
|
||||
|
||||
def gen_conv2d_trans_config(layer, output_shifts, bias_shifts):
|
||||
c = '''
|
||||
const nnom_qformat_param_t <layer_name>_output_shift[] = <output_shift_values>;
|
||||
const nnom_qformat_param_t <layer_name>_bias_shift[] = <bias_shift_values>;
|
||||
const nnom_conv2d_trans_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.qtype = <qtype>,
|
||||
.weight = (nnom_tensor_t*)&<weight>,
|
||||
.bias = (nnom_tensor_t*)&<bias>,
|
||||
.output_shift = (nnom_qformat_param_t *)&<layer_name>_output_shift,
|
||||
.bias_shift = (nnom_qformat_param_t *)&<layer_name>_bias_shift,
|
||||
.filter_size = <filter_size>,
|
||||
.kernel_size = <kernel_size>,
|
||||
.stride_size = <stride_size>,
|
||||
.padding_size = <padding_size>,
|
||||
.dilation_size = <dilation_size>,
|
||||
.padding_type = <padding_type>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<qtype>', "NNOM_QTYPE_PER_TENSOR")
|
||||
c = c.replace('<weight>',convert_tensor_name(layer.weights[0]))
|
||||
c = c.replace('<bias>',convert_tensor_name(layer.weights[1]))
|
||||
c = c.replace('<output_shift_values>', output_shifts)
|
||||
c = c.replace('<bias_shift_values>', bias_shifts)
|
||||
c = c.replace('<filter_size>', str(layer.filters)) # output channel
|
||||
c = c.replace('<kernel_size>', to_cstyle(layer.kernel_size))
|
||||
c = c.replace('<stride_size>', to_cstyle(layer.strides))
|
||||
c = c.replace('<padding_size>', '{0, 0}') # not using it with keras, defined by padding type instead
|
||||
c = c.replace('<dilation_size>', to_cstyle(layer.dilation_rate))
|
||||
c = c.replace('<padding_type>', 'PADDING_'+layer.padding.upper())
|
||||
return c
|
||||
|
||||
def gen_dense_config(layer, output_shifts, bias_shift):
|
||||
c = '''
|
||||
const nnom_qformat_param_t <layer_name>_output_shift[] = <output_shift_values>;
|
||||
const nnom_qformat_param_t <layer_name>_bias_shift[] = <bias_shift_values>;
|
||||
const nnom_dense_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.qtype = <qtype>,
|
||||
.weight = (nnom_tensor_t*)&<weight>,
|
||||
.bias = (nnom_tensor_t*)&<bias>,
|
||||
.output_shift = (nnom_qformat_param_t *)&<layer_name>_output_shift,
|
||||
.bias_shift = (nnom_qformat_param_t *)&<layer_name>_bias_shift
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<qtype>', "NNOM_QTYPE_PER_TENSOR")
|
||||
c = c.replace('<weight>', convert_tensor_name(layer.weights[0]))
|
||||
c = c.replace('<bias>', convert_tensor_name(layer.weights[1]))
|
||||
c = c.replace('<output_shift_values>', output_shifts)
|
||||
c = c.replace('<bias_shift_values>', bias_shift)
|
||||
return c
|
||||
|
||||
def gen_io_config(layer, tensor_name):
|
||||
c = '''
|
||||
const nnom_io_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.tensor = (nnom_tensor_t*)&<tensor>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<tensor>', tensor_name)
|
||||
return c
|
||||
|
||||
def gen_output_config(previous_layer, dec_bits, output_num, value_name='nnom_output_data'): #cheat at the moments
|
||||
c = '''
|
||||
const nnom_shape_data_t <tensor_name>_dim[] = <dim>;
|
||||
const nnom_qformat_param_t <tensor_name>_dec[] = <q_dec>;
|
||||
const nnom_qformat_param_t <tensor_name>_offset[] = <q_offset>;
|
||||
const nnom_tensor_t <tensor_name> = {
|
||||
.p_data = (void*)<value>,
|
||||
.dim = (nnom_shape_data_t*)<tensor_name>_dim,
|
||||
.q_dec = (nnom_qformat_param_t*)<tensor_name>_dec,
|
||||
.q_offset = (nnom_qformat_param_t*)<tensor_name>_offset,
|
||||
.qtype = <qtype>,
|
||||
.num_dim = <num_dim>,
|
||||
.bitwidth = 8
|
||||
};
|
||||
|
||||
const nnom_io_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.tensor = (nnom_tensor_t*)&<tensor_name>
|
||||
};
|
||||
'''
|
||||
shape = tensor_shape(previous_layer.output, is_io_tensor=True)
|
||||
|
||||
c = c.replace('<tensor_name>', 'tensor_output'+str(output_num))
|
||||
c = c.replace('<layer_name>', 'output'+str(output_num))
|
||||
c = c.replace('<base_config>', '{.name = "output'+str(output_num)+'"}') # cheating at the moment.
|
||||
c = c.replace('<value>', value_name)
|
||||
c = c.replace('<qtype>', 'NNOM_QTYPE_PER_TENSOR')
|
||||
c = c.replace('<num_dim>', str(len(shape)))
|
||||
c = c.replace('<dim>', to_cstyle(shape))
|
||||
c = c.replace('<q_dec>', '{'+dec_bits+'}')
|
||||
c = c.replace('<q_offset>', to_cstyle([0]))
|
||||
return c
|
||||
|
||||
|
||||
def gen_pooling_config(layer, output_shifts='0'):
|
||||
c = '''
|
||||
const nnom_pool_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.padding_type = <padding_type>,
|
||||
.output_shift = <output_shift>,
|
||||
.kernel_size = <kernel_size>,
|
||||
.stride_size = <stride_size>,
|
||||
.num_dim = <num_dim>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<padding_type>', 'PADDING_'+layer.padding.upper())
|
||||
c = c.replace('<kernel_size>', to_cstyle(layer.pool_size))
|
||||
c = c.replace('<stride_size>', to_cstyle(layer.strides))
|
||||
c = c.replace('<num_dim>', str(len(layer.pool_size)))
|
||||
c = c.replace('<output_shift>', output_shifts) # not used at the moment
|
||||
return c
|
||||
|
||||
def gen_gl_pooling_config(layer, output_shifts='0'):
|
||||
c = '''
|
||||
const nnom_global_pool_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.output_shift = <output_shift>,
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<output_shift>', output_shifts)
|
||||
return c
|
||||
|
||||
|
||||
|
||||
def gen_matrix_config(layer, output_shift_name='0'):
|
||||
c = '''
|
||||
const nnom_matrix_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.output_shift = <output_shift>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<output_shift>', output_shift_name) # not used at the moment
|
||||
return c
|
||||
|
||||
def gen_zero_padding_config(layer):
|
||||
c = '''
|
||||
const nnom_zero_padding_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.pad = <padding>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
try:
|
||||
c = c.replace('<padding>', to_cstyle(sum(layer.padding, ())))
|
||||
except:
|
||||
pad = ((0, 0), layer.padding)
|
||||
c = c.replace('<padding>', to_cstyle(sum(pad, ())))
|
||||
return c
|
||||
|
||||
def gen_cropping_config(layer):
|
||||
c = '''
|
||||
const nnom_cropping_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.pad = <padding>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
try:
|
||||
c = c.replace('<padding>', to_cstyle(sum(layer.cropping, ()))) #((top_crop, bottom_crop), (left_crop, right_crop))
|
||||
except:
|
||||
pad = ((0, 0), layer.cropping)
|
||||
c = c.replace('<padding>', to_cstyle(sum(pad, ())))
|
||||
return c
|
||||
|
||||
def gen_upsampling_config(layer):
|
||||
c = '''
|
||||
const nnom_upsample_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.kernel = <kernel>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<kernel>', to_cstyle(layer.size))
|
||||
return c
|
||||
|
||||
def gen_softmax_config(layer):
|
||||
c = '''
|
||||
const nnom_softmax_config_t <layer_name>_config = {
|
||||
.super = <base_config>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
return c
|
||||
|
||||
def gen_flatten_config(layer):
|
||||
c = '''
|
||||
const nnom_flatten_config_t <layer_name>_config = {
|
||||
.super = <base_config>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
return c
|
||||
|
||||
def gen_reshape_config(layer):
|
||||
c = '''
|
||||
const nnom_shape_data_t <layer_name>_targeted_shape[] = <shape>;
|
||||
const nnom_reshape_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.dim = (nnom_shape_data_t*)<layer_name>_targeted_shape,
|
||||
.num_dim = <num_dim>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<shape>', to_cstyle(layer.output_shape[1:]))
|
||||
c = c.replace('<num_dim>', str(len(layer.output_shape[1:])))
|
||||
return c
|
||||
|
||||
def gen_concat_config(layer):
|
||||
c = '''
|
||||
const nnom_concat_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.axis = <axis>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<axis>', str(layer.axis))
|
||||
return c
|
||||
|
||||
def gen_lambda_config(layer, run_func_name='NULL', build_func_name='NULL', free_func_name='NULL', parameters_name='NULL'):
|
||||
c = '''
|
||||
const nnom_lambda_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.run_func_name = <run_func_name>,
|
||||
.build_func_name = <build_func_name>,
|
||||
.free_func_name = <free_func_name>,
|
||||
.parameters = <parameters_name>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<run_func_name>', run_func_name)
|
||||
c = c.replace('<build_func_name>', build_func_name)
|
||||
c = c.replace('<free_func_name>', free_func_name)
|
||||
c = c.replace('<parameters_name>', parameters_name)
|
||||
return c
|
||||
|
||||
def gen_rnn_config(layer):
|
||||
c = '''
|
||||
const nnom_rnn_config_t <layer_name>_config = {
|
||||
.super = <base_config>,
|
||||
.return_sequence = <return_sequence>,
|
||||
.stateful = <stateful>,
|
||||
.go_backwards = <go_backwards>
|
||||
};
|
||||
'''
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<stateful>', 'true' if layer.stateful else 'false')
|
||||
c = c.replace('<go_backwards>', 'true' if layer.go_backwards else 'false')
|
||||
c = c.replace('<return_sequence>', 'true' if layer.return_sequences else 'false')
|
||||
return c
|
||||
|
||||
def gen_simple_cell_config(layer, q_list):
|
||||
c = '''
|
||||
const nnom_simple_cell_config_t <layer_name>_simple_cell_config = {
|
||||
.super = <base_config>,
|
||||
.weights = (nnom_tensor_t*)&<weights>,
|
||||
.recurrent_weights = (nnom_tensor_t*)&<recurrent_weights>,
|
||||
.bias = (nnom_tensor_t*)&<bias>,
|
||||
.q_dec_iw = <q_dec_iw>,
|
||||
.q_dec_hw = <q_dec_hw>,
|
||||
.q_dec_h = <q_dec_h>,
|
||||
.act_type = <act_type>,
|
||||
.units = <units>
|
||||
};
|
||||
'''
|
||||
try:
|
||||
cell_cfg = layer.get_config()['cell']['config']
|
||||
except:
|
||||
cell_cfg = layer.get_config()
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<weights>', convert_tensor_name(layer.weights[0]))
|
||||
c = c.replace('<recurrent_weights>', convert_tensor_name(layer.weights[1]))
|
||||
c = c.replace('<bias>', convert_tensor_name(layer.weights[2]))
|
||||
c = c.replace('<q_dec_iw>', str(q_list[1])) # the qfmt of input x weight
|
||||
c = c.replace('<q_dec_hw>', str(q_list[2])) # q of hidden x recurrent weight
|
||||
c = c.replace('<q_dec_h>', str(q_list[0])) # output, if act != relu, should be 7 (consider delete it.)
|
||||
c = c.replace('<act_type>', 'ACT_' + cell_cfg['activation'].upper())
|
||||
c = c.replace('<units>', str(cell_cfg['units']))
|
||||
return c
|
||||
|
||||
def gen_lstm_cell_config(layer, q_list):
|
||||
c = '''
|
||||
const nnom_lstm_cell_config_t <layer_name>_lstm_cell_config = {
|
||||
.super = <base_config>,
|
||||
.weights = (nnom_tensor_t*)&<weights>,
|
||||
.recurrent_weights = (nnom_tensor_t*)&<recurrent_weights>,
|
||||
.bias = (nnom_tensor_t*)&<bias>,
|
||||
.q_dec_z = <q_dec_z>,
|
||||
.q_dec_h = <q_dec_h>,
|
||||
.q_dec_c = <q_dec_c>,
|
||||
.units = <units>
|
||||
};
|
||||
'''
|
||||
try:
|
||||
cell_cfg = layer.get_config()['cell']['config']
|
||||
except:
|
||||
cell_cfg = layer.get_config()
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<weights>', convert_tensor_name(layer.weights[0]))
|
||||
c = c.replace('<recurrent_weights>', convert_tensor_name(layer.weights[1]))
|
||||
c = c.replace('<bias>', convert_tensor_name(layer.weights[2]))
|
||||
c = c.replace('<q_dec_h>', str(q_list[0])) # output and memory state, (should be q0.7. consider delete it)
|
||||
c = c.replace('<q_dec_c>', str(q_list[1])) # cell state
|
||||
c = c.replace('<q_dec_z>', str(q_list[2])) # input*weight + hidden*weight + bias
|
||||
c = c.replace('<units>', str(cell_cfg['units']))
|
||||
return c
|
||||
|
||||
|
||||
|
||||
def gen_gru_cell_config(layer, q_list):
|
||||
c = '''
|
||||
const nnom_gru_cell_config_t <layer_name>_gru_cell_config = {
|
||||
.super = <base_config>,
|
||||
.weights = (nnom_tensor_t*)&<weights>,
|
||||
.recurrent_weights = (nnom_tensor_t*)&<recurrent_weights>,
|
||||
.bias = (nnom_tensor_t*)&<bias>,
|
||||
.q_dec_z = <q_dec_z>,
|
||||
.q_dec_h = <q_dec_h>,
|
||||
.units = <units>
|
||||
};
|
||||
'''
|
||||
try:
|
||||
cell_cfg = layer.get_config()['cell']['config']
|
||||
except:
|
||||
cell_cfg = layer.get_config()
|
||||
c = c.replace('<layer_name>', layer.name)
|
||||
c = c.replace('<base_config>', gen_base_config(layer))
|
||||
c = c.replace('<weights>', convert_tensor_name(layer.weights[0]))
|
||||
c = c.replace('<recurrent_weights>', convert_tensor_name(layer.weights[1]))
|
||||
c = c.replace('<bias>', convert_tensor_name(layer.weights[2]))
|
||||
c = c.replace('<q_dec_h>', str(q_list[0])) #
|
||||
c = c.replace('<q_dec_z>', str(q_list[1])) #
|
||||
c = c.replace('<units>', str(cell_cfg['units']))
|
||||
return c
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test only
|
||||
from tensorflow.keras.models import load_model
|
||||
model = load_model("../model.h5")
|
||||
print(gen_tensor(model.layers[1].weights[0], dec_bits=(1, 2, 3, 4, 5)))
|
||||
print(gen_tensor(model.layers[1].weights[1], dec_bits=(1, 2, 3, 4, 5)))
|
||||
print(gen_conv2d_config(model.layers[1], (1,2,3), 3))
|
||||
|
||||
with open("test.h", 'w') as fp:
|
||||
# fp.write(gen_tensor(model.layers[1].weights[0], dec_bits=(1, 2, 3, 4, 5)))
|
||||
# fp.write(gen_tensor(model.layers[1].weights[1], dec_bits=(1, 2, 3, 4, 5)))
|
||||
# fp.write(gen_conv2d_config(model.layers[1], (1,2,3,)))
|
||||
|
||||
fp.write('#include "nnom.h"\n')
|
||||
|
||||
# test all
|
||||
for layer in model.layers:
|
||||
if(type(layer) in [Conv2D, Conv1D]):
|
||||
for w in layer.weights:
|
||||
fp.write(gen_tensor(w, [3]))
|
||||
fp.write(gen_conv2d_config(layer, {0}, 2))
|
||||
elif(type(layer) in [Dense]):
|
||||
for w in layer.weights:
|
||||
fp.write(gen_tensor(w, [3]))
|
||||
fp.write(gen_dense_config(layer, 2, 2))
|
||||
elif(type(layer) in [Input]):
|
||||
fp.write(gen_io_config(layer, [9,1,1]))
|
||||
elif(type(layer) in [MaxPooling2D, GlobalMaxPooling2D, AveragePooling2D, GlobalAveragePooling2D]):
|
||||
fp.write(gen_pooling_config(layer))
|
||||
elif(type(layer) in [Multiply, Add, Subtract]):
|
||||
fp.write(gen_matrix_config(layer))
|
||||
elif(type(layer) in [ZeroPadding2D, ZeroPadding1D]):
|
||||
fp.write(gen_zero_padding_config(layer))
|
||||
elif(type(layer) in [Cropping2D, Cropping1D]):
|
||||
fp.write(gen_cropping_config(layer))
|
||||
elif(type(layer) in [Softmax]):
|
||||
fp.write(gen_softmax_config(layer))
|
||||
elif(type(layer) in [Flatten]):
|
||||
fp.write(gen_flatten_config(layer))
|
||||
elif(type(layer) in [Concatenate]):
|
||||
fp.write(gen_concat_config(layer))
|
||||
elif(type(layer) in [Lambda]):
|
||||
fp.write(gen_lambda_config(layer))
|
||||
elif(type(layer) in [UpSampling2D, UpSampling1D]):
|
||||
fp.write(gen_upsampling_config(layer))
|
||||
|
||||
|
File diff suppressed because it is too large
|
@ -0,0 +1,845 @@
|
|||
'''
|
||||
Copyright (c) 2018-2020
|
||||
Jianjia Ma
|
||||
majianjia@live.com
|
||||
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
Change Logs:
|
||||
Date Author Notes
|
||||
2019-02-05 Jianjia Ma The first version
|
||||
|
||||
|
||||
This file provides:
|
||||
-> fake_quantisation layers which simulate the output quantisation on fixed-point NN models.
|
||||
-> weights/bias quantisation of Convolution and Dense layers; "weights.h" file generation
|
||||
-> export "testing set" binary data file.
|
||||
-> print output ranges of each layers.
|
||||
|
||||
Currently, this script does not support RNN (type) layers.
|
||||
'''
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.layers import InputLayer
|
||||
from tensorflow.keras.models import Model
|
||||
|
||||
from sklearn import metrics
|
||||
from .fully_connected_opt_weight_generation import *
|
||||
import time
|
||||
import warnings
|
||||
|
||||
"""
|
||||
this generates the test-set data as a bin file
|
||||
the bin file can be used to validate the implementation on the MCU
|
||||
|
||||
"""
|
||||
def generate_test_bin(x, y, name='test_data_with_label.bin'):
|
||||
'''
|
||||
this method generates the binary test file
|
||||
:param x: input test data
|
||||
:param y: input label (one hot label)
|
||||
:return:
|
||||
'''
|
||||
# quantize input x
|
||||
min_value = np.min(x)
|
||||
max_value = np.max(x)
|
||||
|
||||
int_bits = int(np.ceil(np.log2(max(abs(min_value), abs(max_value)))))
|
||||
dec_bits = 7 - int_bits
|
||||
x = np.round(x*2**dec_bits).astype(np.int8)
|
||||
# get label
|
||||
if(len(y.shape) >1):
|
||||
test_label = np.argwhere(y == 1).astype(np.int8) # test data
|
||||
test_label = test_label[:, 1]
|
||||
else:
|
||||
test_label = y
|
||||
|
||||
# get data
|
||||
dat = x.astype(dtype="byte") # test data
|
||||
batch_size = dat.shape[0] # total pieces of data
|
||||
dat = dat.flatten() # flatten to get the total size.
|
||||
block_size = int(dat.size / batch_size) # this must be an integer; computed just to confirm
|
||||
|
||||
# write (label x 128) (data_block x 128)
|
||||
label_batch = 128 # the Y-modem example uses 128 batch
|
||||
with open(name, 'wb') as f:
|
||||
start = 0
|
||||
while start <= (test_label.size - label_batch):
|
||||
test_label[start: start + label_batch].tofile(f)
|
||||
dat[block_size * start: block_size * (start + label_batch)].tofile(f)
|
||||
start += label_batch
|
||||
|
||||
# the rest data
|
||||
if (start < test_label.size):
|
||||
rest_len = test_label.size - start
|
||||
new_labls = test_label[start:]
|
||||
new_labls = np.pad(new_labls, (0, label_batch - rest_len), mode='constant')
|
||||
new_labls.tofile(f)
|
||||
dat[block_size * start:].tofile(f)
|
||||
|
||||
print("binary test file generated:", name)
|
||||
print("test data length:", test_label.size)
|
||||
return
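# Usage sketch (x_test/y_test are assumed to be a prepared test set; the
# function quantises x internally from its own min/max range):
#   generate_test_bin(x_test, y_test, name='test_data_with_label.bin')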
|
||||
|
||||
def is_shift_layer(layer):
|
||||
''' layer which can change the output encoding'''
|
||||
#FIXME: add more layers which change the output shift
|
||||
if('input' in layer.name or
|
||||
'conv2d' in layer.name or
|
||||
'conv1d' in layer.name or
|
||||
'dense' in layer.name or
|
||||
'softmax' in layer.name or
|
||||
'sigmoid' in layer.name or
|
||||
'tanh' in layer.name or
|
||||
('add' in layer.name and 'zero' not in layer.name) or # the name 'zero_padding' contains 'add'
|
||||
'subtract' in layer.name or
|
||||
'multiply' in layer.name or
|
||||
('activation' in layer.name and layer.get_config()['activation'] == 'softmax')or
|
||||
('activation' in layer.name and layer.get_config()['activation'] == 'sigmoid') or
|
||||
('activation' in layer.name and layer.get_config()['activation'] == 'tanh')
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
def is_shift_fixed(layer):
|
||||
''' layer which shift to a fixed value'''
|
||||
#FIXME: add more layers which change the output shift
|
||||
if('softmax' in layer.name or
|
||||
'sigmoid' in layer.name or
|
||||
'tanh' in layer.name or
|
||||
('activation' in layer.name and layer.get_config()['activation'] == 'softmax') or
|
||||
('activation' in layer.name and layer.get_config()['activation'] == 'sigmoid') or
|
||||
('activation' in layer.name and layer.get_config()['activation'] == 'tanh')
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
def fuse_bn_to_conv(layer):
|
||||
# try to fuse BN layer to convolutional
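# folding uses: y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta, which is
# equivalent to a convolution with w' = w * gamma / sqrt(var + eps) and
# b' = gamma * (b - mean) / sqrt(var + eps) + beta, as applied below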
|
||||
if ('conv' in layer.name) and \
|
||||
('batch_normalization' in layer._outbound_nodes[0].outbound_layer.name):
|
||||
|
||||
print("fusing batch normalization to", layer.name)
|
||||
bn_layer = layer._outbound_nodes[0].outbound_layer
|
||||
c_w = layer.get_weights()[0]
|
||||
c_b = layer.get_weights()[1]
|
||||
print('original weight max', c_w.max(), 'min', c_w.min())
|
||||
print('original bias max', c_b.max(), 'min', c_b.min())
|
||||
bn_gamma = bn_layer.get_weights()[0]
|
||||
bn_beta = bn_layer.get_weights()[1]
|
||||
bn_mean = bn_layer.get_weights()[2]
|
||||
bn_variance = bn_layer.get_weights()[3]
|
||||
|
||||
if ('conv2d' in layer.name):
|
||||
epsilon = 1e-3 # default epsilon for tf.slim.batch_norm
|
||||
for l in range(c_w.shape[3]):
|
||||
for k in range(c_w.shape[2]):
|
||||
for j in range(c_w.shape[1]):
|
||||
for i in range(c_w.shape[0]):
|
||||
if "depthwise" in layer.name: # depthwise batchnorm params are ordered differently
|
||||
c_w[i][j][k][l] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)
|
||||
else:
|
||||
c_w[i][j][k][l] *= bn_gamma[l] / np.sqrt(bn_variance[l] + epsilon)
|
||||
|
||||
if "depthwise" in layer.name:
|
||||
depth_dim = c_w.shape[2]
|
||||
else:
|
||||
depth_dim = c_w.shape[3]
|
||||
for l in range(depth_dim):
|
||||
c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
|
||||
# conv1d
|
||||
else:
|
||||
epsilon = 1e-3 # default epsilon for tf.slim.batch_norm
|
||||
for k in range(c_w.shape[2]):
|
||||
for j in range(c_w.shape[1]):
|
||||
for i in range(c_w.shape[0]):
|
||||
if "depthwise" in layer.name: # depthwise batchnorm params are ordered differently
|
||||
c_w[i][j][k] *= bn_gamma[j] / np.sqrt(bn_variance[j] + epsilon)
|
||||
else:
|
||||
c_w[i][j][k] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)
|
||||
|
||||
if "depthwise" in layer.name:
|
||||
depth_dim = c_w.shape[1]
|
||||
else:
|
||||
depth_dim = c_w.shape[2]
|
||||
for l in range(depth_dim):
|
||||
c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
|
||||
|
||||
print('fused weight max', c_w.max(), 'min', c_w.min())
|
||||
print('fused bias max', c_b.max(), 'min', c_b.min())
|
||||
# write the weights back to the layer
|
||||
# after this, the original model weights are overwritten; a better way to pass the new weights is needed
|
||||
layer.set_weights([c_w, c_b])
|
||||
|
||||
def generate_weights(model, name='weights.h', format='hwc', shift_list=None):
|
||||
# Quantize weights to 8-bits using (min,max) and write to file
|
||||
f = open(name, 'w')
|
||||
f.write('#include "nnom.h"\n\n')
|
||||
f.close()
|
||||
|
||||
for curr_idx, layer in enumerate(model.layers):
|
||||
if (not layer.weights):
|
||||
continue
|
||||
|
||||
# before merging bn layer, check if the bn is "legally" after Conv
|
||||
if('batch_normalization' in layer.name) and \
|
||||
('conv' not in layer.inbound_nodes[0].inbound_layers.name):
|
||||
raise Exception('Currently only support batch_normalization after conv', layer.name,
|
||||
layer._inbound_nodes[0].inbound_layers[0].name)
|
||||
|
||||
# try to fuse BN layer to convolutional
|
||||
if ('conv' in layer.name) and \
|
||||
('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
|
||||
fuse_bn_to_conv(layer)
|
||||
|
||||
# generate weights and bias now
|
||||
weight_dec_shift = 0
|
||||
print('weights for layer', layer.name)
|
||||
for var in layer.weights:
|
||||
var_name = str(var.name)
|
||||
if("kernel" in var_name ):
|
||||
var_values = layer.get_weights()[0] # weight
|
||||
print(" weight:", var_name)
|
||||
elif("bias" in var_name):
|
||||
var_values = layer.get_weights()[1] # bias
|
||||
print(" bias: ",var_name)
|
||||
else:
|
||||
continue
|
||||
|
||||
print(" original shape: ", var_values.shape)
|
||||
min_value = np.min(var_values)
|
||||
max_value = np.max(var_values)
|
||||
|
||||
int_bits = int(np.ceil(np.log2(max(abs(min_value), abs(max_value)))))
|
||||
dec_bits = 7 - int_bits
|
||||
print(" dec bit", dec_bits)
|
||||
bSameAsKernel = False
|
||||
if(is_shift_layer(layer)):
|
||||
bSameAsKernel = False
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
input_encoding = shift_list[inp]
|
||||
if ("kernel" in var_name):
|
||||
weight_dec_shift = dec_bits
|
||||
else:
|
||||
shift = input_encoding+weight_dec_shift-dec_bits
|
||||
if(shift < 0):
|
||||
bSameAsKernel = True
|
||||
if(shift_list is None or bSameAsKernel):
|
||||
# check if bias shift > weight shift, then reduce bias shift to weight shift
|
||||
if ("kernel" in var_name):
|
||||
weight_dec_shift = dec_bits
|
||||
else:
|
||||
if(dec_bits > weight_dec_shift):
|
||||
dec_bits = weight_dec_shift
|
||||
print(" new dec bit", dec_bits)
|
||||
|
||||
# convert to [-128,128) or int8
|
||||
var_values = np.round(var_values * 2 ** dec_bits)
|
||||
var_name = var_name.replace('/', '_')
|
||||
var_name = var_name.replace(':', '_')
|
||||
with open(name, 'a') as f:
|
||||
f.write('#define ' + var_name.upper() + ' {')
|
||||
# CHW format
|
||||
if ('chw' in format):
|
||||
if "dense" in var_name and "kernel" in var_name:
|
||||
transposed_wts = np.transpose(var_values)
|
||||
transposed_wts = convert_to_x4_q7_weights(
|
||||
np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
|
||||
# all other kernels, bias stay the same
|
||||
else:
|
||||
transposed_wts = var_values
|
||||
# HWC format
|
||||
else:
|
||||
if (len(var_values.shape) == 3): # 1D convolution layer weights
|
||||
transposed_wts = np.transpose(var_values, (2, 0, 1))
|
||||
elif (len(var_values.shape) == 4): # 2D convolution layer weights
|
||||
transposed_wts = np.transpose(var_values, (3, 0, 1, 2))
|
||||
else: # fully connected layer weights or biases of any layer
|
||||
# test, use opt weight reorder
|
||||
if "dense" in var_name and "kernel" in var_name:
|
||||
transposed_wts = np.transpose(var_values)
|
||||
transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts ,(transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
|
||||
else:
|
||||
transposed_wts = np.transpose(var_values)
|
||||
|
||||
print(" reshape to:",transposed_wts.shape)
|
||||
|
||||
with open(name, 'a') as f:
|
||||
transposed_wts.tofile(f, sep=", ", format="%d")
|
||||
f.write('}\n\n')
|
||||
if ("bias" in var_name):
|
||||
f.write('#define ' + var_name.upper() + '_SHIFT ' + '(' + str(dec_bits) + ')\n\n\n')
|
||||
if ("kernel" in var_name ):
|
||||
f.write('#define ' + var_name.upper() + '_SHIFT ' + '(' + str(dec_bits) + ')\n\n')
|
||||
"""
|
||||
# for checking the quantised and dequantised range.
|
||||
with K.tf.Session() as session:
|
||||
# convert back original range but quantized to 8-bits or 256 levels
|
||||
var_values = var_values / (2 ** dec_bits)
|
||||
var_values = session.run(K.tf.assign(var, var_values))
|
||||
print(' '+var_name + ' number of wts/bias: ' + str(var_values.shape) + \
|
||||
' dec bits: ' + str(dec_bits) + \
|
||||
' max: (' + str(np.max(var_values)) + ',' + str(max_value) + ')' + \
|
||||
' min: (' + str(np.min(var_values)) + ',' + str(min_value) + ')')
|
||||
"""
|
||||
|
||||
def layers_output_ranges(model, x_test, quantize_method='max_min', calibrate_size=1000):
|
||||
# limit the test data size
|
||||
np.random.shuffle(x_test)
|
||||
if(x_test.shape[0] > calibrate_size):
|
||||
x_test = x_test[:calibrate_size]
|
||||
# test, show the output ranges
|
||||
shift_list = {}
|
||||
# FIXME: only support one input
|
||||
if(type(model.layers[0]) != InputLayer):
|
||||
L = [model.input] + model.layers
|
||||
else:
|
||||
L = model.layers
|
||||
last_layer = None
|
||||
|
||||
for layer in L: # layer loop
|
||||
if("input" in layer.name):
|
||||
features = x_test
|
||||
else:
|
||||
# batch_normalization will need to be handled differently, since we are fusing the weight to its predecessor.
|
||||
# sigmoid and tanh are different, their shift is fixed to 7
|
||||
if(is_shift_layer(layer) or
|
||||
('batch_normalization' in layer.name)):
|
||||
layer_model = Model(inputs=model.input, outputs=layer.output)
|
||||
features = layer_model.predict(x_test)
|
||||
else:
|
||||
# leave the features not changed, so this layer shift will be the same
|
||||
# as its inputs
|
||||
pass
|
||||
# calculate no saturation shift
|
||||
max_val = features.max()
|
||||
min_val = features.min()
|
||||
int_bits = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
|
||||
dec_bits = 7 - int_bits
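# e.g. if the largest activation magnitude is 5.3: int_bits = ceil(log2(5.3)) = 3,
# dec_bits = 7 - 3 = 4, so values are stored as round(x * 2**4) in Q3.4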
|
||||
|
||||
# saturation shift, using KLD method
|
||||
# Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
|
||||
if('kld' in quantize_method and not is_shift_fixed(layer) and "input" not in layer.name and "dense" not in layer.name): # test, also do not use kld in input layer
|
||||
import scipy.stats
|
||||
abs_max = max(abs(max_val), abs(min_val))
|
||||
small_var = 1e-5
|
||||
bins = np.arange(-abs_max, abs_max, abs_max/2048*2)
|
||||
q_bins = np.arange(-abs_max, abs_max, abs_max/256*2)
|
||||
flat_hist = np.histogram(features.flatten(), bins=bins)[0]
|
||||
kl_loss = []
|
||||
kl_shifts = []
|
||||
for shift in range(4):
|
||||
t = 2 ** (dec_bits + shift) # 2-based threshold
|
||||
act = np.round(features.flatten() * t)
|
||||
act = act / t
|
||||
act = np.clip(act, -128/t, 127/t)
|
||||
act = np.histogram(act, bins=q_bins)[0]
|
||||
act_hist = np.zeros(2047)
|
||||
chunk = int(2048/256)
|
||||
for i in range(int(255)):
|
||||
none_zero = np.count_nonzero(flat_hist[i*chunk:(i+1)*chunk])
|
||||
if none_zero == 0:
|
||||
continue
|
||||
for j in range(chunk):
|
||||
act_hist[i*chunk+j] = act[i]/none_zero if flat_hist[i*chunk+j] != 0 else 0
|
||||
flat_hist[flat_hist==0] = small_var
|
||||
act_hist[act_hist==0] = small_var
|
||||
kl = scipy.stats.entropy(flat_hist, act_hist)
|
||||
kl_loss.append(kl)
|
||||
kl_shifts.append(dec_bits + shift)
|
||||
"""
|
||||
ax = plt.subplot(8, 1, shift+1)
|
||||
ax.plot(flat_hist)
|
||||
ax.plot(act_hist)
|
||||
"""
|
||||
new_dec = kl_shifts[np.argmin(kl_loss)] # set the dec_bit to the KLD results
|
||||
#plt.show()
|
||||
print("KLD loss", kl_loss)
|
||||
print("KLD shift", kl_shifts)
|
||||
if(new_dec != dec_bits):
|
||||
print(layer.name,"is using KLD method, original shift",dec_bits, "KLD results", new_dec)
|
||||
dec_bits = new_dec
|
||||
|
||||
print( layer.name, "max value:", max_val, "min value:", min_val,"dec bit", dec_bits)
|
||||
# record the shift
|
||||
if(type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
|
||||
shift_list[layer.name.split(':')[0]] = dec_bits
|
||||
else:
|
||||
shift_list[layer.name] = dec_bits
|
||||
if ('batch_normalization' in layer.name):
|
||||
shift_list[last_layer.name] = dec_bits # use the bn layer shift to update the last layer.
|
||||
last_layer = layer
|
||||
|
||||
LM = {}
|
||||
for layer in model.layers:
|
||||
LM[layer.name] = layer
|
||||
L = [l for l in model.layers[1:]]
|
||||
L.reverse()
|
||||
|
||||
def update_previous_layer_shift(layer, Q):
|
||||
if(type(layer.input) == list):
|
||||
for inp in layer.input:
|
||||
iname = inp.name.split('/')[0]
|
||||
if('input' in iname):
|
||||
continue
|
||||
shift_list[iname] = Qmin  # Qmin comes from the enclosing loop scope, not the Q argument
|
||||
if(not is_shift_layer(LM[iname])):
|
||||
update_previous_layer_shift(LM[iname], Q)
|
||||
else:
|
||||
iname = layer.input.name.split('/')[0]
|
||||
if('input' in iname):
|
||||
return
|
||||
shift_list[iname] = Qmin
|
||||
if(not is_shift_layer(LM[iname])):
|
||||
update_previous_layer_shift(LM[iname], Q)
|
||||
for layer in L:
|
||||
if(type(layer.input) == list):
|
||||
iname = layer.input[0].name.split('/')[0]
|
||||
Qmin = shift_list[iname]
|
||||
for inp in layer.input:
|
||||
iname = inp.name.split('/')[0]
|
||||
if(shift_list[iname] < Qmin):
|
||||
Qmin = shift_list[iname]
|
||||
if(shift_list[iname] != Qmin):
|
||||
bFlag = True
|
||||
for inp in layer.input:
|
||||
iname = inp.name.split('/')[0]
|
||||
shift_list[iname] = Qmin
|
||||
if(not is_shift_layer(LM[iname])):
|
||||
update_previous_layer_shift(LM[iname], Qmin)
|
||||
print('set shift', Qmin, 'for the input of', layer.name, ':', [inp.name.split('/')[0] for inp in layer.input])
|
||||
if(not is_shift_layer(layer) or Qmin < shift_list[layer.name]): # update current layer's shift only when we cannot change the shift
|
||||
shift_list[layer.name] = Qmin
|
||||
print("shift list", shift_list)
|
||||
return shift_list
|
||||
|
||||
def generate_model(model, x_test, name='weights.h', format='hwc', quantize_method='max_min'):
|
||||
shift_list = layers_output_ranges(model, x_test, quantize_method=quantize_method)
|
||||
generate_weights(model, name=name, format=format, shift_list=shift_list)
|
||||
if(type(model.layers[0]) != InputLayer):
|
||||
L = [model.input] + model.layers
|
||||
else:
|
||||
L = model.layers
|
||||
with open(name,'a') as fp:
|
||||
fp.write('\n/* output encoding for each layer */\n')
|
||||
for layer in L:
|
||||
if(type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
|
||||
iname = layer.name.split(':')[0]
|
||||
else:
|
||||
iname = layer.name
|
||||
fp.write('#define %s_OUTPUT_SHIFT %s\n'%(iname.upper(), shift_list[iname]))
|
||||
fp.write('\n/* bias shift and output shift for each layer */\n')
|
||||
for layer in model.layers:
|
||||
if(is_shift_layer(layer)):
|
||||
iname = layer.name.upper()
|
||||
if(len(layer.weights) == 2 and
|
||||
'kernel' in layer.weights[0].name and
|
||||
'bias' in layer.weights[1].name):
|
||||
kname = layer.weights[0].name.upper().replace('/', '_').replace(':', '_')
|
||||
bname = layer.weights[1].name.upper().replace('/', '_').replace(':', '_')
|
||||
inp = layer.input.name.replace(':','/').split('/')[0].upper()
|
||||
fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_SHIFT+{2}_SHIFT-{0}_OUTPUT_SHIFT)\n'.format(
|
||||
iname, inp, kname))
|
||||
fp.write('#define {0}_BIAS_LSHIFT ({1}_OUTPUT_SHIFT+{2}_SHIFT-{3}_SHIFT)\n'.format(
|
||||
iname, inp, kname, bname))
|
||||
fp.write('#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
|
||||
fp.write('#if {0}_BIAS_LSHIFT < 0\n#error {0}_BIAS_LSHIFT must be bigger than 0\n#endif\n'.format(iname))
|
||||
# add, sub
|
||||
elif ('add' in layer.name or
|
||||
'subtract' in layer.name):
|
||||
# only consider the first input; they have been set to the same shift in layers_output_ranges()
|
||||
inp = layer.input[0].name.replace(':','/').split('/')[0].upper()
|
||||
fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_SHIFT-{0}_OUTPUT_SHIFT)\n'.format(
|
||||
iname, inp))
|
||||
fp.write('#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
|
||||
# multiply is different: Q3.4 * Q3.4 = Q6.8; to produce a Q4.3 output the right shift is (4+4)-3 = 5
|
||||
elif ('multiply' in layer.name ):
|
||||
inp = layer.input[0].name.replace(':','/').split('/')[0].upper()
|
||||
fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_SHIFT*2-{0}_OUTPUT_SHIFT)\n'.format(
|
||||
iname, inp))
|
||||
fp.write('#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(iname))
|
||||
|
||||
fp.write('\n/* weights for each layer */\n')
|
||||
LI = {}
|
||||
ID = 0
|
||||
def is_skipable_layer(layer):
|
||||
# FIXME: add more layers that could be skipped
|
||||
if('lambda' in layer.name or
|
||||
'dropout' in layer.name or
|
||||
'batch_normalization' in layer.name or
|
||||
('flatten' in layer.name and 'chw' not in format)): # flatten layer can be skipped in HWC but must be present in CHW
|
||||
return True
|
||||
return False
|
||||
for id,layer in enumerate(L):
|
||||
if(is_skipable_layer(layer)):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
LI[layer.name] = (LI[inp][0], layer)
|
||||
else:
|
||||
if(type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
|
||||
LI[layer.name.split(':')[0]] = (ID, layer)
|
||||
else:
|
||||
LI[layer.name] = (ID, layer)
|
||||
ID += 1
|
||||
|
||||
if ('input' in layer.name or not layer.weights):
|
||||
continue
|
||||
for var in layer.weights:
|
||||
var_name = str(var.name).replace('/', '_').replace(':', '_')
|
||||
if("kernel" in var_name):
|
||||
fp.write('static const int8_t %s_weights[] = %s;\n'%(layer.name, var_name.upper()))
|
||||
fp.write('static const nnom_weight_t %s_w = { (const void*)%s_weights, %s_OUTPUT_RSHIFT};\n'%(layer.name,layer.name, layer.name.upper()))
|
||||
elif("bias" in var_name):
|
||||
fp.write('static const int8_t %s_bias[] = %s;\n'%(layer.name, var_name.upper()))
|
||||
fp.write('static const nnom_bias_t %s_b = { (const void*)%s_bias, %s_BIAS_LSHIFT};\n'%(layer.name,layer.name, layer.name.upper()))
|
||||
fp.write('\n/* nnom model */\n')
|
||||
# FIXME: now only support one input and one output
|
||||
sz = 1
|
||||
for d in model.input.shape[1:]:
|
||||
sz = sz*d
|
||||
fp.write('static int8_t nnom_input_data[%d];\n'%(sz))
|
||||
sz = 1
|
||||
for d in model.output.shape[1:]:
|
||||
sz = sz*d
|
||||
fp.write('static int8_t nnom_output_data[%d];\n'%(sz))
|
||||
fp.write('static nnom_model_t* nnom_model_create(void)\n{\n')
|
||||
fp.write('\tstatic nnom_model_t model;\n')
|
||||
if(ID>32):
|
||||
fp.write('\tnnom_layer_t ** layer = malloc(sizeof(nnom_layer_t *)*%d);\n'%(ID+1))
|
||||
fp.write('\tif(NULL == layer) return NULL;\n')
|
||||
else:
|
||||
fp.write('\tnnom_layer_t* layer[%d];\n'%(ID+1))
|
||||
fp.write('\n\tnew_model(&model);\n\n')
|
||||
for layer in L:
|
||||
if(is_skipable_layer(layer)):
|
||||
continue
|
||||
#FIXME: need a better solution to separate the input 'tensor' from other layers
|
||||
if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
|
||||
id,_ = LI[layer.name.split(':')[0]]
|
||||
else:
|
||||
id,_ = LI[layer.name]
|
||||
|
||||
if('input' in layer.name):
|
||||
try:
|
||||
inshape = layer.input_shape[0][1:] # new changes in tf2?
|
||||
except:
|
||||
inshape = layer.shape[1:]
|
||||
if (len(inshape) == 1): # 1-D input
|
||||
fp.write('\tlayer[%d] = Input(shape(%d,1,1), nnom_input_data);\n' % (id, inshape[0]))
|
||||
elif (len(inshape) == 2): # 2-D input
|
||||
fp.write('\tlayer[%d] = Input(shape(1,%d,%d), nnom_input_data);\n' % (id, inshape[0], inshape[1]))
|
||||
else:
|
||||
fp.write('\tlayer[%d] = Input(shape%s, nnom_input_data);\n' % (id, inshape))
|
||||
|
||||
# convolutional
|
||||
elif('conv1d' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if('depthwise' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(DW_Conv2D({1}, kernel(1,{2}), stride(1,{3}), dilation(1,{4}), PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
|
||||
id, 1, cfg['kernel_size'][0], cfg['strides'][0], cfg['dilation_rate'][0], cfg['padding'].upper(),
|
||||
layer.name, LI[inp][0]))
|
||||
else:
|
||||
fp.write('\tlayer[{0}] = model.hook(Conv2D({1}, kernel(1,{2}), stride(1,{3}), dilation(1,{4}), PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
|
||||
id, cfg['filters'], cfg['kernel_size'][0], cfg['strides'][0], cfg['dilation_rate'][0], cfg['padding'].upper(),
|
||||
layer.name, LI[inp][0]))
|
||||
elif('conv2d' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if ('depthwise' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(DW_Conv2D({1}, kernel{2}, stride{3}, dilation{4}, PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
|
||||
id, 1, cfg['kernel_size'], cfg['strides'], cfg['dilation_rate'], cfg['padding'].upper(),
|
||||
layer.name, LI[inp][0]))
|
||||
else:
|
||||
fp.write('\tlayer[{0}] = model.hook(Conv2D({1}, kernel{2}, stride{3}, dilation{4}, PADDING_{5}, &{6}_w, &{6}_b), layer[{7}]);\n'.format(
|
||||
id, cfg['filters'], cfg['kernel_size'], cfg['strides'], cfg['dilation_rate'], cfg['padding'].upper(),
|
||||
layer.name, LI[inp][0]))
|
||||
# activations
|
||||
elif('activation' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if(cfg['activation'] == 'relu'):
|
||||
fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
if(cfg['activation'] == 'tanh'):
|
||||
fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
|
||||
if(cfg['activation'] == 'sigmoid'):
|
||||
fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
|
||||
elif(cfg['activation'] == 'softmax'):
|
||||
fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
elif('re_lu' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
# pooling
|
||||
elif('max_pooling' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if ('global' in layer.name):
|
||||
fp.write('\tlayer[%s] = model.hook(GlobalMaxPool(), layer[%s]);\n' % (id, LI[inp][0]))
|
||||
elif('2d' in layer.name):
|
||||
fp.write('\tlayer[%s] = model.hook(MaxPool(kernel%s, stride%s, PADDING_%s), layer[%d]);\n'%(
|
||||
id, cfg['pool_size'], cfg['strides'], cfg['padding'].upper(), LI[inp][0]))
|
||||
elif('1d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(MaxPool(kernel(1,{1}), stride(1,{2}), PADDING_{3}), layer[{4}]);\n'.format(
|
||||
id, cfg['pool_size'][0], cfg['strides'][0], cfg['padding'].upper(), LI[inp][0]))
|
||||
elif('average_pooling' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if ('global' in layer.name):
|
||||
# a global avg pool before softmax can be replaced by sum pool on the MCU (recommended)
|
||||
if(layer == model.layers[-2] and 'Softmax' in model.layers[-1].output.name):
|
||||
print(layer.name, 'has been replaced by GlobalSumPool()')
|
||||
fp.write('\tlayer[%s] = model.hook(GlobalSumPool(), layer[%s]);\n' % (id, LI[inp][0]))
|
||||
else:
|
||||
fp.write('\tlayer[%s] = model.hook(GlobalAvgPool(), layer[%s]);\n' % (id, LI[inp][0]))
|
||||
elif('2d' in layer.name):
|
||||
fp.write('\tlayer[%s] = model.hook(AvgPool(kernel%s, stride%s, PADDING_%s), layer[%d]);\n'%(
|
||||
id, cfg['pool_size'], cfg['strides'], cfg['padding'].upper(), LI[inp][0]))
|
||||
elif('1d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(AvgPool(kernel(1,{1}), stride(1,{2}), PADDING_{3}), layer[{4}]);\n'.format(
|
||||
id, cfg['pool_size'][0], cfg['strides'][0], cfg['padding'].upper(), LI[inp][0]))
|
||||
elif ('up_sampling' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if('2d' in layer.name):
|
||||
fp.write('\tlayer[%s] = model.hook(UpSample(kernel%s), layer[%d]);\n'%(id, cfg['size'], LI[inp][0]))
|
||||
elif('1d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(UpSample(kernel(1,{1})), layer[{2}]);\n'.format(
|
||||
id, cfg['size'][0], LI[inp][0]))
|
||||
# zero padding
|
||||
elif ('zero_padding' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if('2d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(ZeroPadding(border({1},{2},{3},{4})), layer[{5}]);\n'.format(
|
||||
id, cfg['padding'][0][0], cfg['padding'][0][1], cfg['padding'][1][0],cfg['padding'][1][1], LI[inp][0]))
|
||||
elif('1d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(ZeroPadding(border(0,0,{1},{2})), layer[{3}]);\n'.format(
|
||||
id, cfg['padding'][0], cfg['padding'][1], LI[inp][0]))
|
||||
# Cropping
|
||||
elif ('cropping' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if('2d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(Cropping(border({1},{2},{3},{4})), layer[{5}]);\n'.format(
|
||||
id, cfg['cropping'][0][0], cfg['cropping'][0][1], cfg['cropping'][1][0],cfg['cropping'][1][1], LI[inp][0]))
|
||||
elif('1d' in layer.name):
|
||||
fp.write('\tlayer[{0}] = model.hook(Cropping(border(0,0,{1},{2})), layer[{3}]);\n'.format(
|
||||
id, cfg['cropping'][0], cfg['cropping'][1], LI[inp][0]))
|
||||
|
||||
# others
|
||||
elif('flatten' in layer.name): # flatten is needed in CHW backend but not needed in HWC
|
||||
inp = layer.input.name.replace(':', '/').split('/')[0]
|
||||
fp.write('\tlayer[%s] = model.hook(Flatten(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
elif('concatenate' in layer.name):
|
||||
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
|
||||
inX = ''
|
||||
for inp in inps:
|
||||
inX += ' ,layer[%d]'%(LI[inp][0])
|
||||
cfg = layer.get_config()
|
||||
fp.write('\tlayer[%s] = model.mergex(Concat(%s), %s%s);\n'%(
|
||||
id, cfg['axis'], len(inps), inX))
|
||||
elif('add' in layer.name):
|
||||
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
|
||||
inX = ''
|
||||
for inp in inps:
|
||||
inX += ' ,layer[%d]'%(LI[inp][0])
|
||||
fp.write('\tlayer[%s] = model.mergex(Add(%s_OUTPUT_RSHIFT), %s%s);\n'%(
|
||||
id, layer.name.upper(), len(inps), inX))
|
||||
elif('subtract' in layer.name):
|
||||
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
|
||||
inX = ''
|
||||
for inp in inps:
|
||||
inX += ' ,layer[%d]'%(LI[inp][0])
|
||||
fp.write('\tlayer[%s] = model.mergex(Sub(%s_OUTPUT_RSHIFT), %s%s);\n'%(
|
||||
id, layer.name.upper(), len(inps), inX))
|
||||
elif('multiply' in layer.name):
|
||||
warnings.warn("Warning mutiply is under testing")
|
||||
inps = [input.name.replace(':','/').split('/')[0] for input in layer.input]
|
||||
inX = ''
|
||||
for inp in inps:
|
||||
inX += ' ,layer[%d]'%(LI[inp][0])
|
||||
fp.write('\tlayer[%s] = model.mergex(Mult(%s_OUTPUT_RSHIFT), %s%s);\n'%(
|
||||
id, layer.name.upper(), len(inps), inX))
|
||||
elif('dense' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
fp.write('\tlayer[{0}] = model.hook(Dense({1}, &{2}_w, &{2}_b), layer[{3}]);\n'.format(
|
||||
id, cfg['units'], layer.name, LI[inp][0]))
|
||||
elif('softmax' in layer.name):
|
||||
inp = layer.input.name.replace(':','/').split('/')[0]
|
||||
fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
else:
|
||||
raise Exception('unsupported layer', layer.name, layer)
|
||||
|
||||
"""
|
||||
# temporary fixed for activations attached into layers in construction
|
||||
def is_activation_attached(layer):
|
||||
if(("Softmax" in layer.output.name and "softmax" not in layer.name)or
|
||||
("Relu" in layer.output.name and "re_lu" not in layer.name) or
|
||||
("Sigmoid" in layer.output.name and "sigmoid" not in layer.name) or
|
||||
("Tanh" in layer.output.name and "tanh" not in layer.name)):
|
||||
return True
|
||||
return False
|
||||
if "input" not in layer.name and is_activation_attached(layer):
|
||||
inp = layer.output.name.replace(':', '/').split('/')[0]
|
||||
cfg = layer.get_config()
|
||||
if(cfg['activation'] == 'relu'):
|
||||
fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
if(cfg['activation'] == 'tanh'):
|
||||
fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
|
||||
if(cfg['activation'] == 'sigmoid'):
|
||||
fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
|
||||
elif(cfg['activation'] == 'softmax'):
|
||||
fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
|
||||
"""
|
||||
|
||||
# FIXME, test later.
|
||||
if('softmax' in layer.name
|
||||
or ('activation' in layer.name and layer.get_config()['activation'] == 'softmax')):
|
||||
fp.write('\tlayer[%s] = model.hook(Output(shape(%s,1,1), nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1], id))
|
||||
elif len(layer.output.shape) == 4:
|
||||
fp.write('\tlayer[%s] = model.hook(Output(shape%s, nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1:], id))
|
||||
elif len(layer.output.shape) == 3:
|
||||
fp.write('\tlayer[%s] = model.hook(Output(shape(1,%s,%s), nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1], layer.output.shape[2], id))
|
||||
elif len(layer.output.shape) == 2:
|
||||
fp.write('\tlayer[%s] = model.hook(Output(shape(%s,1,1), nnom_output_data), layer[%s]);\n'%(id+1, layer.output.shape[1], id))
|
||||
else:
|
||||
raise Exception('unsupported output shape of the last layer', layer.name, layer)
|
||||
fp.write('\tmodel_compile(&model, layer[0], layer[%s]);\n'%(id+1))
|
||||
if(ID>32):
|
||||
fp.write('\tfree(layer);\n')
|
||||
fp.write('\treturn &model;\n}\n')
|
||||
with open('.shift_list','w') as fp:
|
||||
fp.write(str(shift_list))
|
||||
|
||||
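
For reference, the fp.write calls above emit a C model-builder function. A hand-written illustration of its rough shape (not actual generated output; the weight macros, buffer names and layer count depend on the network):

/* illustrative sketch of a generated nnom model builder (names are hypothetical) */
static nnom_model_t model;
static nnom_layer_t *layer[6];
nnom_model_t* nnom_model_create(void)
{
    new_model(&model);
    layer[0] = Input(shape(28, 28, 1), nnom_input_data);
    layer[1] = model.hook(MaxPool(kernel(2, 2), stride(2, 2), PADDING_VALID), layer[0]);
    layer[2] = model.hook(Dense(10, &dense_w, &dense_b), layer[1]);
    layer[3] = model.hook(Softmax(), layer[2]);
    layer[4] = model.hook(Output(shape(10, 1, 1), nnom_output_data), layer[3]);
    model_compile(&model, layer[0], layer[4]);
    return &model;
}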
def evaluate_model(model, x_test, y_test, running_time=False, to_file='evaluation.txt'):
    # score the trained model
    scores = model.evaluate(x_test, y_test, verbose=2)
    print('Test loss:', scores[0])
    print('Top 1:', scores[1])

    if(len(y_test.shape) > 1):
        # predictions = model.predict(x_test)
        # output = tf.keras.metrics.top_k_categorical_accuracy(y_test, predictions, k=2)
        # # with tf.Session() as sess:
        # #     result = sess.run(output)
        # result =
        # print("Top 2:",result)

        predictions = model.predict(x_test)
        matrix = metrics.confusion_matrix(y_test.argmax(axis=1), predictions.argmax(axis=1))
        print(matrix)

    run_time = 0
    if running_time:
        # try to measure the prediction time
        T = time.time()
        for i in range(10):
            model.predict(x_test)
        T = time.time() - T
        run_time = round((T / 10 / x_test.shape[0] * 1000 * 1000), 2)
        print("Running time:", run_time, "us")

    with open(to_file, 'w') as f:
        f.write("Running time: " + str(run_time) + "us" + "\n")
        f.write('Test loss:' + str(scores[0]) + "\n")
        f.write('Top 1:' + str(scores[1]) + "\n")
        if (len(y_test.shape) > 1):
            #f.write("Top 2:"+ str(result)+ "\n")
            #f.write(str(matrix))
            for row in matrix:
                row.tofile(f, sep=',')
                f.write("\n")

    # check the weight and bias decimal-bit ranges
    for layer in model.layers:
        if (not layer.weights):
            continue
        for var in layer.weights:
            var_name = str(var.name)
            if ("kernel" in var_name):
                var_values = layer.get_weights()[0] # weight
            else:
                var_values = layer.get_weights()[1] # bias
            min_value = np.min(var_values)
            max_value = np.max(var_values)
            intt = int(np.ceil(np.log2(max(abs(min_value), abs(max_value)))))
            dec = 7 - intt
            print(var_name, "Dec num:", dec)
    return scores
def f2q(d, Q):
    '''To convert a number from floating point to Qm.n format:
      1. Multiply the floating point number by 2^n
      2. Round to the nearest integer
    '''
    return np.round(d*2**Q)


def q2f(d, Q):
    '''To convert a number from Qm.n format to floating point:
      1. Convert the number to floating point as if it were an integer; in other words, remove the binary point
      2. Multiply by 2^-n
    '''
    return d*2**-Q
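
The same Qm.n arithmetic is what the C runtime performs on q7 data; a minimal, self-contained C mirror of the two helpers above (illustrative only, not part of the nnom sources):

#include <math.h>
#include <stdint.h>

/* float -> Qm.n: multiply by 2^n and round to the nearest integer */
static int8_t f2q7(float d, int n)
{
    return (int8_t)lroundf(d * (float)(1 << n));
}

/* Qm.n -> float: treat as an integer and multiply by 2^-n */
static float q7f(int8_t d, int n)
{
    return (float)d / (float)(1 << n);
}

/* e.g. f2q7(0.5f, 7) == 64, and q7f(64, 7) == 0.5f */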
def show_weights(w, name):
    sz = 1
    for s in w.shape:
        sz = sz*s
    aL = w.reshape(sz,)
    MIN, MAX = min(aL), max(aL)
    Q = int(np.ceil(np.log2(max(abs(MIN), abs(MAX)))))
    Q = 7-Q
    qL = f2q(aL, Q)
    qL = q2f(qL, Q)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.title(name)
    plt.plot(aL)
    plt.grid()
    aL.sort()
    plt.plot(aL, 'r')
    plt.grid()
    plt.subplot(132)
    plt.title('Q%s'%(Q))
    qL.sort()
    plt.plot(aL, 'r')
    plt.plot(qL, 'g')
    plt.grid()
    plt.subplot(133)
    plt.hist(aL, 100)
    plt.title('hist')
    plt.grid()
    plt.show()

def compare(a, b, name):
    sz = 1
    for s in a.shape:
        sz = sz*s
    aL = a.reshape(sz,)
    bL = b.reshape(sz,)
    assert(len(aL) == len(bL))
    Z = list(zip(aL, bL))
    Z.sort(key=lambda x: x[0])
    aL1, bL1 = zip(*Z)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.plot(aL)
    plt.plot(aL1, 'r')
    plt.grid()
    plt.title('tf-%s'%(name))
    plt.subplot(133)
    plt.plot(bL1, 'g')
    plt.plot(aL1, 'r')
    plt.grid()
    plt.title('compare')
    plt.subplot(132)
    bL1 = list(bL1)
    bL1.sort()
    plt.plot(bL)
    plt.plot(bL1, 'g')
    plt.grid()
    plt.title('nn-%s'%(name))
    plt.show()
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large

@ -0,0 +1,83 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-02-05     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"

size_t shape_size(nnom_3d_shape_t *s)
{
    if (s == NULL)
        return 0;
    return s->h * s->w * s->c;
}

nnom_3d_shape_t shape(size_t h, size_t w, size_t c)
{
    nnom_3d_shape_t s;
    s.h = h;
    s.w = w;
    s.c = c;
    return s;
}
nnom_3d_shape_t kernel(size_t h, size_t w)
{
    return shape(h, w, 1);
}
nnom_3d_shape_t stride(size_t h, size_t w)
{
    return shape(h, w, 1);
}
nnom_3d_shape_t dilation(size_t h, size_t w)
{
    return shape(h, w, 1);
}

nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right)
{
    nnom_border_t b;
    b.top = top;
    b.bottom = bottom;
    b.left = left;
    b.right = right;
    return b;
}
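
These small constructors are what the generated model code calls when wiring layers together; a brief usage sketch (PADDING_VALID and the layer constructors are part of the nnom layer API used elsewhere in this commit):

/* usage sketch: shape helpers feeding the layer constructors */
nnom_layer_t *pool = MaxPool(kernel(2, 2), stride(2, 2), PADDING_VALID);
nnom_layer_t *pad  = ZeroPadding(border(1, 1, 1, 1)); /* top, bottom, left, right */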

// this function has to be used when assigning an io to a layer,
// because the io needs to know who its owner is.
nnom_layer_io_t *io_init(void *owner_layer, nnom_layer_io_t *io)
{
    io->owner = (nnom_layer_t *)owner_layer;
    return io;
}

// this function adds a new io to a currently initialised io.
// input: the targeted io that the new io will be added to
// output: the new io
nnom_layer_io_t *io_add_aux(nnom_layer_io_t *targeted_io)
{
    nnom_layer_io_t *new_io;
    // check that the targeted io is initialised, and its aux == NULL
    if (targeted_io == NULL || targeted_io->owner == NULL || targeted_io->aux != NULL)
        return NULL;
    // create a new io and initialise it
    new_io = nnom_mem(sizeof(nnom_layer_io_t));
    if (new_io == NULL)
        return NULL;
    // add to aux
    targeted_io->aux = new_io;
    return io_init(targeted_io->owner, new_io);
}

@ -0,0 +1,245 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-02-05     Jianjia Ma   The first version
 * 2019-02-14     Jianjia Ma   Add layer.free() method.
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdarg.h>
#include "nnom.h"
#include "nnom_tensor.h"

// tensor size
size_t tensor_size(nnom_tensor_t* t)
{
    size_t size = 0;
    if (t != NULL)
    {
        size = t->dim[0];
        for (int i = 1; i < t->num_dim; i++)
            size *= t->dim[i];
    }
    return size;
}
size_t tensor_size_byte(nnom_tensor_t* t)
{
    return tensor_size(t)*t->bitwidth/8;
}

size_t tensor_get_num_channel(nnom_tensor_t* t)
{
    // this will need to be changed to support batch.
#ifdef NNOM_USING_CHW
    // channel first
    //return t->dim[0];
    return t->dim[t->num_dim -1]; // we always use HWC to describe tensors, even when the data is stored in CHW
#else
    // channel last
    return t->dim[t->num_dim -1];
#endif
}

// initialise/create a new tensor
nnom_tensor_t* new_tensor(nnom_qtype_t type, uint32_t num_dim, uint32_t num_channel)
{
    nnom_tensor_t* t = NULL;
    uint32_t q_len;
    if(type == NNOM_QTYPE_PER_AXIS)
    {
        q_len = num_channel;
    }
    else if (type == NNOM_QTYPE_PER_TENSOR)
    {
        q_len = 1;
    }
    else
    {
        NNOM_LOG("ERROR: tensor type not specified\n");
        return NULL;
    }

    // one block serves the tensor header, the dim array and the two q-format arrays
    t = nnom_mem(nnom_alignto(sizeof(nnom_tensor_t), NNOM_ALIGN)
                + nnom_alignto(num_dim*sizeof(nnom_shape_data_t), sizeof(nnom_qformat_param_t))
                + q_len*sizeof(nnom_qformat_param_t)*2);
    if(t == NULL)
        return t;
    t->dim = (nnom_shape_data_t*)((uint8_t*)t + sizeof(nnom_tensor_t)); // should add alignment
    t->q_dec = (nnom_qformat_param_t*)((uint8_t*)t->dim + nnom_alignto(num_dim*sizeof(nnom_shape_data_t), sizeof(nnom_qformat_param_t)));
    t->q_offset = (nnom_qformat_param_t*)((uint8_t*)t->q_dec + q_len*sizeof(nnom_qformat_param_t));
    t->num_dim = num_dim;
    t->qtype = type;

    return t;
}

void delete_tensor(nnom_tensor_t* t)
{
    if (t)
        nnom_free(t);
}

// set tensor by value
// for tensors with quantisation type NNOM_QTYPE_PER_TENSOR
nnom_tensor_t* tensor_set_attr_v(nnom_tensor_t* t,
        nnom_qformat_param_t dec_bit, nnom_qformat_param_t offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth)
{
    // copy dim
    t->num_dim = num_dim;
    nnom_memcpy(t->dim, dim, sizeof(nnom_shape_data_t) * num_dim);

    // bitwidth
    t->bitwidth = bitwidth;
    // copy the offset and q format
    *(t->q_dec) = dec_bit;
    *(t->q_offset) = offset;
    return t;
}

// set tensor by pointer
// for tensors with quantisation type NNOM_QTYPE_PER_AXIS
nnom_tensor_t* tensor_set_attr(nnom_tensor_t* t,
        nnom_qformat_param_t *dec_bit, nnom_qformat_param_t *offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth)
{
    size_t size;

    // copy dim
    t->num_dim = num_dim;
    nnom_memcpy(t->dim, dim, sizeof(nnom_shape_data_t) * num_dim);

    // get the q format data size
    if(t->qtype == NNOM_QTYPE_PER_AXIS)
        size = sizeof(nnom_qformat_param_t) * tensor_get_num_channel(t);
    else
        size = sizeof(nnom_qformat_param_t);

    // bitwidth
    t->bitwidth = bitwidth;
    // copy the offset and q format
    nnom_memcpy(t->q_dec, dec_bit, size);
    nnom_memcpy(t->q_offset, offset, size);
    return t;
}
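
A quick usage sketch of the pair above: create a per-tensor-quantised 8-bit HWC tensor and stamp its shape and Q format (values made up for illustration):

/* a 28x28x1 tensor in Q0.7 (dec_bit = 7), zero offset, 8-bit */
nnom_shape_data_t dim[3] = {28, 28, 1};
nnom_tensor_t *t = new_tensor(NNOM_QTYPE_PER_TENSOR, 3, 1);
if (t != NULL)
{
    tensor_set_attr_v(t, 7, 0, dim, 3, 8);
    /* ... attach p_data and use the tensor ... */
    delete_tensor(t);
}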

// this method copies the attributes of a tensor to a new tensor
// before that, the src and des tensors must already have QTYPE and NUM_OF_DIM set.
// Note: the tensors must have the same length. this method won't copy the memory pointer data (we will assign memory later after building)
nnom_tensor_t* tensor_cpy_attr(nnom_tensor_t* des, nnom_tensor_t* src)
{
    size_t size;
    if(src->qtype != des->qtype || src->num_dim != des->num_dim)
        return NULL;

    if(src->qtype == NNOM_QTYPE_PER_AXIS)
        size = sizeof(nnom_qformat_param_t) * tensor_get_num_channel(src);
    else
        size = sizeof(nnom_qformat_param_t);

    // bit
    des->bitwidth = src->bitwidth;
    // copy quantisation parameters
    nnom_memcpy(des->q_dec, src->q_dec, size);
    nnom_memcpy(des->q_offset, src->q_offset, size);

    // copy number of dimensions
    des->num_dim = src->num_dim;
    nnom_memcpy(des->dim, src->dim, src->num_dim * sizeof(nnom_shape_data_t));
    return des;
}

// change format from HWC to CHW
// the shape of the data, input data, output data
void tensor_hwc2chw_q7(nnom_tensor_t* des, nnom_tensor_t* src)
{
    q7_t* p_out = des->p_data;
    q7_t* p_in = src->p_data;

    for (int c = 0; c < src->dim[2]; c++)
    {
        for (int h = 0; h < src->dim[0]; h++)
        {
            for (int w = 0; w < src->dim[1]; w++)
            {
                *p_out = p_in[(h * src->dim[1] + w) * src->dim[2] + c];
                p_out++;
            }
        }
    }
}
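
To make the index arithmetic concrete, a tiny worked example for a 2x2 image with 2 channels (H = W = C = 2):

/* HWC order: {p00c0, p00c1, p01c0, p01c1, p10c0, p10c1, p11c0, p11c1}
 * CHW order: {p00c0, p01c0, p10c0, p11c0, p00c1, p01c1, p10c1, p11c1}
 * i.e. element (h, w, c) moves from index (h*W + w)*C + c to c*H*W + h*W + w,
 * which is exactly what the loops above compute. */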

// only supports 3d tensors
// change format from CHW to HWC
void tensor_chw2hwc_q7(nnom_tensor_t* des, nnom_tensor_t* src)
{
    q7_t* p_out = des->p_data;
    q7_t* p_in = src->p_data;
    int im_size;
    int h_step;

    im_size = src->dim[0] * src->dim[1]; // H*W

    for (int h = 0; h < src->dim[0]; h++)
    {
        h_step = src->dim[1] * h;
        for (int w = 0; w < src->dim[1]; w++)
        {
            for (int c = 0; c < src->dim[2]; c++)
            {
                *p_out = p_in[im_size * c + h_step + w];
                p_out++;
            }
        }
    }
}

// (deprecated by the tensor_hwc2chw version)
// change format from HWC to CHW
// the shape of the data, input data, output data
void hwc2chw_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out)
{
    for (int c = 0; c < shape.c; c++)
    {
        for (int h = 0; h < shape.h; h++)
        {
            for (int w = 0; w < shape.w; w++)
            {
                *p_out = p_in[(h * shape.w + w) * shape.c + c];
                p_out++;
            }
        }
    }
}

// (deprecated)
// change format from CHW to HWC
// the shape of the data, input data, output data
void chw2hwc_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out)
{
    int im_size = shape.w * shape.h;
    int h_step;

    for (int h = 0; h < shape.h; h++)
    {
        h_step = shape.w * h;
        for (int w = 0; w < shape.w; w++)
        {
            for (int c = 0; c < shape.c; c++)
            {
                *p_out = p_in[im_size * c + h_step + w];
                p_out++;
            }
        }
    }
}

@ -0,0 +1,417 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-02-05     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_utils.h"

static nnom_predict_t *_predict_create_instance(nnom_model_t *m, size_t label_num, size_t top_k_size)
{
    nnom_predict_t *pre;
    // allocate memory
    pre = (nnom_predict_t *)nnom_malloc(sizeof(nnom_predict_t));
    if(pre == NULL)
        return NULL;
    pre->top_k = (uint32_t *)nnom_malloc(top_k_size * sizeof(uint32_t));
    pre->confusion_mat = (uint16_t *)nnom_malloc(label_num * label_num * sizeof(uint16_t));
    if(pre->top_k == NULL || pre->confusion_mat == NULL)
    {
        nnom_free(pre->top_k); nnom_free(pre->confusion_mat); nnom_free(pre);
        return NULL;
    }
    nnom_memset(pre->top_k, 0, top_k_size * sizeof(uint32_t));
    nnom_memset(pre->confusion_mat, 0, label_num * label_num * sizeof(uint16_t));

    // config
    pre->label_num = label_num;
    pre->top_k_size = top_k_size;
    pre->predict_count = 0;

    // run
    pre->model = m;
    pre->t_run_total = 0;     // model running time in total
    pre->t_predict_start = 0; // when it was initialised
    pre->t_predict_total = 0; // total time of the whole test

    return pre;
}

static void _predict_delete_instance(nnom_predict_t *pre)
{
    if(pre == NULL)
        return;
    nnom_free(pre->top_k);
    nnom_free(pre->confusion_mat);
    nnom_free(pre);
}

// create a prediction instance
// input: the model, the buffer pointer to the softmax output (temporary, this could be extracted from the model),
// the size of the softmax output (the number of labels),
// and the top-k that should be recorded.
nnom_predict_t *prediction_create(nnom_model_t *m, int8_t *buf_prediction, size_t label_num, size_t top_k_size)
{
    nnom_predict_t *pre = _predict_create_instance(m, label_num, top_k_size);
    if (!pre)
        return NULL;
    if (!m)
    {
        _predict_delete_instance(pre);
        return NULL;
    }

    // set the output buffer of the model on the prediction instance
    pre->buf_prediction = buf_prediction;

    // mark the start time.
    pre->t_predict_start = nnom_ms_get();

    return pre;
}

// call after new data has been set in the input;
// feeds the data to the prediction.
// input: the current label (range from 0 to the total number of labels - 1)
// (the current input data should be copied by the user manually to the input buffer of the model.)
nnom_status_t prediction_run(nnom_predict_t *pre, uint32_t true_label, uint32_t *predict_label, float *prob)
{
    int max_val;
    int max_index;
    uint32_t true_ranking = 0;
    uint32_t start;
    uint32_t sum = 0;

    if (!pre)
        return NN_ARGUMENT_ERROR;

    // now run the model
    start = nnom_ms_get();
    model_run(pre->model);
    pre->t_run_total += nnom_ms_get() - start;

    // only fill the matrix and top-k when the number of labels > 1
    if (pre->label_num > 1)
    {
        // count how many predictions are bigger than the ground truth.
        // Ranking rules are the same as TensorFlow; however, on an MCU predictions are more likely to have equal probability since they use fixed-point.
        // if the ranking is 1, 2, =2(true), 4, 5, 6, the result will be top 3.
        // if the ranking is 1, 2(true), =2, 4, 5, 6, the result will be top 2.
        // find the ranking of the true label.
        for (uint32_t j = 0; j < pre->label_num; j++)
        {
            if (j == true_label)
                continue;
            if (pre->buf_prediction[true_label] < pre->buf_prediction[j])
                true_ranking++;
            // when value[true_label] == value[j], the true label only ranks behind j if j < true_label
            else if (pre->buf_prediction[true_label] == pre->buf_prediction[j] && j < true_label)
                true_ranking++;
        }

        if (true_ranking < pre->top_k_size)
            pre->top_k[true_ranking]++;

        // Find the top 1 and return the current prediction.
        // If several predictions share the maximum, return the first one.
        max_val = pre->buf_prediction[0];
        max_index = 0;
        for (uint32_t j = 1; j < pre->label_num; j++)
        {
            if (pre->buf_prediction[j] > max_val)
            {
                max_val = pre->buf_prediction[j];
                max_index = j;
            }
            sum += pre->buf_prediction[j];
        }
        // result
        if (max_val != 0)
            *prob = (float)max_val / 127.f;
        else
            *prob = 0;
        *predict_label = max_index;

        // fill the confusion matrix
        pre->confusion_mat[true_label * pre->label_num + max_index] += 1;
    }
    // only one neuron as output.
    else
    {
        *prob = (float)pre->buf_prediction[0] / 127.f;
        if (*prob >= 0.5f)
            *predict_label = 1;
        else
            *predict_label = 0;
    }

    // prediction count
    pre->predict_count++;

    // return the prediction
    return NN_SUCCESS;
}

void prediction_end(nnom_predict_t *pre)
{
    if (!pre)
        return;
    pre->t_predict_total = nnom_ms_get() - pre->t_predict_start;
}

void prediction_delete(nnom_predict_t *pre)
{
    _predict_delete_instance(pre);
}
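
Putting the prediction API together, a minimal evaluation loop might look like the sketch below. nnom_model_create(), nnom_input_data, nnom_output_data, TEST_SET_SIZE and the test arrays are assumptions for illustration; prediction_summary() is defined further down in this file:

/* minimal evaluation-loop sketch (buffer and data names are hypothetical) */
nnom_model_t *model = nnom_model_create();
nnom_predict_t *pre = prediction_create(model, nnom_output_data, 10 /* labels */, 4 /* top-k */);
uint32_t label;
float prob;
for (uint32_t i = 0; i < TEST_SET_SIZE; i++)
{
    /* copy one test sample into the model's input buffer first */
    nnom_memcpy(nnom_input_data, test_images[i], sizeof(nnom_input_data));
    prediction_run(pre, test_labels[i], &label, &prob);
}
prediction_end(pre);
prediction_summary(pre); /* prints timing, top-k and the confusion matrix */
prediction_delete(pre);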

void prediction_matrix(nnom_predict_t *pre)
{
    if (!pre)
        return;
    // print titles
    NNOM_LOG("\nConfusion matrix:\n");
    NNOM_LOG("predict");
    for (int i = 0; i < pre->label_num; i++)
    {
        NNOM_LOG("%6d", i);
    }
    NNOM_LOG("\n");
    NNOM_LOG("actual\n");
    // print the matrix
    for (int i = 0; i < pre->label_num; i++)
    {
        uint32_t row_total = 0;

        NNOM_LOG(" %3d | ", i);
        for (int j = 0; j < pre->label_num; j++)
        {
            row_total += pre->confusion_mat[i * pre->label_num + j];
            NNOM_LOG("%6d", pre->confusion_mat[i * pre->label_num + j]);
        }
        NNOM_LOG(" |%4d%%\n", pre->confusion_mat[i * pre->label_num + i] * 100 / row_total);
        row_total = 0;
    }
    NNOM_LOG("\n");
}

// top-k
void prediction_top_k(nnom_predict_t *pre)
{
    uint32_t top = 0;
    if (!pre)
        return;

    for (int i = 0; i < pre->top_k_size; i++)
    {
        top += pre->top_k[i];
        if (top != pre->predict_count)
            NNOM_LOG("Top %d Accuracy: %d.%02d%% \n", i + 1, (top * 100) / pre->predict_count,
                    ((top * 100 * 100) / pre->predict_count)%100);
        else
            NNOM_LOG("Top %d Accuracy: 100%% \n", i + 1);
    }
}

// print the summary
void prediction_summary(nnom_predict_t *pre)
{
    if (!pre)
        return;
    // summary
    NNOM_LOG("\nPrediction summary:\n");
    NNOM_LOG("Test frames: %d\n", pre->predict_count);
    NNOM_LOG("Test running time: %d sec\n", pre->t_predict_total / 1000);
    NNOM_LOG("Model running time: %d ms\n", pre->t_run_total);
    if(pre->predict_count != 0)
        NNOM_LOG("Average prediction time: %d us\n", (pre->t_run_total * 1000) / pre->predict_count);
    if(pre->t_run_total != 0)
        NNOM_LOG("Average efficiency: %d.%02d ops/us\n", (int)(((uint64_t)pre->model->total_ops * pre->predict_count) / (pre->t_run_total * 1000)),
                (int)(((uint64_t)pre->model->total_ops * pre->predict_count)*100 / (pre->t_run_total * 1000))%100);
    if(pre->t_run_total != 0 && pre->predict_count != 0)
        NNOM_LOG("Average frame rate: %d.%d Hz\n", 1000 / (pre->t_run_total / pre->predict_count),
                (1000*10 / (pre->t_run_total / pre->predict_count))%10);

    // only valid for multiple labels
    if(pre->label_num > 1)
    {
        // print top-k
        prediction_top_k(pre);

        // print the confusion matrix
        prediction_matrix(pre);
    }
}

// stand-alone prediction API
// this api tests one set of data and returns the prediction
nnom_status_t nnom_predict(nnom_model_t *m, uint32_t *label, float *prob)
{
    int32_t max_val, max_index, sum;
    int8_t *output;

    if (!m)
        return NN_ARGUMENT_ERROR;

    model_run(m);

    // get the output memory
    output = m->tail->out->tensor->p_data;

    // multiple neurons as output
    if (tensor_size(m->tail->out->tensor) > 1)
    {
        // Top 1
        max_val = output[0];
        max_index = 0;
        sum = max_val;
        for (uint32_t i = 1; i < tensor_size(m->tail->out->tensor); i++)
        {
            if (output[i] > max_val)
            {
                max_val = output[i];
                max_index = i;
            }
            sum += output[i];
        }
        // send results
        *label = max_index;
        if(max_val != 0)
            *prob = (float)max_val/127.f;
        else
            *prob = 0;
    }
    // single neuron as output
    else
    {
        *prob = (float)output[0] / 127.f;
        if (*prob >= 0.5f)
            *label = 1;
        else
            *label = 0;
    }

    return NN_SUCCESS;
}
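
For one-shot use, the stand-alone API above reduces to a few lines (reusing the hypothetical model pointer from the earlier sketch; the model's input buffer must be filled first):

uint32_t label;
float prob;
if (nnom_predict(model, &label, &prob) == NN_SUCCESS)
    NNOM_LOG("class %d, confidence %d%%\n", (int)label, (int)(prob * 100));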

static void layer_stat(nnom_layer_t *layer)
{
    // layer stat
    if(layer->type != NNOM_RNN)
        NNOM_LOG("%-10s - ", default_layer_names[layer->type]);
    else
    {
        NNOM_LOG("%-3s/", default_layer_names[layer->type]);
        NNOM_LOG("%-6s - ", default_cell_names[((nnom_rnn_layer_t*)layer)->cell->type]);
    }
    NNOM_LOG(" %8d ", layer->stat.time);

    // MAC operations
    if(layer->stat.macc == 0)
        NNOM_LOG("        ");
    else if (layer->stat.macc < 10000)
        NNOM_LOG("%7d ", (uint32_t)layer->stat.macc);
    else if (layer->stat.macc < 1000*1000)
        NNOM_LOG("%6dk ", (uint32_t)(layer->stat.macc/1000));
    else if (layer->stat.macc < 1000*1000*1000)
        NNOM_LOG("%3d.%02dM ", (uint32_t)(layer->stat.macc/(1000*1000)), (uint32_t)(layer->stat.macc%(1000*1000)/(10*1000))); // xxx.xx M
    else
        NNOM_LOG("%3d.%02dG ", (uint32_t)(layer->stat.macc/(1000*1000*1000)), (uint32_t)(layer->stat.macc%(1000*1000*1000)/(10*1000*1000))); // xxx.xx G

    // layer efficiency
    if (layer->stat.macc != 0 && layer->stat.time != 0)
        NNOM_LOG("%d.%02d\n", (uint32_t)(layer->stat.macc / layer->stat.time), (uint32_t)((layer->stat.macc * 100) / (layer->stat.time) % 100));
    else
        NNOM_LOG("\n");
}

void model_stat(nnom_model_t *m)
{
    size_t total_ops = 0;
    size_t total_time = 0;
    nnom_layer_t *layer;
    uint32_t run_num = 0;

    if (!m)
        return;

    layer = m->head;

    NNOM_LOG("\nPrint running stat..\n");
    NNOM_LOG("Layer(#)        -   Time(us)     ops(MACs)   ops/us \n");
    NNOM_LOG("--------------------------------------------------------\n");
    while (layer)
    {
        run_num++;
        NNOM_LOG("#%-3d", run_num);
        total_ops += layer->stat.macc;
        total_time += layer->stat.time;
        layer_stat(layer);
        if (layer->shortcut == NULL)
            break;
        layer = layer->shortcut;
    }
    NNOM_LOG("\nSummary:\n");
    NNOM_LOG("Total ops (MAC): %d", (uint32_t)(total_ops));
    NNOM_LOG("(%d.%02dM)\n", (uint32_t)(total_ops/(1000*1000)), (uint32_t)(total_ops%(1000*1000)/(10000)));
    NNOM_LOG("Prediction time: %dus\n", (uint32_t)total_time);
    if(total_time != 0)
        NNOM_LOG("Efficiency %d.%02d ops/us\n",
                (uint32_t)(total_ops / total_time),
                (uint32_t)((total_ops * 100) / (total_time) % 100));

    NNOM_LOG("Total memory:%d\n", (uint32_t)nnom_mem_stat());
}

void model_io_format(nnom_model_t *m)
{
    nnom_layer_t *layer;
    uint32_t run_num = 0;

    if (!m)
        return;

    layer = m->head;

    NNOM_LOG("\nPrint layer input/output..\n");
    NNOM_LOG("Layer(#)        -  Input(Qnm)  Output(Qnm)   Oshape \n");
    NNOM_LOG("----------------------------------------------------------\n");
    while (layer)
    {
        run_num++;
        NNOM_LOG("#%-3d", run_num);
        if(layer->type != NNOM_RNN)
            NNOM_LOG("%-10s - ", default_layer_names[layer->type]);
        else
        {
            NNOM_LOG("%-3s/", default_layer_names[layer->type]);
            NNOM_LOG("%-6s - ", default_cell_names[((nnom_rnn_layer_t*)layer)->cell->type]);
        }
        NNOM_LOG("  %2d.%2d", 7-layer->in->tensor->q_dec[0], layer->in->tensor->q_dec[0]);
        NNOM_LOG("    %2d.%2d", 7-layer->out->tensor->q_dec[0], layer->out->tensor->q_dec[0]);
        NNOM_LOG("      (");
        for (int i = 0; i < 3; i++)
        {
            if (layer->out->tensor->num_dim > i)
                NNOM_LOG("%4d,", layer->out->tensor->dim[i]);
            else
                NNOM_LOG("     ");
        }
        NNOM_LOG(")\n");

        if (layer->shortcut == NULL)
            break;
        layer = layer->shortcut;
    }

}

@ -0,0 +1,369 @@

/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-07-23     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>

#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_activation.h"

#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif

nnom_layer_t *Activation(nnom_activation_t *act)
{
    nnom_activation_layer_t *layer;
    nnom_layer_io_t *in, *out;

    // allocate a block of memory for all the sub handles.
    size_t mem_size = sizeof(nnom_activation_layer_t) + sizeof(nnom_layer_io_t) * 2;
    layer = nnom_mem(mem_size);
    if (layer == NULL)
        return NULL;

    // distribute the memory to the sub handles.
    in = (void *)((uint8_t*)layer + sizeof(nnom_activation_layer_t));
    out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));

    // set type in layer parent
    layer->super.type = NNOM_ACTIVATION;
    layer->super.run = activation_run;
    layer->super.build = default_build;
    // set buf state
    in->type = NNOM_TENSOR_BUF_TEMP;
    out->type = NNOM_TENSOR_BUF_NULL; // when a layer's io is set to NULL, both will point to the same mem.
    // put in & out on the layer.
    layer->super.in = io_init(layer, in);
    layer->super.out = io_init(layer, out);

    // set the activation on the layer
    layer->act = act;

    // set free method
    layer->super.free = activation_free;

    return (nnom_layer_t *)layer;
}

nnom_layer_t *ReLU(void)
{
    nnom_layer_t *layer = Activation(act_relu());
    if (layer == NULL)
        return NULL;

    // set type in layer parent
    layer->type = NNOM_RELU;
    return layer;
}

nnom_layer_t *LeakyReLU(float alpha)
{
    nnom_layer_t *layer = Activation(act_leaky_relu(alpha));
    if (layer == NULL)
        return NULL;

    // set type in layer parent
    layer->type = NNOM_LEAKY_RELU;
    return layer;
}

nnom_layer_t *AdvReLU(float alpha, float max, float threshold)
{
    nnom_layer_t *layer = Activation(act_adv_relu(alpha, max, threshold));
    if (layer == NULL)
        return NULL;

    // set type in layer parent
    layer->type = NNOM_ADV_RELU;
    return layer;
}

nnom_layer_t *Sigmoid(int32_t dec_bit)
{
    nnom_layer_t *layer = Activation(act_sigmoid(dec_bit));
    if (layer == NULL)
        return NULL;

    // set type in layer parent
    layer->type = NNOM_SIGMOID;
    return layer;
}

nnom_layer_t *TanH(int32_t dec_bit)
{
    nnom_layer_t *layer = Activation(act_tanh(dec_bit));
    if (layer == NULL)
        return NULL;
    // set type in layer parent
    layer->type = NNOM_TANH;
    return layer;
}
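
These constructors are wired into a model the same way as any other layer; a brief sketch in the style of the generated code shown earlier in this commit (layer indices are illustrative):

/* a stand-alone activation layer vs. an in-place activation */
layer[2] = model.hook(ReLU(), layer[1]);
layer[3] = model.active(act_sigmoid(7), layer[2]); /* assumes a Q0.7 input */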

void act_delete(nnom_activation_t* act){
    nnom_free(act);
}

// an activation layer takes ownership of the act instance passed to it; the instance must therefore be freed when the layer is deleted.
// this is the callback in layer->free
nnom_status_t activation_free(nnom_layer_t *layer)
{
    if(layer)
        act_delete(((nnom_activation_layer_t *)layer)->act);
    return NN_SUCCESS;
}

nnom_status_t activation_run(nnom_layer_t *layer)
{
    nnom_activation_layer_t *cl = (nnom_activation_layer_t *)layer;
    return act_tensor_run(cl->act, layer->in->tensor);
}

// porting
static nnom_status_t relu_run(nnom_activation_t* act)
{
    if(act->tensor->bitwidth == 16)
    {
#ifdef NNOM_USING_CMSIS_NN
        arm_relu_q15(act->tensor->p_data, tensor_size(act->tensor));
#else
        local_relu_q15(act->tensor->p_data, tensor_size(act->tensor));
#endif
    }
    else
    {
#ifdef NNOM_USING_CMSIS_NN
        arm_relu_q7(act->tensor->p_data, tensor_size(act->tensor));
#else
        local_relu_q7(act->tensor->p_data, tensor_size(act->tensor));
#endif
    }
    return NN_SUCCESS;
}

// leaky relu
static nnom_status_t leaky_relu_run(nnom_activation_t* act)
{
    nnom_activation_leaky_relu_t* a = (nnom_activation_leaky_relu_t*) act;
    if(act->tensor->bitwidth == 16)
        local_leaky_relu_q15(act->tensor->p_data, a->alpha, tensor_size(act->tensor));
    else
        local_leaky_relu_q7(act->tensor->p_data, a->alpha, tensor_size(act->tensor));
    return NN_SUCCESS;
}

// advanced relu
static nnom_status_t adv_relu_run(nnom_activation_t* act)
{
    nnom_activation_adv_relu_t* a = (nnom_activation_adv_relu_t*) act;

    // we need to convert float to fixed-point at runtime, where the tensor's q format is known
    if(act->tensor->bitwidth == 16)
    {
        q15_t max = 32767;
        q15_t threshold = MIN(a->threshold * (1 << (15 - act->tensor->q_dec[0])), 32767);
        q15_t max_scale = (1 << (15 - act->tensor->q_dec[0]));
        if(a->max != INFINITY && a->max != 0x7fc00000)
            if(a->max * max_scale < max)
                max = a->max * max_scale;
        local_adv_relu_q15(act->tensor->p_data, a->negative_slope, max, threshold, tensor_size(act->tensor));
    }
    // 8bit
    else
    {
        q7_t max = 127;
        q7_t threshold = MIN(a->threshold * (1 << (7 - act->tensor->q_dec[0])), 127);
        q7_t max_scale = (1 << (7 - act->tensor->q_dec[0]));
        if(a->max != INFINITY && a->max != 0x7fc00000) // QNAN 0x7fc00000 also represents infinity in script 0.4.1
            if(a->max * max_scale < max)
                max = a->max * max_scale;
        local_adv_relu_q7(act->tensor->p_data, a->negative_slope, max, threshold, tensor_size(act->tensor));
    }

    return NN_SUCCESS;
}

static nnom_status_t tanh_run(nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    // 16 bit
    if(act->tensor->bitwidth == 16)
    {
        uint8_t int_bit = 15 - a->dec_bit;
#ifdef NNOM_USING_CMSIS_NN
        arm_nn_activations_direct_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_TANH);
#else
        local_tanh_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit);
#endif
    }
    else // 8bit
    {
        uint8_t int_bit = 7 - a->dec_bit;
        // the arm version cannot handle int_bit > 3
#ifdef NNOM_USING_CMSIS_NN
        if(act->tensor->q_dec[0] <= 3)
            arm_nn_activations_direct_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_TANH);
        else
#endif
            local_tanh_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit);
    }
    return NN_SUCCESS;
}

static nnom_status_t sigmoid_run( nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    // 16 bit
    if(act->tensor->bitwidth == 16)
    {
        uint8_t int_bit = 15 - a->dec_bit;
#ifdef NNOM_USING_CMSIS_NN
        arm_nn_activations_direct_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_SIGMOID);
#else
        local_sigmoid_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit);
#endif
    }
    else // 8bit
    {
        uint8_t int_bit = 7 - a->dec_bit;
        // the arm version cannot handle int_bit > 3
#ifdef NNOM_USING_CMSIS_NN
        if(act->tensor->q_dec[0] <= 3)
            arm_nn_activations_direct_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_SIGMOID);
        else
#endif
            local_sigmoid_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit);
    }

    return NN_SUCCESS;
}

static nnom_status_t hard_tanh_run( nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    if(act->tensor->bitwidth == 16)
        local_hard_tanh_q15(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit + 8); // a->dec_bit is based on 8 bit.
    else
        local_hard_tanh_q7(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit);
    return NN_SUCCESS;
}

static nnom_status_t hard_sigmoid_run( nnom_activation_t* act)
{
    nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
    if(act->tensor->bitwidth == 16)
        local_hard_sigmoid_q15(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit + 8); // a->dec_bit is based on 8 bit.
    else
        local_hard_sigmoid_q7(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit);
    return NN_SUCCESS;
}

// activation instance constructors
nnom_activation_t* act_relu(void)
{
    nnom_activation_t* act = nnom_mem(sizeof(nnom_activation_t));
    act->run = relu_run;
    act->type = ACT_RELU;
    return act;
}

nnom_activation_t* act_leaky_relu(float alpha)
{
    nnom_activation_leaky_relu_t* act = nnom_mem(sizeof(nnom_activation_leaky_relu_t));
    act->super.run = leaky_relu_run;
    act->super.type = ACT_LEAKY_RELU;
    act->alpha = (q7_t)(alpha*128);
    return (nnom_activation_t* )act;
}

nnom_activation_t* act_adv_relu(float negative_slope, float max, float threshold)
{
    nnom_activation_adv_relu_t* act = nnom_mem(sizeof(nnom_activation_adv_relu_t));
    act->super.run = adv_relu_run;
    act->super.type = ACT_ADV_RELU;
    act->negative_slope = (q7_t)(negative_slope*128);
    act->max = max;
    act->threshold = threshold;
    return (nnom_activation_t* )act;
}

nnom_activation_t* act_tanh(int32_t dec_bit)
{
    nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
    act->super.run = tanh_run;
    act->super.type = ACT_TANH;
    act->dec_bit = dec_bit;
    return (nnom_activation_t*)act;
}

nnom_activation_t* act_sigmoid(int32_t dec_bit)
{
    nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));

    act->super.run = sigmoid_run;
    act->super.type = ACT_SIGMOID;
    act->dec_bit = dec_bit;
    return (nnom_activation_t*)act;
}

nnom_activation_t* act_hard_tanh(int32_t dec_bit)
{
    nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));

    act->super.run = hard_tanh_run;
    act->super.type = ACT_HARD_TANH;
    act->dec_bit = dec_bit;
    return (nnom_activation_t*)act;
}

nnom_activation_t* act_hard_sigmoid(int32_t dec_bit)
{
    nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));

    act->super.run = hard_sigmoid_run;
    act->super.type = ACT_HARD_SIGMOID;
    act->dec_bit = dec_bit;
    return (nnom_activation_t*)act;
}

// return the decimal bit if the activation will change the q format of the layer.
int32_t act_get_dec_bit(nnom_activation_type_t type, int32_t dec_bit)
{
    switch(type)
    {
        case ACT_RELU:
        case ACT_LEAKY_RELU:
        case ACT_ADV_RELU:
            break;
        case ACT_TANH:
        case ACT_HARD_TANH:
        case ACT_SIGMOID:
        case ACT_HARD_SIGMOID:
            dec_bit = 7;
        default: break;
    }
    return dec_bit;
}
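
The switch above encodes the Q-format rule: the ReLU family keeps the input's Q format, while tanh/sigmoid outputs lie within (-1, 1) and are therefore always emitted as Q0.7:

/* e.g. a Q2.5 tensor (dec_bit = 5) passed through tanh comes out as Q0.7 */
int32_t out_dec = act_get_dec_bit(ACT_TANH, 5); /* == 7 */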

// a direct API to run an activation on a tensor
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor)
{
    act->tensor = tensor;
    return act->run(act);
}

@ -0,0 +1,167 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-07-23     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_avgpool.h"

#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif

nnom_layer_t *avgpool_s(const nnom_pool_config_t * config)
{
    nnom_avgpool_layer_t *cl;

    if(config->num_dim == 1)
    {
        cl = (nnom_avgpool_layer_t *)AvgPool(kernel(1, config->kernel_size[0]),
                        stride(1, config->stride_size[0]),
                        config->padding_type);
    }
    else
    {
        cl = (nnom_avgpool_layer_t *)AvgPool(kernel(config->kernel_size[0], config->kernel_size[1]),
                        stride(config->stride_size[0], config->stride_size[1]),
                        config->padding_type);
    }

    if(cl)
    {
        cl->super.config = (void*) config;
        cl->output_shift = config->output_shift; // no idea if we need it
    }
    return (nnom_layer_t *)cl;
}
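
A sketch of driving avgpool_s() with a structured configuration. Only the fields read above are filled in; the exact layout of nnom_pool_config_t should be checked against the headers (it is assumed here):

/* hypothetical configuration for a 2x2, stride-2, 2-D average pool */
static const nnom_pool_config_t avgpool_cfg = {
    .num_dim = 2,
    .kernel_size = {2, 2},
    .stride_size = {2, 2},
    .padding_type = PADDING_VALID,
    .output_shift = 0,
};
nnom_layer_t *pool = avgpool_s(&avgpool_cfg);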

nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type)
{
    nnom_layer_t *layer = MaxPool(k, s, pad_type);

    if (layer != NULL)
    {
        layer->type = NNOM_AVGPOOL;
        layer->run = avgpool_run;
        layer->build = avgpool_build;
    }
    return (nnom_layer_t *)layer;
}

nnom_status_t avgpool_build(nnom_layer_t *layer)
{
    uint32_t size;
    // avg pooling shares the same output shape, stride and padding settings as max pooling.
    maxpool_build(layer);

#ifdef NNOM_USING_CMSIS_NN
    // however, avg pooling requires a computational buffer.
    // bufferA size: 2*dim_im_out*ch_im_in
    size = layer->out->tensor->dim[1] > layer->out->tensor->dim[0] ?
                    layer->out->tensor->dim[1] : layer->out->tensor->dim[0];
    layer->comp->size = 2 * size * layer->in->tensor->dim[2];
#endif

    return NN_SUCCESS;
}

nnom_status_t avgpool_run(nnom_layer_t *layer)
{
    nnom_avgpool_layer_t *cl = (nnom_avgpool_layer_t *)(layer);
    uint16_t out_x, out_y;
    // if global pooling
    if(layer->out->tensor->num_dim == 1)
    {
        out_x = 1; out_y = 1;
    }
    else // normal pooling.
    {
        out_x = layer->out->tensor->dim[1]; //W
        out_y = layer->out->tensor->dim[0]; //H
    }

    // 16 bit
    if(layer->in->tensor->bitwidth == 16)
    {
#ifdef NNOM_USING_CHW
        local_avepool_q15_CHW(layer->in->tensor->p_data,
                layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                cl->kernel.w, cl->kernel.h,
                cl->pad.w, cl->pad.h,
                cl->stride.w, cl->stride.h,
                out_x, out_y,
                cl->output_shift,
                NULL,
                layer->out->tensor->p_data);
#else
        local_avepool_q15_HWC(layer->in->tensor->p_data,
                layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                cl->kernel.w, cl->kernel.h,
                cl->pad.w, cl->pad.h,
                cl->stride.w, cl->stride.h,
                out_x, out_y,
                cl->output_shift,
                NULL,
                layer->out->tensor->p_data);
#endif
    }
    // 8bit
    else{
#ifdef NNOM_USING_CHW
        local_avepool_q7_CHW(layer->in->tensor->p_data,
                layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                cl->kernel.w, cl->kernel.h,
                cl->pad.w, cl->pad.h,
                cl->stride.w, cl->stride.h,
                out_x, out_y,
                cl->output_shift,
                NULL,
                layer->out->tensor->p_data);
#else //end of CHW
#ifdef NNOM_USING_CMSIS_NN
        // 2D, square
        if (layer->in->tensor->dim[1] == layer->in->tensor->dim[0] &&
            layer->out->tensor->dim[1] == layer->out->tensor->dim[0] &&
            cl->output_shift == 0)
        {
            arm_avepool_q7_HWC(
                layer->in->tensor->p_data,
                layer->in->tensor->dim[1], layer->in->tensor->dim[2],
                cl->kernel.w, cl->pad.w, cl->stride.w,
                layer->out->tensor->dim[1],
                layer->comp->mem->blk,
                layer->out->tensor->p_data);
        }
        // non-square 2D, or 1D
        else
#endif
        {
            // CMSIS-NN does not support non-square pooling; we have to use the local implementation
            local_avepool_q7_HWC(layer->in->tensor->p_data,
                    layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                    cl->kernel.w, cl->kernel.h,
                    cl->pad.w, cl->pad.h,
                    cl->stride.w, cl->stride.h,
                    out_x, out_y,
                    cl->output_shift,
                    NULL,
                    layer->out->tensor->p_data);
        }
#endif
    }
    return NN_SUCCESS;
}

@ -0,0 +1,90 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-07-23     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_baselayer.h"

// this layer copies the input to the output

nnom_layer_t *baselayer_s(const nnom_layer_config_t * config)
{
    nnom_layer_t *layer = BaseLayer();
    if(layer)
        layer->config = (void*) config;
    return layer;
}

nnom_layer_t *BaseLayer()
{
    nnom_io_layer_t *layer;
    nnom_layer_io_t *in, *out;

    // allocate a block of memory for all the sub handles.
    size_t mem_size = sizeof(nnom_io_layer_t) + sizeof(nnom_layer_io_t) * 2;
    layer = nnom_mem(mem_size);
    if (layer == NULL)
        return NULL;

    // distribute the memory to the sub handles.
    in = (void *)((uint8_t*)layer + sizeof(nnom_io_layer_t));
    out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));

    // set type in layer parent
    layer->super.type = NNOM_BASE;
    layer->super.run = default_run;
    layer->super.build = default_build;
    // set buf state
    in->type = NNOM_TENSOR_BUF_TEMP;
    out->type = NNOM_TENSOR_BUF_NULL;
    // put in & out on the layer.
    layer->super.in = io_init(layer, in);
    layer->super.out = io_init(layer, out);

    return (nnom_layer_t *)layer;
}

// this is called when the output shape is not yet defined.
// it sets the output shape to match the input shape, and it sets only the primary IO.
// this cannot be used as the first layer, of course...
nnom_status_t default_build(nnom_layer_t *layer)
{
    // get the last layer's output as the input shape
    layer->in->tensor = layer->in->hook.io->tensor;
    // output tensor
    // 1. allocate a new tensor for the output
    // 2. set the same dim and qfmt on the new tensor.
    layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
    tensor_cpy_attr(layer->out->tensor, layer->in->tensor);

    // see if the activation will change the q format
    if(layer->actail)
        layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);

    // now this build has passed the input tensors (shapes, formats) to the new tensors.
    return NN_SUCCESS;
}

// simply copy input to output
nnom_status_t default_run(nnom_layer_t *layer)
{
    if(layer->out->type != NNOM_TENSOR_BUF_NULL)
    {
        nnom_memcpy(layer->out->tensor->p_data, layer->in->tensor->p_data, tensor_size_byte(layer->in->tensor));
    }
    return NN_SUCCESS;
}

@ -0,0 +1,223 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-07-23     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_concat.h"

nnom_layer_t *concat_s(const nnom_concat_config_t *config)
{
    nnom_layer_t* layer = Concat(config->axis);
    if(layer)
        layer->config = (void*) config;
    return layer;
}

// concatenation method
// concatenation requires more than one input module. aux inputs will be allocated in model.merge()
nnom_layer_t *Concat(int8_t axis)
{
    nnom_concat_layer_t *layer;
    nnom_layer_io_t *in, *out;
    size_t mem_size;

    // allocate a block of memory for all the sub handles.
    mem_size = sizeof(nnom_concat_layer_t) + sizeof(nnom_layer_io_t) * 2;
    layer = nnom_mem(mem_size);
    if (layer == NULL)
        return NULL;

    // distribute the memory to the sub handles.
    in = (void *)((uint8_t*)layer + sizeof(nnom_concat_layer_t));
    out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));

    // set type in layer parent
    layer->super.type = NNOM_CONCAT;
    layer->super.run = concat_run;
    layer->super.build = concat_build;
    // set buf state
    in->type = NNOM_TENSOR_BUF_TEMP;
    out->type = NNOM_TENSOR_BUF_TEMP;
    // put in & out on the layer.
    layer->super.in = io_init(layer, in);
    layer->super.out = io_init(layer, out);

    // axis
    layer->axis = axis;

    return (nnom_layer_t *)layer;
}

nnom_status_t concat_build(nnom_layer_t *layer)
{
    nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
    nnom_layer_io_t *in;
    uint32_t in_num = 0;
    int32_t num_dim;

    // for each input module, copy the shape from the output of the last layer
    in = layer->in;
    while (in != NULL)
    {
        //get the last layer's output as the input shape
        in->tensor = in->hook.io->tensor;
        in = in->aux;
        in_num++;
    }

    // allocate a new tensor for the output, keeping the same number of dimensions
    layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
    tensor_cpy_attr(layer->out->tensor, layer->in->tensor);

    // convert the axis.
    if (cl->axis < 0)
        cl->axis = (layer->in->tensor->num_dim + cl->axis);
    else if (cl->axis > 0)
        cl->axis = cl->axis - 1; // Keras axes start from 1; we use 0, 1, 2 (check?)

    // work out the concatenated axis
    num_dim = layer->in->tensor->num_dim;
    for (uint32_t i = 0; i < num_dim; i++)
    {
        // the concat axis itself:
        if (i == cl->axis)
        {
            layer->out->tensor->dim[i] = 0;

            // sum the same axis over all inputs.
            in = layer->in;
            while (in != NULL)
            {
                layer->out->tensor->dim[i] += in->tensor->dim[i];
                in = in->aux;
            }
            continue;
        }

        // check the others; all other axes must have the same shape
        in = layer->in;
        while (in != NULL && in->aux != NULL)
        {
            if (in->tensor->dim[i] != in->aux->tensor->dim[i])
                return NN_ARGUMENT_ERROR;
            in = in->aux;
        }

        // now set the other axes
        layer->out->tensor->dim[i] = layer->in->tensor->dim[i];
    }

    return NN_SUCCESS;
}
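
A worked example of the axis handling above: concatenating two HWC tensors of shape (4, 4, 8) and (4, 4, 16) along the channel axis. Keras calls this axis 3 (or -1); internally it resolves to axis 2, the channel dims are summed, and all other dims must match:

/* -1 -> 3 + (-1) = 2; or Keras axis 3 -> 3 - 1 = 2
 * out.dim[2] = 8 + 16 = 24, giving an output of (4, 4, 24) */
nnom_layer_t *cc = Concat(-1); /* channel-wise concat, resolved at build time */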
|
||||
|
||||
|
||||
#ifdef NNOM_USING_CHW
|
||||
// axis index converter between HWC and CHW
|
||||
static inline int chw_i(int hwc, int num_dim)
|
||||
{
|
||||
num_dim = num_dim -1;
|
||||
hwc = hwc + 1;
|
||||
if(hwc>num_dim)
|
||||
hwc = 0;
|
||||
return hwc;
|
||||
}
|
||||
static inline int hwc_i(int chw, int num_dim)
|
||||
{
|
||||
num_dim = num_dim -1;
|
||||
chw = chw - 1;
|
||||
if(chw<num_dim)
|
||||
chw = num_dim;
|
||||
return chw;
|
||||
}
|
||||
#endif

nnom_status_t concat_run(nnom_layer_t *layer)
{
    // by default, a concat layer has multiple (>=2) inputs and 1 output.
    nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
    nnom_layer_io_t *in;
    uint32_t dwidth = layer->in->tensor->bitwidth / 8; // data width in bytes

#ifdef NNOM_USING_CHW
    // Concatenate for CHW
    uint8_t *pin;
    uint8_t *pout = layer->out->tensor->p_data;
    uint32_t block_size;
    uint32_t n_block;
    uint8_t num_dim = layer->in->tensor->num_dim;

    // calculate the number of blocks to concat: the product of the shapes before the concat axis
    n_block = 1;
    for (int i = 0; i < chw_i(cl->axis, num_dim); i++)
    {
        n_block *= layer->in->tensor->dim[hwc_i(i, num_dim)];
    }

    // concat all input layers
    for (int i = 0; i < n_block; i++)
    {
        in = layer->in;
        while (in != NULL)
        {
            // the block size of the concat data in this layer
            block_size = dwidth;
            for (int j = num_dim - 1; j >= chw_i(cl->axis, num_dim); j--)
                block_size *= in->tensor->dim[hwc_i(j, num_dim)];
            // concat
            pin = (uint8_t *)in->tensor->p_data + i * block_size;
            nnom_memcpy(pout, pin, block_size);
            pout += block_size;
            in = in->aux;
        }
    }

#else // end of CHW concatenation

    // Concatenate for HWC
    uint8_t *pin;
    uint8_t *pout = layer->out->tensor->p_data;
    uint32_t block_size;
    uint32_t n_block;
    uint8_t num_dim = layer->in->tensor->num_dim;

    // calculate the number of blocks to concat: the product of the shapes before the concat axis
    n_block = 1;
    for (int i = 0; i < cl->axis; i++)
        n_block *= layer->in->tensor->dim[i];

    // concat all input layers
    for (int i = 0; i < n_block; i++)
    {
        in = layer->in;
        while (in != NULL)
        {
            // the block size of the concat data in this layer
            block_size = dwidth;
            for (int j = cl->axis; j < num_dim; j++)
                block_size *= in->tensor->dim[j];
            // concat
            pin = (uint8_t *)in->tensor->p_data + i * block_size;
            nnom_memcpy(pout, pin, block_size);
            pout += block_size;
            in = in->aux;
        }
    }
#endif
    return NN_SUCCESS;
}
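
/* Worked example (illustrative, HWC path): inputs (4, 4, 3) and (4, 4, 5),
 * concat axis 2, int8 data (dwidth = 1). n_block = 4 * 4 = 16; each block
 * copies 3 bytes from input 1 then 5 bytes from input 2, so the 8 output
 * channels are interleaved per (h, w) position. */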

@ -0,0 +1,434 @@
/*
 * Copyright (c) 2018-2020
 * Jianjia Ma
 * majianjia@live.com
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2019-07-23     Jianjia Ma   The first version
 */

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_conv2d.h"

#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif

// a machine friendly api, with suffix _s for structured configuration.
nnom_layer_t *conv2d_s(const nnom_conv2d_config_t *config)
{
    nnom_conv2d_layer_t *layer;
    nnom_buf_t *comp;
    nnom_layer_io_t *in, *out;
    size_t mem_size;

    // allocate a block of memory for all the sub handles and shifts.
    mem_size = sizeof(nnom_conv2d_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
    layer = nnom_mem(mem_size);
    if (layer == NULL)
        return NULL;

    // distribute the memory to sub handles.
    in = (void *)((uint8_t*)layer + sizeof(nnom_conv2d_layer_t));
    out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
    comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));

    // set type in layer parent
    layer->super.type = NNOM_CONV_2D;
    // set buf state
    in->type = NNOM_TENSOR_BUF_TEMP;
    out->type = NNOM_TENSOR_BUF_TEMP;
    comp->type = NNOM_TENSOR_BUF_TEMP;
    // put in & out on the layer.
    layer->super.in = io_init(layer, in);
    layer->super.out = io_init(layer, out);
#ifdef NNOM_USING_CMSIS_NN
    layer->super.comp = comp;
#endif
    // set run method & output shape
    layer->super.run = conv2d_run;
    layer->super.build = conv2d_build;
    layer->super.free = conv2d_free;

    // save the config
    layer->super.config = (void*) config;

    // get the private parameters
    // test: for 1D input, expand to h = 1
    if (config->weight->num_dim == 3)
    {
        layer->kernel = kernel(1, config->kernel_size[0]);
        layer->stride = stride(1, config->stride_size[0]);
        layer->dilation = dilation(1, config->dilation_size[0]);
    }
    else
    {
        layer->kernel = kernel(config->kernel_size[0], config->kernel_size[1]);
        layer->stride = stride(config->stride_size[0], config->stride_size[1]);
        layer->dilation = dilation(config->dilation_size[0], config->dilation_size[1]);
    }

    layer->filter_mult = config->filter_size; // for convs, this means the number of filters
    layer->padding_type = config->padding_type;

    // get the bias and weight tensors; these should be created by the script.
    layer->weight = config->weight;
    layer->bias = config->bias;

    // get shifts
    layer->output_rshift = (nnom_qformat_param_t *)config->output_shift;
    layer->bias_lshift = (nnom_qformat_param_t *)config->bias_shift;

    // padding
    if (layer->padding_type == PADDING_SAME)
    {
        layer->pad.h = layer->dilation.h * (layer->kernel.h - 1) / 2;
        layer->pad.w = layer->dilation.w * (layer->kernel.w - 1) / 2;
        layer->pad.c = (1 - 1) / 2; // effectively 0
    }

    return (nnom_layer_t *)layer;
}
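
/* Illustrative sketch (not part of this commit): a structured config as a
 * code generator might emit it. The field names follow their uses above;
 * the conv1_* tensors and shift arrays are hypothetical placeholders.
 *
 *     static const nnom_conv2d_config_t conv1_config = {
 *         .weight = &conv1_w_tensor, .bias = &conv1_b_tensor,
 *         .filter_size = 16,
 *         .kernel_size = {3, 3}, .stride_size = {1, 1}, .dilation_size = {1, 1},
 *         .padding_type = PADDING_SAME,
 *         .output_shift = conv1_output_shift, .bias_shift = conv1_bias_shift,
 *     };
 *     nnom_layer_t *conv1 = conv2d_s(&conv1_config);
 */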

// Conv2D
// multiplier of (output/input channel),
// shape of kernel, shape of strides, weight struct, bias struct
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
                     const nnom_weight_t *w, const nnom_bias_t *b)
{
    nnom_conv2d_layer_t *layer;
    nnom_buf_t *comp;
    nnom_layer_io_t *in, *out;
    // allocate a block of memory for all the sub handles.
    size_t mem_size = sizeof(nnom_conv2d_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
    layer = nnom_mem(mem_size);
    if (layer == NULL)
        return NULL;

    // distribute the memory to sub handles.
    in = (void *)((uint8_t*)layer + sizeof(nnom_conv2d_layer_t));
    out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
    comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));

    // set type in layer parent
    layer->super.type = NNOM_CONV_2D;
    // set buf state
    in->type = NNOM_TENSOR_BUF_TEMP;
    out->type = NNOM_TENSOR_BUF_TEMP;
    comp->type = NNOM_TENSOR_BUF_TEMP;
    // put in & out on the layer.
    layer->super.in = io_init(layer, in);
    layer->super.out = io_init(layer, out);
#ifdef NNOM_USING_CMSIS_NN
    layer->super.comp = comp;
#endif
    // set run method & output shape
    layer->super.run = conv2d_run;
    layer->super.build = conv2d_build;

    // get the private parameters
    layer->kernel = k;
    layer->stride = s;
    layer->dilation = d;
    layer->filter_mult = filters; // for convs, this means the number of filters
    layer->padding_type = pad_type;

    // create weight and bias tensors
    layer->weight = new_tensor(NNOM_QTYPE_PER_TENSOR, 4, filters);
    layer->bias = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, filters);

    // configure the weight tensor manually to support new tensor-based backends.
    // needs to be very careful
    {
        // config weight
        nnom_shape_data_t dim[4] = {k.h, k.w, k.c, filters};
        *(layer->weight->q_offset) = 0; // we have no support for offset here
        *(layer->weight->q_dec) = 0;    // not using it
        layer->weight->p_data = (void*)w->p_value;
        layer->weight->bitwidth = 8;
        layer->weight->qtype = NNOM_QTYPE_PER_TENSOR;
        nnom_memcpy(layer->weight->dim, dim, layer->weight->num_dim * sizeof(nnom_shape_data_t));

        // config bias
        dim[0] = filters;
        *(layer->bias->q_offset) = 0; // we have no support for offset here
        *(layer->bias->q_dec) = 0;    // not using it
        layer->bias->p_data = (void*) b->p_value;
        layer->bias->bitwidth = 8;
        layer->bias->qtype = NNOM_QTYPE_PER_TENSOR;
        nnom_memcpy(layer->bias->dim, dim, layer->bias->num_dim * sizeof(nnom_shape_data_t));

        // output shift and bias shift
        layer->output_rshift = (nnom_qformat_param_t *)&w->shift;
        layer->bias_lshift = (nnom_qformat_param_t *)&b->shift;
    }

    return (nnom_layer_t *)layer;
}
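
/* Illustrative usage (not part of this commit): a 3x3, 16-filter convolution
 * built with the legacy API. kernel()/stride()/dilation() and the weight/bias
 * structs follow the usual NNoM headers; treat the exact names as assumptions.
 *
 *     nnom_layer_t *x = model.hook(
 *         Conv2D(16, kernel(3, 3), stride(1, 1), dilation(1, 1), PADDING_SAME,
 *                &c1_w, &c1_b),   // c1_w / c1_b from the generated weights.h
 *         x);
 */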

// Keras's implementation.
// source: https://github.com/keras-team/keras/blob/7a39b6c62d43c25472b2c2476bd2a8983ae4f682/keras/utils/conv_utils.py#L85
uint32_t conv_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation)
{
    if (input_length == 0)
        return 0;
    uint32_t dilated_filter_size = (filter_size - 1) * dilation + 1;
    uint32_t output_length;
    if (padding == PADDING_SAME)
        output_length = input_length;
    else
        output_length = input_length - dilated_filter_size + 1;
    return (output_length + stride - 1) / stride;
}
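
/* Worked example (illustrative): input_length = 32, filter_size = 3,
 * stride = 2, dilation = 1. SAME: (32 + 2 - 1) / 2 = 16. VALID:
 * output_length = 32 - 3 + 1 = 30, then (30 + 2 - 1) / 2 = 15. With
 * dilation = 2 the dilated filter is (3 - 1) * 2 + 1 = 5, so VALID
 * gives 32 - 5 + 1 = 28 and (28 + 2 - 1) / 2 = 14. */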

nnom_status_t conv2d_build(nnom_layer_t *layer)
{
    nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;

    // get the tensor from the last layer's output
    layer->in->tensor = layer->in->hook.io->tensor;

    // create a new tensor for the output
    layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, cl->filter_mult);
    // copy, then change later.
    tensor_cpy_attr(layer->out->tensor, layer->in->tensor);

    // calculate the output tensor q format; only per-tensor quantisation is supported now
    layer->out->tensor->q_dec[0] = layer->in->tensor->q_dec[0] + cl->weight->q_dec[0] - cl->output_rshift[0]; // needs some modification for 16 bit.
    // see if the activation will change the q format
    if (layer->actail)
        layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);

    // now set up the tensor shape, always in HWC format
    layer->out->tensor->dim[0] = conv_output_length(layer->in->tensor->dim[0], cl->kernel.h, cl->padding_type, cl->stride.h, cl->dilation.h);
    layer->out->tensor->dim[1] = conv_output_length(layer->in->tensor->dim[1], cl->kernel.w, cl->padding_type, cl->stride.w, cl->dilation.w);
    layer->out->tensor->dim[2] = cl->filter_mult; // the channel equals the filter number

    // fill padding
    if (cl->padding_type == PADDING_SAME)
    {
        cl->pad.w = cl->dilation.w * (cl->kernel.w - 1) / 2;
        cl->pad.h = cl->dilation.h * (cl->kernel.h - 1) / 2;
        cl->pad.c = 0;
    }

#ifdef NNOM_USING_CMSIS_NN
    // bufferA size: (1D shape)
    // 2*ch_im_in*dim_kernel*dim_kernel
    layer->comp->size = 2 * 2 * layer->in->tensor->dim[2] * cl->kernel.w * cl->kernel.h;
#endif
    // computational cost: K x K x Cin x Hout x Wout x Cout
    layer->stat.macc = cl->kernel.w * cl->kernel.h * layer->in->tensor->dim[2] * tensor_size(layer->out->tensor);
    return NN_SUCCESS;
}
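
/* Worked example (illustrative): with input q_dec = 7 (Q0.7 data), weight
 * q_dec = 6 and output_rshift = 6, the output q format is 7 + 6 - 6 = 7:
 * the Q13 accumulator is shifted right by 6 bits back into Q7. */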

nnom_status_t conv2d_free(nnom_layer_t *layer)
{
    // free the weight and bias tensors only when not initialised from a structured configuration.
    if (!layer->config)
    {
        nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;
        delete_tensor(cl->weight);
        delete_tensor(cl->bias);
    }
    return NN_SUCCESS;
}

nnom_status_t conv2d_run(nnom_layer_t *layer)
{
    nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;

#ifdef NNOM_USING_CHW
    // CHW format
    if (layer->in->tensor->bitwidth == 16)
        local_convolve_CHW_q15_nonsquare(
            layer->in->tensor->p_data,
            layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
            cl->weight->p_data, layer->out->tensor->dim[2],
            cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
            cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
            layer->out->tensor->p_data,
            layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
    else
        local_convolve_CHW_q7_nonsquare(
            layer->in->tensor->p_data,
            layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
            cl->weight->p_data, layer->out->tensor->dim[2],
            cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
            cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
            layer->out->tensor->p_data,
            layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
    return NN_SUCCESS;
#else
    // HWC format
#ifdef NNOM_USING_CMSIS_NN
    // the current CMSIS-NN does not support dilation
    if (cl->dilation.w == 1 && cl->dilation.h == 1 && cl->weight->qtype == NNOM_QTYPE_PER_TENSOR)
    {
        // 8 bit CMSIS-NN
        if (layer->in->tensor->bitwidth == 8)
        {
            // RGB
            // ch_im_in = 3, w = h
            if (layer->in->tensor->dim[2] == 3 && layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
                // square
                if ((cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
                    return (nnom_status_t)arm_convolve_HWC_q7_RGB(
                        layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
                        cl->weight->p_data,
                        layer->out->tensor->dim[2],
                        cl->kernel.w, cl->pad.w, cl->stride.w,
                        cl->bias->p_data, cl->bias_lshift[0],
                        cl->output_rshift[0], layer->out->tensor->p_data, layer->out->tensor->dim[1],
                        (q15_t *)(layer->comp->mem->blk), NULL);

            // check whether the optimized function can be used:
            // ch_im_in is a multiple of 4
            // ch_im_out is a multiple of 2
            if ((layer->in->tensor->dim[2] % 4 == 0) && (layer->out->tensor->dim[2] % 2 == 0))
            {
                // square
                if ((layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
                    && (layer->out->tensor->dim[0] == layer->out->tensor->dim[1])
                    && (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
                {
                    // 1x1 fast
                    if (cl->kernel.w == 1 && cl->kernel.h == 1 && cl->stride.w == 1 && cl->stride.h == 1 && cl->pad.w == 0 && cl->pad.h == 0)
                        return (nnom_status_t)arm_convolve_1x1_HWC_q7_fast_nonsquare(
                            layer->in->tensor->p_data,
                            layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                            cl->weight->p_data,
                            layer->out->tensor->dim[2],
                            cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
                            cl->bias->p_data, cl->bias_lshift[0],
                            cl->output_rshift[0], layer->out->tensor->p_data, layer->out->tensor->dim[1], layer->out->tensor->dim[0],
                            (q15_t *)(layer->comp->mem->blk), NULL);
                    // optimized, square shape
                    else
                        return (nnom_status_t)arm_convolve_HWC_q7_fast(
                            layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
                            cl->weight->p_data,
                            layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
                            cl->bias->p_data, cl->bias_lshift[0],
                            cl->output_rshift[0], layer->out->tensor->p_data,
                            layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
                }
                // optimized, non-square shape
                else
                    return (nnom_status_t)arm_convolve_HWC_q7_fast_nonsquare(
                        layer->in->tensor->p_data,
                        layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                        cl->weight->p_data, layer->out->tensor->dim[2],
                        cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
                        cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
                        layer->out->tensor->p_data,
                        layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
            }
            // non-optimized
            else
            {
                // non-optimized, square shape
                if ((layer->in->tensor->dim[0] == layer->in->tensor->dim[1] &&
                     layer->out->tensor->dim[0] == layer->out->tensor->dim[1]) &&
                    (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
                    return (nnom_status_t)arm_convolve_HWC_q7_basic(
                        layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
                        cl->weight->p_data,
                        layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
                        cl->bias->p_data, cl->bias_lshift[0],
                        cl->output_rshift[0], layer->out->tensor->p_data,
                        layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
                // non-optimized, non-square shape
                else
                    return (nnom_status_t)arm_convolve_HWC_q7_basic_nonsquare(
                        layer->in->tensor->p_data,
                        layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                        cl->weight->p_data, layer->out->tensor->dim[2],
                        cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
                        cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
                        layer->out->tensor->p_data,
                        layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
            } // end of CMSIS-NN non-opt
        } // end of 8 bit CMSIS-NN
        else if (layer->in->tensor->bitwidth == 16)
        {
            // fast opt
            if ((layer->in->tensor->dim[2] % 2 == 0) && (layer->out->tensor->dim[2] % 2 == 0))
            {
                if ((layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
                    && (layer->out->tensor->dim[0] == layer->out->tensor->dim[1])
                    && (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
                    return (nnom_status_t)arm_convolve_HWC_q15_fast(
                        layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
                        cl->weight->p_data,
                        layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
                        cl->bias->p_data, cl->bias_lshift[0],
                        cl->output_rshift[0], layer->out->tensor->p_data,
                        layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
                else
                    return (nnom_status_t)arm_convolve_HWC_q15_fast_nonsquare(
                        layer->in->tensor->p_data,
                        layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                        cl->weight->p_data, layer->out->tensor->dim[2],
                        cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
                        cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
                        layer->out->tensor->p_data,
                        layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
            }
            // non-opt basic
            else
            {
                local_convolve_HWC_q15_nonsquare(
                    layer->in->tensor->p_data,
                    layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                    cl->weight->p_data, layer->out->tensor->dim[2],
                    cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
                    cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
                    layer->out->tensor->p_data,
                    layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
                return NN_SUCCESS;
            }
        } // end of 16 bit CMSIS-NN
    } // end of dilation == 1
    else
#endif // NNOM_USING_CMSIS_NN
    {
        if (layer->in->tensor->bitwidth == 16)
            local_convolve_HWC_q15_nonsquare(
                layer->in->tensor->p_data,
                layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                cl->weight->p_data, layer->out->tensor->dim[2],
                cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
                cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
                layer->out->tensor->p_data,
                layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
        else
            local_convolve_HWC_q7_nonsquare(
                layer->in->tensor->p_data,
                layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
                cl->weight->p_data, layer->out->tensor->dim[2],
                cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
                cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
                layer->out->tensor->p_data,
                layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
        return NN_SUCCESS;
    }
#endif // end of CHW/HWC
    return NN_SUCCESS;
}
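
/* Dispatch summary (illustrative): with CMSIS-NN enabled and 1x1 dilation,
 * conv2d_run prefers arm_convolve_HWC_q7_RGB for square 3-channel inputs,
 * the *_fast variants when ch_im_in % 4 == 0 and ch_im_out % 2 == 0 (q7) or
 * both channel counts are even (q15), and the *_basic variants otherwise;
 * all remaining cases (dilation, CHW, per-channel quantisation, no CMSIS-NN)
 * fall back to NNoM's local_convolve_* C kernels. */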