forked from xuos/xiuos
APP_Framework/Applications/:add CMSIS-NN (version 5) cifar10 example
This commit is contained in:
parent
96ca3b18f1
commit
49caa6e533
|
@ -0,0 +1,14 @@
|
|||
# CMSIS-NN cifar10 example
|
||||
|
||||
The model of this example is from [[ARM-software](https://github.com/ARM-software)/**[ML-examples](https://github.com/ARM-software/ML-examples)**] and can be deployed on Arm Cortex-M CPUs using [CMSIS-NN](https://github.com/ARM-software/CMSIS_5).
|
||||
|
||||
## Requirements:
|
||||
- CMSIS-NN in Framework/knowing/cmsis_5
|
||||
- TJpgDec in Framework/knowing/image_processing
|
||||
- Enough stack size (10240 recommended) for the finsh thread; it can be changed under "RT-Thread Components->Command shell->finsh shell" in menuconfig.
|
||||
|
||||
## To run this demo:
|
||||
- Place the photo wherever you want
|
||||
- Run the demo by typing the command
|
||||
```
|
||||
cmsisnn_demo /path/to/photo
|
Binary file not shown.
After Width: | Height: | Size: 1000 B |
|
@ -0,0 +1,157 @@
|
|||
#include <transform.h>
|
||||
#include <tjpgd.h>
|
||||
#include "../model/m4/nn.h"
|
||||
|
||||
/* Size in bytes of the work area that cmsisnn_demo() allocates and hands to jd_prepare(). */
#define WORK_POOL_SIZE (4 * 1024 + 32)
|
||||
|
||||
/* Human-readable class names, indexed by the class index returned by get_top_prediction(). */
const char *cifar10_label[] = {
    "Plane",
    "Car",
    "Bird",
    "Cat",
    "Deer",
    "Dog",
    "Frog",
    "Horse",
    "Ship",
    "Truck"
};
|
||||
|
||||
/*
 * Find the strongest of the ten class scores.
 *
 * predictions: array of at least 10 scores.
 * Returns the index of the largest score. On ties the lowest index wins, and
 * index 0 is returned when no score is greater than -128.
 */
int get_top_prediction(q7_t *predictions)
{
    int best_idx = 0;
    int best_score = -128;
    int idx = 0;

    while (idx < 10)
    {
        int score = predictions[idx];

        if (score > best_score)
        {
            best_score = score;
            best_idx = idx;
        }
        idx++;
    }

    return best_idx;
}
|
||||
|
||||
int cmsisnn_inference(uint8_t *input_data)
|
||||
{
|
||||
q7_t output_data[10];
|
||||
run_nn((q7_t *)input_data, output_data);
|
||||
arm_softmax_q7(output_data, IP1_OUT_DIM, output_data);
|
||||
int top_ind = get_top_prediction(output_data);
|
||||
printf("\rPrediction: %s \r\n", cifar10_label[top_ind]);
|
||||
return top_ind;
|
||||
}
|
||||
|
||||
/* Session state reached by the TJpgDec callbacks through JDEC.device. */
typedef struct {
    FILE *fp;       /* stream the input callback reads the JPEG from */
    uint8_t *fbuf;  /* frame buffer the output callback fills, 3 bytes per pixel */
    uint16_t wfbuf; /* frame buffer width in pixels */
} IODEV;
|
||||
|
||||
/*
 * TJpgDec input callback: feed the decoder from the file stored in IODEV.fp.
 *
 * jd:    decoder object; jd->device points to the IODEV for this session.
 * buff:  destination for the data, or NULL to skip nbyte bytes of input.
 * nbyte: number of bytes to read or skip.
 *
 * Returns the number of bytes read (or skipped); 0 when a skip fails.
 */
unsigned int in_func_cmsisnn(JDEC *jd, uint8_t *buff, unsigned int nbyte)
{
    IODEV *dev = (IODEV *)jd->device;

    if (buff == NULL)
    {
        /* Skip request: advance the stream without copying anything. */
        return fseek(dev->fp, nbyte, SEEK_CUR) ? 0 : nbyte;
    }

    /* fread() never returns more than nbyte, so the count fits the return
     * type; no narrowing to 16 bits that would corrupt large read counts. */
    return (unsigned int)fread(buff, 1, nbyte, dev->fp);
}
|
||||
|
||||
/*
 * TJpgDec output callback: copy one decoded rectangle into the frame buffer.
 *
 * jd:     decoder object; jd->device points to the IODEV for this session.
 * bitmap: decoded pixels of the rectangle, 3 bytes per pixel, rows packed.
 * rect:   position of the rectangle inside the image (inclusive bounds).
 *
 * Always returns 1.
 */
int out_func_cmsisnn(JDEC *jd, void *bitmap, JRECT *rect)
{
    IODEV *dev = (IODEV *)jd->device;
    uint16_t src_stride = 3 * (rect->right - rect->left + 1); /* bytes per rectangle row */
    uint16_t dst_stride = 3 * dev->wfbuf;                     /* bytes per frame buffer row */
    uint8_t *from = (uint8_t *)bitmap;
    uint8_t *to = dev->fbuf + 3 * (rect->top * dev->wfbuf + rect->left);
    uint16_t row;

    /* Print progress once per band, when the left-most rectangle arrives. */
    if (rect->left == 0)
    {
        printf("\r%lu%%", (rect->top << jd->scale) * 100UL / jd->height);
    }

    for (row = rect->top; row <= rect->bottom; row++, from += src_stride, to += dst_stride)
    {
        memcpy(to, from, src_stride);
    }

    return 1;
}
|
||||
|
||||
int cmsisnn_demo(int argc, char *argv[])
|
||||
{
|
||||
void *work;
|
||||
JDEC jdec;
|
||||
JRESULT res;
|
||||
IODEV devid;
|
||||
|
||||
if (argc < 2)
|
||||
{
|
||||
printf("Jpeg_Dec illegal arguments ...\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
devid.fp = fopen(argv[1], "r+");
|
||||
if (!devid.fp)
|
||||
{
|
||||
printf("Jpeg_Dec open the file failed...\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
work = malloc(WORK_POOL_SIZE);
|
||||
if (work == NULL)
|
||||
{
|
||||
printf("Jpeg_Dec work malloc failed...\n");
|
||||
res = -1;
|
||||
goto __exit;
|
||||
}
|
||||
|
||||
res = jd_prepare(&jdec, in_func_cmsisnn, work, WORK_POOL_SIZE, &devid);
|
||||
if (res == JDR_OK)
|
||||
{
|
||||
printf("Image dimensions: %u by %u. %u bytes used.\n", jdec.width, jdec.height, 3100 - jdec.sz_pool);
|
||||
|
||||
devid.fbuf = malloc(3 * jdec.width * jdec.height);
|
||||
if (devid.fbuf == NULL)
|
||||
{
|
||||
printf("Jpeg_Dec devid.fbuf malloc failed, need to use %d Bytes ...\n", 3 * jdec.width * jdec.height);
|
||||
res = -1;
|
||||
goto __exit;
|
||||
}
|
||||
devid.wfbuf = jdec.width;
|
||||
|
||||
res = jd_decomp(&jdec, out_func_cmsisnn, 0);
|
||||
if (res == JDR_OK)
|
||||
{
|
||||
printf("\rDecompress success \n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("Failed to decompress: rc=%d\n", res);
|
||||
}
|
||||
|
||||
cmsisnn_inference(devid.fbuf);
|
||||
|
||||
if (devid.fbuf != NULL)
|
||||
{
|
||||
free(devid.fbuf);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("Failed to prepare: rc=%d\n", res);
|
||||
}
|
||||
|
||||
__exit:
|
||||
if (work != NULL)
|
||||
{
|
||||
free(work);
|
||||
}
|
||||
|
||||
fclose(devid.fp);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Export cmsisnn_demo through MSH_CMD_EXPORT only when the RT-Thread header has been included. */
#if defined(__RT_THREAD_H__)
MSH_CMD_EXPORT(cmsisnn_demo, cifar10 demo and filename should be followed);
#endif /* __RT_THREAD_H__ */
|
|
@ -0,0 +1,46 @@
|
|||
#include "nn.h"
|
||||
|
||||
// Timer t;
|
||||
|
||||
static uint8_t mean[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM] = MEAN_DATA;
|
||||
|
||||
static q7_t conv1_wt[CONV1_IN_CH*CONV1_KER_DIM*CONV1_KER_DIM*CONV1_OUT_CH] = CONV1_WT;
|
||||
static q7_t conv1_bias[CONV1_OUT_CH] = CONV1_BIAS;
|
||||
|
||||
static q7_t conv2_wt[CONV2_IN_CH*CONV2_KER_DIM*CONV2_KER_DIM*CONV2_OUT_CH] = CONV2_WT;
|
||||
static q7_t conv2_bias[CONV2_OUT_CH] = CONV2_BIAS;
|
||||
|
||||
static q7_t conv3_wt[CONV3_IN_CH*CONV3_KER_DIM*CONV3_KER_DIM*CONV3_OUT_CH] = CONV3_WT;
|
||||
static q7_t conv3_bias[CONV3_OUT_CH] = CONV3_BIAS;
|
||||
|
||||
static q7_t ip1_wt[IP1_IN_DIM*IP1_OUT_DIM] = IP1_WT;
|
||||
static q7_t ip1_bias[IP1_OUT_DIM] = IP1_BIAS;
|
||||
|
||||
//Add input_data and output_data in top main.cpp file
|
||||
//uint8_t input_data[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM];
|
||||
//q7_t output_data[IP1_OUT_DIM];
|
||||
|
||||
q7_t col_buffer[3200];
|
||||
q7_t scratch_buffer[40960];
|
||||
|
||||
/*
 * Subtract the per-element mean from the image in place and saturate the
 * result to 8 bits.
 *
 * image_data: DATA_OUT_CH * DATA_OUT_DIM * DATA_OUT_DIM elements. On entry the
 *             buffer holds raw unsigned pixel bytes (callers pass a uint8_t
 *             buffer cast to q7_t *); on return it holds the signed q7 input
 *             of the network.
 */
void mean_subtract(q7_t *image_data) {
    for (int i = 0; i < DATA_OUT_CH * DATA_OUT_DIM * DATA_OUT_DIM; i++) {
        /* Read the pixel as unsigned: through q7_t every value >= 128 would
         * turn negative before the mean is subtracted. */
        int centered = (int)(uint8_t)image_data[i] - (int)mean[i];

        image_data[i] = (q7_t)__SSAT(centered >> DATA_RSHIFT, 8);
    }
}
|
||||
|
||||
/*
 * Run the network on one image.
 *
 * Pipeline: mean subtraction, conv1 -> max-pool -> ReLU, conv2 -> ReLU ->
 * average-pool, conv3 -> ReLU -> average-pool, fully connected layer.
 *
 * input_data:  image buffer; modified in place by mean_subtract().
 * output_data: receives the output of the fully connected layer.
 */
void run_nn(q7_t *input_data, q7_t *output_data) {
    /* Two regions of scratch_buffer that alternate as layer input and output. */
    q7_t *act_a = scratch_buffer;
    q7_t *act_b = act_a + 32768;
    q15_t *col_q15 = (q15_t *)col_buffer;

    mean_subtract(input_data);

    /* Stage 1 */
    arm_convolve_HWC_q7_RGB(input_data, CONV1_IN_DIM, CONV1_IN_CH,
                            conv1_wt, CONV1_OUT_CH, CONV1_KER_DIM, CONV1_PAD, CONV1_STRIDE,
                            conv1_bias, CONV1_BIAS_LSHIFT, CONV1_OUT_RSHIFT,
                            act_a, CONV1_OUT_DIM, col_q15, NULL);
    arm_maxpool_q7_HWC(act_a, POOL1_IN_DIM, POOL1_IN_CH, POOL1_KER_DIM, POOL1_PAD,
                       POOL1_STRIDE, POOL1_OUT_DIM, col_buffer, act_b);
    arm_relu_q7(act_b, RELU1_OUT_DIM * RELU1_OUT_DIM * RELU1_OUT_CH);

    /* Stage 2 */
    arm_convolve_HWC_q7_fast(act_b, CONV2_IN_DIM, CONV2_IN_CH,
                             conv2_wt, CONV2_OUT_CH, CONV2_KER_DIM, CONV2_PAD, CONV2_STRIDE,
                             conv2_bias, CONV2_BIAS_LSHIFT, CONV2_OUT_RSHIFT,
                             act_a, CONV2_OUT_DIM, col_q15, NULL);
    arm_relu_q7(act_a, RELU2_OUT_DIM * RELU2_OUT_DIM * RELU2_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL2_IN_DIM, POOL2_IN_CH, POOL2_KER_DIM, POOL2_PAD,
                       POOL2_STRIDE, POOL2_OUT_DIM, col_buffer, act_b);

    /* Stage 3 */
    arm_convolve_HWC_q7_fast(act_b, CONV3_IN_DIM, CONV3_IN_CH,
                             conv3_wt, CONV3_OUT_CH, CONV3_KER_DIM, CONV3_PAD, CONV3_STRIDE,
                             conv3_bias, CONV3_BIAS_LSHIFT, CONV3_OUT_RSHIFT,
                             act_a, CONV3_OUT_DIM, col_q15, NULL);
    arm_relu_q7(act_a, RELU3_OUT_DIM * RELU3_OUT_DIM * RELU3_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL3_IN_DIM, POOL3_IN_CH, POOL3_KER_DIM, POOL3_PAD,
                       POOL3_STRIDE, POOL3_OUT_DIM, col_buffer, act_b);

    /* Classifier */
    arm_fully_connected_q7_opt(act_b, ip1_wt, IP1_IN_DIM, IP1_OUT_DIM,
                               IP1_BIAS_LSHIFT, IP1_OUT_RSHIFT, ip1_bias,
                               output_data, col_q15);
}
|
|
@ -0,0 +1,12 @@
|
|||
#ifndef __NN_H__
|
||||
#define __NN_H__
|
||||
|
||||
// #include "mbed.h"
|
||||
#include "arm_math.h"
|
||||
#include "parameter.h"
|
||||
#include "weights.h"
|
||||
#include "arm_nnfunctions.h"
|
||||
|
||||
/*
 * Run the CIFAR-10 network.
 * input_data:  DATA_OUT_CH * DATA_OUT_DIM * DATA_OUT_DIM image elements, modified in place.
 * output_data: receives IP1_OUT_DIM output values.
 */
void run_nn(q7_t *input_data, q7_t *output_data);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,69 @@
|
|||
/* Input image: channels and side length. */
#define DATA_OUT_CH         3
#define DATA_OUT_DIM        32

/* Convolution layer 1. */
#define CONV1_IN_DIM        32
#define CONV1_IN_CH         3
#define CONV1_KER_DIM       5
#define CONV1_PAD           2
#define CONV1_STRIDE        1
#define CONV1_OUT_CH        32
#define CONV1_OUT_DIM       32

/* Pooling layer 1. */
#define POOL1_IN_DIM        32
#define POOL1_IN_CH         32
#define POOL1_KER_DIM       3
#define POOL1_STRIDE        2
#define POOL1_PAD           0
#define POOL1_OUT_DIM       16

/* ReLU 1. */
#define RELU1_OUT_CH        32
#define RELU1_OUT_DIM       16

/* Convolution layer 2. */
#define CONV2_IN_DIM        16
#define CONV2_IN_CH         32
#define CONV2_KER_DIM       5
#define CONV2_PAD           2
#define CONV2_STRIDE        1
#define CONV2_OUT_CH        16
#define CONV2_OUT_DIM       16

/* ReLU 2. */
#define RELU2_OUT_CH        16
#define RELU2_OUT_DIM       16

/* Pooling layer 2. */
#define POOL2_IN_DIM        16
#define POOL2_IN_CH         16
#define POOL2_KER_DIM       3
#define POOL2_STRIDE        2
#define POOL2_PAD           0
#define POOL2_OUT_DIM       8

/* Convolution layer 3. */
#define CONV3_IN_DIM        8
#define CONV3_IN_CH         16
#define CONV3_KER_DIM       5
#define CONV3_PAD           2
#define CONV3_STRIDE        1
#define CONV3_OUT_CH        32
#define CONV3_OUT_DIM       8

/* ReLU 3. */
#define RELU3_OUT_CH        32
#define RELU3_OUT_DIM       8

/* Pooling layer 3. */
#define POOL3_IN_DIM        8
#define POOL3_IN_CH         32
#define POOL3_KER_DIM       3
#define POOL3_STRIDE        2
#define POOL3_PAD           0
#define POOL3_OUT_DIM       4

/* Fully connected layer. */
#define IP1_IN_DIM          512
#define IP1_OUT_DIM         10

/* Shift amounts passed to mean_subtract() and the CMSIS-NN kernels in run_nn(). */
#define DATA_RSHIFT         0
#define CONV1_BIAS_LSHIFT   0
#define CONV1_OUT_RSHIFT    11
#define CONV2_BIAS_LSHIFT   0
#define CONV2_OUT_RSHIFT    8
#define CONV3_BIAS_LSHIFT   0
#define CONV3_OUT_RSHIFT    8
#define IP1_BIAS_LSHIFT     5
#define IP1_OUT_RSHIFT      7
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,46 @@
|
|||
#include "nn.h"
|
||||
|
||||
// Timer t;
|
||||
|
||||
static uint8_t mean[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM] = MEAN_DATA;
|
||||
|
||||
static q7_t conv1_wt[CONV1_IN_CH*CONV1_KER_DIM*CONV1_KER_DIM*CONV1_OUT_CH] = CONV1_WT;
|
||||
static q7_t conv1_bias[CONV1_OUT_CH] = CONV1_BIAS;
|
||||
|
||||
static q7_t conv2_wt[CONV2_IN_CH*CONV2_KER_DIM*CONV2_KER_DIM*CONV2_OUT_CH] = CONV2_WT;
|
||||
static q7_t conv2_bias[CONV2_OUT_CH] = CONV2_BIAS;
|
||||
|
||||
static q7_t conv3_wt[CONV3_IN_CH*CONV3_KER_DIM*CONV3_KER_DIM*CONV3_OUT_CH] = CONV3_WT;
|
||||
static q7_t conv3_bias[CONV3_OUT_CH] = CONV3_BIAS;
|
||||
|
||||
static q7_t ip1_wt[IP1_IN_DIM*IP1_OUT_DIM] = IP1_WT;
|
||||
static q7_t ip1_bias[IP1_OUT_DIM] = IP1_BIAS;
|
||||
|
||||
//Add input_data and output_data in top main.cpp file
|
||||
//uint8_t input_data[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM];
|
||||
//q7_t output_data[IP1_OUT_DIM];
|
||||
|
||||
q7_t col_buffer[6400];
|
||||
q7_t scratch_buffer[40960];
|
||||
|
||||
/*
 * Subtract the per-element mean from the image in place and saturate the
 * result to 8 bits.
 *
 * image_data: DATA_OUT_CH * DATA_OUT_DIM * DATA_OUT_DIM elements. On entry the
 *             buffer holds raw unsigned pixel bytes (callers pass a uint8_t
 *             buffer cast to q7_t *); on return it holds the signed q7 input
 *             of the network.
 */
void mean_subtract(q7_t *image_data) {
    for (int i = 0; i < DATA_OUT_CH * DATA_OUT_DIM * DATA_OUT_DIM; i++) {
        /* Read the pixel as unsigned: through q7_t every value >= 128 would
         * turn negative before the mean is subtracted. */
        int centered = (int)(uint8_t)image_data[i] - (int)mean[i];

        image_data[i] = (q7_t)__SSAT(centered >> DATA_RSHIFT, 8);
    }
}
|
||||
|
||||
/*
 * Run the network on one image.
 *
 * Pipeline: mean subtraction, conv1 -> max-pool -> ReLU, conv2 -> ReLU ->
 * average-pool, conv3 -> ReLU -> average-pool, fully connected layer.
 *
 * input_data:  image buffer; modified in place by mean_subtract().
 * output_data: receives the output of the fully connected layer.
 */
void run_nn(q7_t *input_data, q7_t *output_data) {
    /* Two regions of scratch_buffer that alternate as layer input and output. */
    q7_t *act_a = scratch_buffer;
    q7_t *act_b = act_a + 32768;
    q15_t *col_q15 = (q15_t *)col_buffer;

    mean_subtract(input_data);

    /* Stage 1 */
    arm_convolve_HWC_q7_RGB(input_data, CONV1_IN_DIM, CONV1_IN_CH,
                            conv1_wt, CONV1_OUT_CH, CONV1_KER_DIM, CONV1_PAD, CONV1_STRIDE,
                            conv1_bias, CONV1_BIAS_LSHIFT, CONV1_OUT_RSHIFT,
                            act_a, CONV1_OUT_DIM, col_q15, NULL);
    arm_maxpool_q7_HWC(act_a, POOL1_IN_DIM, POOL1_IN_CH, POOL1_KER_DIM, POOL1_PAD,
                       POOL1_STRIDE, POOL1_OUT_DIM, col_buffer, act_b);
    arm_relu_q7(act_b, RELU1_OUT_DIM * RELU1_OUT_DIM * RELU1_OUT_CH);

    /* Stage 2 */
    arm_convolve_HWC_q7_fast(act_b, CONV2_IN_DIM, CONV2_IN_CH,
                             conv2_wt, CONV2_OUT_CH, CONV2_KER_DIM, CONV2_PAD, CONV2_STRIDE,
                             conv2_bias, CONV2_BIAS_LSHIFT, CONV2_OUT_RSHIFT,
                             act_a, CONV2_OUT_DIM, col_q15, NULL);
    arm_relu_q7(act_a, RELU2_OUT_DIM * RELU2_OUT_DIM * RELU2_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL2_IN_DIM, POOL2_IN_CH, POOL2_KER_DIM, POOL2_PAD,
                       POOL2_STRIDE, POOL2_OUT_DIM, col_buffer, act_b);

    /* Stage 3 */
    arm_convolve_HWC_q7_fast(act_b, CONV3_IN_DIM, CONV3_IN_CH,
                             conv3_wt, CONV3_OUT_CH, CONV3_KER_DIM, CONV3_PAD, CONV3_STRIDE,
                             conv3_bias, CONV3_BIAS_LSHIFT, CONV3_OUT_RSHIFT,
                             act_a, CONV3_OUT_DIM, col_q15, NULL);
    arm_relu_q7(act_a, RELU3_OUT_DIM * RELU3_OUT_DIM * RELU3_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL3_IN_DIM, POOL3_IN_CH, POOL3_KER_DIM, POOL3_PAD,
                       POOL3_STRIDE, POOL3_OUT_DIM, col_buffer, act_b);

    /* Classifier */
    arm_fully_connected_q7_opt(act_b, ip1_wt, IP1_IN_DIM, IP1_OUT_DIM,
                               IP1_BIAS_LSHIFT, IP1_OUT_RSHIFT, ip1_bias,
                               output_data, col_q15);
}
|
|
@ -0,0 +1,12 @@
|
|||
#ifndef __NN_H__
|
||||
#define __NN_H__
|
||||
|
||||
// #include "mbed.h"
|
||||
#include "arm_math.h"
|
||||
#include "parameter.h"
|
||||
#include "weights.h"
|
||||
#include "arm_nnfunctions.h"
|
||||
|
||||
/*
 * Run the CIFAR-10 network.
 * input_data:  DATA_OUT_CH * DATA_OUT_DIM * DATA_OUT_DIM image elements, modified in place.
 * output_data: receives IP1_OUT_DIM output values.
 */
void run_nn(q7_t *input_data, q7_t *output_data);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,69 @@
|
|||
/* Input image: channels and side length. */
#define DATA_OUT_CH         3
#define DATA_OUT_DIM        32

/* Convolution layer 1. */
#define CONV1_IN_DIM        32
#define CONV1_IN_CH         3
#define CONV1_KER_DIM       5
#define CONV1_PAD           2
#define CONV1_STRIDE        1
#define CONV1_OUT_CH        32
#define CONV1_OUT_DIM       32

/* Pooling layer 1. */
#define POOL1_IN_DIM        32
#define POOL1_IN_CH         32
#define POOL1_KER_DIM       3
#define POOL1_STRIDE        2
#define POOL1_PAD           0
#define POOL1_OUT_DIM       16

/* ReLU 1. */
#define RELU1_OUT_CH        32
#define RELU1_OUT_DIM       16

/* Convolution layer 2. */
#define CONV2_IN_DIM        16
#define CONV2_IN_CH         32
#define CONV2_KER_DIM       5
#define CONV2_PAD           2
#define CONV2_STRIDE        1
#define CONV2_OUT_CH        32
#define CONV2_OUT_DIM       16

/* ReLU 2. */
#define RELU2_OUT_CH        32
#define RELU2_OUT_DIM       16

/* Pooling layer 2. */
#define POOL2_IN_DIM        16
#define POOL2_IN_CH         32
#define POOL2_KER_DIM       3
#define POOL2_STRIDE        2
#define POOL2_PAD           0
#define POOL2_OUT_DIM       8

/* Convolution layer 3. */
#define CONV3_IN_DIM        8
#define CONV3_IN_CH         32
#define CONV3_KER_DIM       5
#define CONV3_PAD           2
#define CONV3_STRIDE        1
#define CONV3_OUT_CH        64
#define CONV3_OUT_DIM       8

/* ReLU 3. */
#define RELU3_OUT_CH        64
#define RELU3_OUT_DIM       8

/* Pooling layer 3. */
#define POOL3_IN_DIM        8
#define POOL3_IN_CH         64
#define POOL3_KER_DIM       3
#define POOL3_STRIDE        2
#define POOL3_PAD           0
#define POOL3_OUT_DIM       4

/* Fully connected layer. */
#define IP1_IN_DIM          1024
#define IP1_OUT_DIM         10

/* Shift amounts passed to mean_subtract() and the CMSIS-NN kernels in run_nn(). */
#define DATA_RSHIFT         0
#define CONV1_BIAS_LSHIFT   0
#define CONV1_OUT_RSHIFT    9
#define CONV2_BIAS_LSHIFT   0
#define CONV2_OUT_RSHIFT    9
#define CONV3_BIAS_LSHIFT   0
#define CONV3_OUT_RSHIFT    9
#define IP1_BIAS_LSHIFT     3
#define IP1_OUT_RSHIFT      5
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue