APP_Framework/Applications/:add CMSIS-NN (version 5) cifar10 example

This commit is contained in:
WentaoWong 2021-11-29 18:51:31 +08:00
parent 96ca3b18f1
commit 49caa6e533
11 changed files with 461 additions and 0 deletions

View File

@ -0,0 +1,14 @@
# CMSIS-NN cifar10 example
The model of this example is from [[ARM-software](https://github.com/ARM-software)/**[ML-examples](https://github.com/ARM-software/ML-examples)**] and can be deployed on Arm Cortex-M CPUs using [CMSIS-NN](https://github.com/ARM-software/CMSIS_5).
## Requirements:
- CMSIS-NN in Framework/knowing/cmsis_5
- TJpgDec in Framework/knowing/image_processing
- Sufficient stack size for the finsh thread (10240 is recommended); it can be changed with menuconfig under "RT-Thread Components->Command shell->finsh shell".
## To run this demo:
- Place the photo wherever you like
- Run the demo by typing the command
```
cmsisnn_demo /path/to/photo
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 1000 B

View File

@ -0,0 +1,157 @@
#include <transform.h>
#include <tjpgd.h>
#include "../model/m4/nn.h"
/* Size in bytes of the work pool allocated for the JPEG decoder and handed to jd_prepare(). */
#define WORK_POOL_SIZE (4 * 1024 + 32)
/* Class names, indexed by the class index that get_top_prediction() returns. */
const char *cifar10_label[] = {"Plane", "Car", "Bird", "Cat", "Deer", "Dog", "Frog", "Horse", "Ship", "Truck"};
/**
 * Find the highest score among the first 10 entries of a prediction vector.
 *
 * @param predictions  Array of at least 10 scores.
 * @return Index (0..9) of the largest score; on ties the lowest index wins.
 */
int get_top_prediction(q7_t *predictions)
{
    int best = 0;

    /* Entry 0 is the initial champion; only a strictly larger score replaces it. */
    for (int k = 1; k < 10; k++)
    {
        if (predictions[k] > predictions[best])
        {
            best = k;
        }
    }

    return best;
}
/**
 * Classify one image and print the predicted label.
 *
 * The buffer is handed to run_nn() reinterpreted as q7_t, the resulting
 * scores are passed through arm_softmax_q7() in place, and the label of the
 * best-scoring class is printed.
 *
 * @param input_data  Image bytes; run_nn() modifies this buffer in place.
 * @return Index of the predicted class in cifar10_label.
 */
int cmsisnn_inference(uint8_t *input_data)
{
    q7_t scores[10];
    int winner;

    run_nn((q7_t *)input_data, scores);
    arm_softmax_q7(scores, IP1_OUT_DIM, scores);

    winner = get_top_prediction(scores);
    printf("\rPrediction: %s \r\n", cifar10_label[winner]);

    return winner;
}
/* Decoder session state; the TJpgDec callbacks reach it through jd->device. */
typedef struct
{
FILE *fp; /* JPEG input stream read by in_func_cmsisnn() */
uint8_t *fbuf; /* output frame buffer written by out_func_cmsisnn(), 3 bytes per pixel */
uint16_t wfbuf; /* width of the frame buffer in pixels */
} IODEV;
/**
 * Input callback for the JPEG decoder: read or skip bytes of the input file.
 *
 * @param jd     Decoder object; jd->device points at the IODEV session state.
 * @param buff   Destination for the data, or NULL to skip bytes instead.
 * @param nbyte  Number of bytes to read or skip.
 * @return Number of bytes read (truncated to 16 bits), or when skipping,
 *         nbyte on success and 0 if the seek failed.
 */
unsigned int in_func_cmsisnn(JDEC *jd, uint8_t *buff, unsigned int nbyte)
{
    IODEV *io = (IODEV *)jd->device;

    if (!buff)
    {
        /* No destination buffer: advance the stream without reading. */
        if (fseek(io->fp, nbyte, SEEK_CUR) != 0)
        {
            return 0;
        }
        return nbyte;
    }

    return (uint16_t)fread(buff, 1, nbyte, io->fp);
}
/**
 * Output callback for the JPEG decoder: copy one decoded rectangle into the
 * frame buffer.
 *
 * @param jd      Decoder object; jd->device points at the IODEV session state.
 * @param bitmap  Decoded pixels of the rectangle, copied as 3 bytes per pixel.
 * @param rect    Rectangle (inclusive edges) the pixels belong to.
 * @return Always 1.
 */
int out_func_cmsisnn(JDEC *jd, void *bitmap, JRECT *rect)
{
    IODEV *io = (IODEV *)jd->device;
    uint8_t *from, *to;
    uint16_t row, block_stride, frame_stride;

    /* Report progress once per band, i.e. when the left-most block arrives. */
    if (rect->left == 0)
    {
        printf("\r%lu%%", (rect->top << jd->scale) * 100UL / jd->height);
    }

    block_stride = 3 * (rect->right - rect->left + 1); /* bytes per row of the rectangle */
    frame_stride = 3 * io->wfbuf;                      /* bytes per row of the frame buffer */
    from = (uint8_t *)bitmap;
    to = io->fbuf + 3 * (rect->top * io->wfbuf + rect->left);

    for (row = rect->top; row <= rect->bottom; row++)
    {
        memcpy(to, from, block_stride);
        from += block_stride;
        to += frame_stride;
    }

    return 1;
}
int cmsisnn_demo(int argc, char *argv[])
{
void *work;
JDEC jdec;
JRESULT res;
IODEV devid;
if (argc < 2)
{
printf("Jpeg_Dec illegal arguments ...\n");
return -1;
}
devid.fp = fopen(argv[1], "r+");
if (!devid.fp)
{
printf("Jpeg_Dec open the file failed...\n");
return -1;
}
work = malloc(WORK_POOL_SIZE);
if (work == NULL)
{
printf("Jpeg_Dec work malloc failed...\n");
res = -1;
goto __exit;
}
res = jd_prepare(&jdec, in_func_cmsisnn, work, WORK_POOL_SIZE, &devid);
if (res == JDR_OK)
{
printf("Image dimensions: %u by %u. %u bytes used.\n", jdec.width, jdec.height, 3100 - jdec.sz_pool);
devid.fbuf = malloc(3 * jdec.width * jdec.height);
if (devid.fbuf == NULL)
{
printf("Jpeg_Dec devid.fbuf malloc failed, need to use %d Bytes ...\n", 3 * jdec.width * jdec.height);
res = -1;
goto __exit;
}
devid.wfbuf = jdec.width;
res = jd_decomp(&jdec, out_func_cmsisnn, 0);
if (res == JDR_OK)
{
printf("\rDecompress success \n");
}
else
{
printf("Failed to decompress: rc=%d\n", res);
}
cmsisnn_inference(devid.fbuf);
if (devid.fbuf != NULL)
{
free(devid.fbuf);
}
}
else
{
printf("Failed to prepare: rc=%d\n", res);
}
__exit:
if (work != NULL)
{
free(work);
}
fclose(devid.fp);
return res;
}
/* Export cmsisnn_demo as a shell command; __RT_THREAD_H__ is presumably the RT-Thread header's include guard, so this applies only to RT-Thread builds. */
#ifdef __RT_THREAD_H__
MSH_CMD_EXPORT(cmsisnn_demo, cifar10 demo and filename should be followed);
#endif

View File

@ -0,0 +1,46 @@
#include "nn.h"
// Timer t;
/* Per-element mean that mean_subtract() removes from the input image. */
static uint8_t mean[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM] = MEAN_DATA;
/* Weights and biases of the three convolution layers and the fully connected layer.
 * The initialiser macros are presumably provided by weights.h (not visible here). */
static q7_t conv1_wt[CONV1_IN_CH*CONV1_KER_DIM*CONV1_KER_DIM*CONV1_OUT_CH] = CONV1_WT;
static q7_t conv1_bias[CONV1_OUT_CH] = CONV1_BIAS;
static q7_t conv2_wt[CONV2_IN_CH*CONV2_KER_DIM*CONV2_KER_DIM*CONV2_OUT_CH] = CONV2_WT;
static q7_t conv2_bias[CONV2_OUT_CH] = CONV2_BIAS;
static q7_t conv3_wt[CONV3_IN_CH*CONV3_KER_DIM*CONV3_KER_DIM*CONV3_OUT_CH] = CONV3_WT;
static q7_t conv3_bias[CONV3_OUT_CH] = CONV3_BIAS;
static q7_t ip1_wt[IP1_IN_DIM*IP1_OUT_DIM] = IP1_WT;
static q7_t ip1_bias[IP1_OUT_DIM] = IP1_BIAS;
//Add input_data and output_data in top main.cpp file
//uint8_t input_data[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM];
//q7_t output_data[IP1_OUT_DIM];
/* Working buffer passed to every CMSIS-NN layer call in run_nn(). */
q7_t col_buffer[3200];
/* Activation storage; run_nn() splits it into two regions at byte offset 32768. */
q7_t scratch_buffer[40960];
/**
 * Subtract the per-element mean from the image, in place.
 *
 * Callers hand over a buffer of unsigned 8-bit pixel values reinterpreted as
 * q7_t, so each byte is read back as uint8_t before the subtraction.
 * Reading it as a signed q7_t would turn every pixel >= 128 into a negative
 * number and saturate it to -128. The difference is shifted right by
 * DATA_RSHIFT and saturated to 8 bits.
 *
 * @param image_data  DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM bytes, overwritten
 *                    with the signed, mean-subtracted values.
 */
void mean_subtract(q7_t* image_data) {
    for (int i = 0; i < DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM; i++) {
        int pixel = (uint8_t)image_data[i]; /* recover the unsigned 0..255 value */
        image_data[i] = (q7_t)__SSAT( ((pixel - (int)mean[i]) >> DATA_RSHIFT), 8);
    }
}
/**
 * Run the network on one image.
 *
 * Layer order: mean subtraction, conv1, max-pool, ReLU, conv2, ReLU,
 * average-pool, conv3, ReLU, average-pool, fully connected. Activations
 * ping-pong between two regions of scratch_buffer.
 *
 * @param input_data   Image buffer; modified in place by mean_subtract().
 * @param output_data  Receives the output of the fully connected layer.
 */
void run_nn(q7_t* input_data, q7_t* output_data) {
    q7_t *act_a = scratch_buffer;          /* first region of the scratch area */
    q7_t *act_b = scratch_buffer + 32768;  /* second region, after the first 32768 bytes */
    q15_t *work = (q15_t*)col_buffer;      /* same working buffer, viewed as q15_t */

    mean_subtract(input_data);

    /* Stage 1: conv1 -> max-pool -> ReLU */
    arm_convolve_HWC_q7_RGB(input_data, CONV1_IN_DIM, CONV1_IN_CH,
                            conv1_wt, CONV1_OUT_CH, CONV1_KER_DIM, CONV1_PAD, CONV1_STRIDE,
                            conv1_bias, CONV1_BIAS_LSHIFT, CONV1_OUT_RSHIFT,
                            act_a, CONV1_OUT_DIM, work, NULL);
    arm_maxpool_q7_HWC(act_a, POOL1_IN_DIM, POOL1_IN_CH, POOL1_KER_DIM,
                       POOL1_PAD, POOL1_STRIDE, POOL1_OUT_DIM, col_buffer, act_b);
    arm_relu_q7(act_b, RELU1_OUT_DIM*RELU1_OUT_DIM*RELU1_OUT_CH);

    /* Stage 2: conv2 -> ReLU -> average-pool */
    arm_convolve_HWC_q7_fast(act_b, CONV2_IN_DIM, CONV2_IN_CH,
                             conv2_wt, CONV2_OUT_CH, CONV2_KER_DIM, CONV2_PAD, CONV2_STRIDE,
                             conv2_bias, CONV2_BIAS_LSHIFT, CONV2_OUT_RSHIFT,
                             act_a, CONV2_OUT_DIM, work, NULL);
    arm_relu_q7(act_a, RELU2_OUT_DIM*RELU2_OUT_DIM*RELU2_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL2_IN_DIM, POOL2_IN_CH, POOL2_KER_DIM,
                       POOL2_PAD, POOL2_STRIDE, POOL2_OUT_DIM, col_buffer, act_b);

    /* Stage 3: conv3 -> ReLU -> average-pool */
    arm_convolve_HWC_q7_fast(act_b, CONV3_IN_DIM, CONV3_IN_CH,
                             conv3_wt, CONV3_OUT_CH, CONV3_KER_DIM, CONV3_PAD, CONV3_STRIDE,
                             conv3_bias, CONV3_BIAS_LSHIFT, CONV3_OUT_RSHIFT,
                             act_a, CONV3_OUT_DIM, work, NULL);
    arm_relu_q7(act_a, RELU3_OUT_DIM*RELU3_OUT_DIM*RELU3_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL3_IN_DIM, POOL3_IN_CH, POOL3_KER_DIM,
                       POOL3_PAD, POOL3_STRIDE, POOL3_OUT_DIM, col_buffer, act_b);

    /* Classifier: fully connected layer writes the final scores. */
    arm_fully_connected_q7_opt(act_b, ip1_wt, IP1_IN_DIM, IP1_OUT_DIM,
                               IP1_BIAS_LSHIFT, IP1_OUT_RSHIFT, ip1_bias,
                               output_data, work);
}

View File

@ -0,0 +1,12 @@
/* Public interface of the generated network: pulls in the CMSIS headers, the layer parameters and the weights. */
#ifndef __NN_H__
#define __NN_H__
// #include "mbed.h"
#include "arm_math.h"
#include "parameter.h"
#include "weights.h"
#include "arm_nnfunctions.h"
/*
 * Run the network on input_data and write the fully connected layer's output
 * (IP1_OUT_DIM values) to output_data. input_data must hold
 * DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM bytes and is modified in place.
 */
void run_nn(q7_t* input_data, q7_t* output_data);
#endif

View File

@ -0,0 +1,69 @@
/* Input image: channel count and side length (the image buffer holds CH*DIM*DIM bytes). */
#define DATA_OUT_CH 3
#define DATA_OUT_DIM 32
/* Convolution layer 1. */
#define CONV1_IN_DIM 32
#define CONV1_IN_CH 3
#define CONV1_KER_DIM 5
#define CONV1_PAD 2
#define CONV1_STRIDE 1
#define CONV1_OUT_CH 32
#define CONV1_OUT_DIM 32
/* Pooling layer 1 (run_nn() uses max pooling here). */
#define POOL1_IN_DIM 32
#define POOL1_IN_CH 32
#define POOL1_KER_DIM 3
#define POOL1_STRIDE 2
#define POOL1_PAD 0
#define POOL1_OUT_DIM 16
/* ReLU 1: element count is DIM*DIM*CH. */
#define RELU1_OUT_CH 32
#define RELU1_OUT_DIM 16
/* Convolution layer 2. */
#define CONV2_IN_DIM 16
#define CONV2_IN_CH 32
#define CONV2_KER_DIM 5
#define CONV2_PAD 2
#define CONV2_STRIDE 1
#define CONV2_OUT_CH 16
#define CONV2_OUT_DIM 16
/* ReLU 2. */
#define RELU2_OUT_CH 16
#define RELU2_OUT_DIM 16
/* Pooling layer 2 (run_nn() uses average pooling here). */
#define POOL2_IN_DIM 16
#define POOL2_IN_CH 16
#define POOL2_KER_DIM 3
#define POOL2_STRIDE 2
#define POOL2_PAD 0
#define POOL2_OUT_DIM 8
/* Convolution layer 3. */
#define CONV3_IN_DIM 8
#define CONV3_IN_CH 16
#define CONV3_KER_DIM 5
#define CONV3_PAD 2
#define CONV3_STRIDE 1
#define CONV3_OUT_CH 32
#define CONV3_OUT_DIM 8
/* ReLU 3. */
#define RELU3_OUT_CH 32
#define RELU3_OUT_DIM 8
/* Pooling layer 3 (run_nn() uses average pooling here). */
#define POOL3_IN_DIM 8
#define POOL3_IN_CH 32
#define POOL3_KER_DIM 3
#define POOL3_STRIDE 2
#define POOL3_PAD 0
#define POOL3_OUT_DIM 4
/* Fully connected layer: input vector length and number of outputs. */
#define IP1_IN_DIM 512
#define IP1_OUT_DIM 10
/* Fixed-point shift amounts: DATA_RSHIFT is applied in mean_subtract(); the
 * others are passed to the corresponding CMSIS-NN layer calls in run_nn(). */
#define DATA_RSHIFT 0
#define CONV1_BIAS_LSHIFT 0
#define CONV1_OUT_RSHIFT 11
#define CONV2_BIAS_LSHIFT 0
#define CONV2_OUT_RSHIFT 8
#define CONV3_BIAS_LSHIFT 0
#define CONV3_OUT_RSHIFT 8
#define IP1_BIAS_LSHIFT 5
#define IP1_OUT_RSHIFT 7

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,46 @@
#include "nn.h"
// Timer t;
/* Per-element mean that mean_subtract() removes from the input image. */
static uint8_t mean[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM] = MEAN_DATA;
/* Weights and biases of the three convolution layers and the fully connected layer.
 * The initialiser macros are presumably provided by weights.h (not visible here). */
static q7_t conv1_wt[CONV1_IN_CH*CONV1_KER_DIM*CONV1_KER_DIM*CONV1_OUT_CH] = CONV1_WT;
static q7_t conv1_bias[CONV1_OUT_CH] = CONV1_BIAS;
static q7_t conv2_wt[CONV2_IN_CH*CONV2_KER_DIM*CONV2_KER_DIM*CONV2_OUT_CH] = CONV2_WT;
static q7_t conv2_bias[CONV2_OUT_CH] = CONV2_BIAS;
static q7_t conv3_wt[CONV3_IN_CH*CONV3_KER_DIM*CONV3_KER_DIM*CONV3_OUT_CH] = CONV3_WT;
static q7_t conv3_bias[CONV3_OUT_CH] = CONV3_BIAS;
static q7_t ip1_wt[IP1_IN_DIM*IP1_OUT_DIM] = IP1_WT;
static q7_t ip1_bias[IP1_OUT_DIM] = IP1_BIAS;
//Add input_data and output_data in top main.cpp file
//uint8_t input_data[DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM];
//q7_t output_data[IP1_OUT_DIM];
/* Working buffer passed to every CMSIS-NN layer call in run_nn(). */
q7_t col_buffer[6400];
/* Activation storage; run_nn() splits it into two regions at byte offset 32768. */
q7_t scratch_buffer[40960];
/**
 * Subtract the per-element mean from the image, in place.
 *
 * Callers hand over a buffer of unsigned 8-bit pixel values reinterpreted as
 * q7_t, so each byte is read back as uint8_t before the subtraction.
 * Reading it as a signed q7_t would turn every pixel >= 128 into a negative
 * number and saturate it to -128. The difference is shifted right by
 * DATA_RSHIFT and saturated to 8 bits.
 *
 * @param image_data  DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM bytes, overwritten
 *                    with the signed, mean-subtracted values.
 */
void mean_subtract(q7_t* image_data) {
    for (int i = 0; i < DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM; i++) {
        int pixel = (uint8_t)image_data[i]; /* recover the unsigned 0..255 value */
        image_data[i] = (q7_t)__SSAT( ((pixel - (int)mean[i]) >> DATA_RSHIFT), 8);
    }
}
/**
 * Run the network on one image.
 *
 * Layer order: mean subtraction, conv1, max-pool, ReLU, conv2, ReLU,
 * average-pool, conv3, ReLU, average-pool, fully connected. Activations
 * ping-pong between two regions of scratch_buffer.
 *
 * @param input_data   Image buffer; modified in place by mean_subtract().
 * @param output_data  Receives the output of the fully connected layer.
 */
void run_nn(q7_t* input_data, q7_t* output_data) {
    q7_t *act_a = scratch_buffer;          /* first region of the scratch area */
    q7_t *act_b = scratch_buffer + 32768;  /* second region, after the first 32768 bytes */
    q15_t *work = (q15_t*)col_buffer;      /* same working buffer, viewed as q15_t */

    mean_subtract(input_data);

    /* Stage 1: conv1 -> max-pool -> ReLU */
    arm_convolve_HWC_q7_RGB(input_data, CONV1_IN_DIM, CONV1_IN_CH,
                            conv1_wt, CONV1_OUT_CH, CONV1_KER_DIM, CONV1_PAD, CONV1_STRIDE,
                            conv1_bias, CONV1_BIAS_LSHIFT, CONV1_OUT_RSHIFT,
                            act_a, CONV1_OUT_DIM, work, NULL);
    arm_maxpool_q7_HWC(act_a, POOL1_IN_DIM, POOL1_IN_CH, POOL1_KER_DIM,
                       POOL1_PAD, POOL1_STRIDE, POOL1_OUT_DIM, col_buffer, act_b);
    arm_relu_q7(act_b, RELU1_OUT_DIM*RELU1_OUT_DIM*RELU1_OUT_CH);

    /* Stage 2: conv2 -> ReLU -> average-pool */
    arm_convolve_HWC_q7_fast(act_b, CONV2_IN_DIM, CONV2_IN_CH,
                             conv2_wt, CONV2_OUT_CH, CONV2_KER_DIM, CONV2_PAD, CONV2_STRIDE,
                             conv2_bias, CONV2_BIAS_LSHIFT, CONV2_OUT_RSHIFT,
                             act_a, CONV2_OUT_DIM, work, NULL);
    arm_relu_q7(act_a, RELU2_OUT_DIM*RELU2_OUT_DIM*RELU2_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL2_IN_DIM, POOL2_IN_CH, POOL2_KER_DIM,
                       POOL2_PAD, POOL2_STRIDE, POOL2_OUT_DIM, col_buffer, act_b);

    /* Stage 3: conv3 -> ReLU -> average-pool */
    arm_convolve_HWC_q7_fast(act_b, CONV3_IN_DIM, CONV3_IN_CH,
                             conv3_wt, CONV3_OUT_CH, CONV3_KER_DIM, CONV3_PAD, CONV3_STRIDE,
                             conv3_bias, CONV3_BIAS_LSHIFT, CONV3_OUT_RSHIFT,
                             act_a, CONV3_OUT_DIM, work, NULL);
    arm_relu_q7(act_a, RELU3_OUT_DIM*RELU3_OUT_DIM*RELU3_OUT_CH);
    arm_avepool_q7_HWC(act_a, POOL3_IN_DIM, POOL3_IN_CH, POOL3_KER_DIM,
                       POOL3_PAD, POOL3_STRIDE, POOL3_OUT_DIM, col_buffer, act_b);

    /* Classifier: fully connected layer writes the final scores. */
    arm_fully_connected_q7_opt(act_b, ip1_wt, IP1_IN_DIM, IP1_OUT_DIM,
                               IP1_BIAS_LSHIFT, IP1_OUT_RSHIFT, ip1_bias,
                               output_data, work);
}

View File

@ -0,0 +1,12 @@
/* Public interface of the generated network: pulls in the CMSIS headers, the layer parameters and the weights. */
#ifndef __NN_H__
#define __NN_H__
// #include "mbed.h"
#include "arm_math.h"
#include "parameter.h"
#include "weights.h"
#include "arm_nnfunctions.h"
/*
 * Run the network on input_data and write the fully connected layer's output
 * (IP1_OUT_DIM values) to output_data. input_data must hold
 * DATA_OUT_CH*DATA_OUT_DIM*DATA_OUT_DIM bytes and is modified in place.
 */
void run_nn(q7_t* input_data, q7_t* output_data);
#endif

View File

@ -0,0 +1,69 @@
/* Input image: channel count and side length (the image buffer holds CH*DIM*DIM bytes). */
#define DATA_OUT_CH 3
#define DATA_OUT_DIM 32
/* Convolution layer 1. */
#define CONV1_IN_DIM 32
#define CONV1_IN_CH 3
#define CONV1_KER_DIM 5
#define CONV1_PAD 2
#define CONV1_STRIDE 1
#define CONV1_OUT_CH 32
#define CONV1_OUT_DIM 32
/* Pooling layer 1 (run_nn() uses max pooling here). */
#define POOL1_IN_DIM 32
#define POOL1_IN_CH 32
#define POOL1_KER_DIM 3
#define POOL1_STRIDE 2
#define POOL1_PAD 0
#define POOL1_OUT_DIM 16
/* ReLU 1: element count is DIM*DIM*CH. */
#define RELU1_OUT_CH 32
#define RELU1_OUT_DIM 16
/* Convolution layer 2. */
#define CONV2_IN_DIM 16
#define CONV2_IN_CH 32
#define CONV2_KER_DIM 5
#define CONV2_PAD 2
#define CONV2_STRIDE 1
#define CONV2_OUT_CH 32
#define CONV2_OUT_DIM 16
/* ReLU 2. */
#define RELU2_OUT_CH 32
#define RELU2_OUT_DIM 16
/* Pooling layer 2 (run_nn() uses average pooling here). */
#define POOL2_IN_DIM 16
#define POOL2_IN_CH 32
#define POOL2_KER_DIM 3
#define POOL2_STRIDE 2
#define POOL2_PAD 0
#define POOL2_OUT_DIM 8
/* Convolution layer 3. */
#define CONV3_IN_DIM 8
#define CONV3_IN_CH 32
#define CONV3_KER_DIM 5
#define CONV3_PAD 2
#define CONV3_STRIDE 1
#define CONV3_OUT_CH 64
#define CONV3_OUT_DIM 8
/* ReLU 3. */
#define RELU3_OUT_CH 64
#define RELU3_OUT_DIM 8
/* Pooling layer 3 (run_nn() uses average pooling here). */
#define POOL3_IN_DIM 8
#define POOL3_IN_CH 64
#define POOL3_KER_DIM 3
#define POOL3_STRIDE 2
#define POOL3_PAD 0
#define POOL3_OUT_DIM 4
/* Fully connected layer: input vector length and number of outputs. */
#define IP1_IN_DIM 1024
#define IP1_OUT_DIM 10
/* Fixed-point shift amounts: DATA_RSHIFT is applied in mean_subtract(); the
 * others are passed to the corresponding CMSIS-NN layer calls in run_nn(). */
#define DATA_RSHIFT 0
#define CONV1_BIAS_LSHIFT 0
#define CONV1_OUT_RSHIFT 9
#define CONV2_BIAS_LSHIFT 0
#define CONV2_OUT_RSHIFT 9
#define CONV3_BIAS_LSHIFT 0
#define CONV3_OUT_RSHIFT 9
#define IP1_BIAS_LSHIFT 3
#define IP1_OUT_RSHIFT 5

File diff suppressed because one or more lines are too long