diff --git a/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.c b/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.c index 6ca2182e8..dad312684 100644 --- a/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.c +++ b/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.c @@ -1,66 +1,63 @@ -#include -#include -#include #include "region_layer.h" -typedef struct -{ +#include +#include +#include + +typedef struct { float x; float y; float w; float h; } box_t; -typedef struct -{ +typedef struct { int index; int class; float **probs; } sortable_box_t; - int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height) { int flag = 0; rl->coords = 4; - rl->image_width = 320; - rl->image_height = 240; + /* This function takes no separate image-size parameters, so origin_width and origin_height are used both as the net input size and as the size of the image captured from the sensor. If the net input does not match the sensor output, the `dvp_set_image_size` function can be used to set the sensor output size. 
+ */ + rl->image_width = origin_width; + rl->image_height = origin_height; rl->classes = channels / 5 - 5; rl->net_width = origin_width; rl->net_height = origin_height; rl->layer_width = width; rl->layer_height = height; - rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number); + rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number); rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); rl->output = malloc(rl->output_number * sizeof(float)); - if (rl->output == NULL) - { + if (rl->output == NULL) { flag = -1; goto malloc_error; } rl->boxes = malloc(rl->boxes_number * sizeof(box_t)); - if (rl->boxes == NULL) - { + if (rl->boxes == NULL) { flag = -2; goto malloc_error; } rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float)); - if (rl->probs_buf == NULL) - { + if (rl->probs_buf == NULL) { flag = -3; goto malloc_error; } rl->probs = malloc(rl->boxes_number * sizeof(float *)); - if (rl->probs == NULL) - { + if (rl->probs == NULL) { flag = -4; goto malloc_error; } - for (uint32_t i = 0; i < rl->boxes_number; i++) - rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); + for (uint32_t i = 0; i < rl->boxes_number; i++) rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); return 0; malloc_error: free(rl->output); @@ -78,24 +75,20 @@ void region_layer_deinit(region_layer_t *rl) free(rl->probs); } -static inline float sigmoid(float x) -{ - return 1.f / (1.f + expf(-x)); -} +static inline float sigmoid(float x) { return 1.f / (1.f + expf(-x)); } static void activate_array(region_layer_t *rl, int index, int n) { float *output = &rl->output[index]; float *input = &rl->input[index]; - for (int i = 0; i < n; ++i) - output[i] = sigmoid(input[i]); + for (int i = 0; i < n; ++i) output[i] = sigmoid(input[i]); } static int entry_index(region_layer_t *rl, int location, int entry) { int wh = rl->layer_width * rl->layer_height; - int n = location / wh; + int n = location / wh; int loc = location % 
wh; return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc; @@ -109,10 +102,8 @@ static void softmax(region_layer_t *rl, float *input, int n, int stride, float * float sum = 0; float largest_i = input[0]; - for (i = 0; i < n; ++i) - { - if (input[i * stride] > largest_i) - largest_i = input[i * stride]; + for (i = 0; i < n; ++i) { + if (input[i * stride] > largest_i) largest_i = input[i * stride]; } for (i = 0; i < n; ++i) { @@ -121,17 +112,16 @@ static void softmax(region_layer_t *rl, float *input, int n, int stride, float * sum += e; output[i * stride] = e; } - for (i = 0; i < n; ++i) - output[i * stride] /= sum; + for (i = 0; i < n; ++i) output[i * stride] /= sum; } -static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, float *output) +static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, + float *output) { int g, b; for (b = 0; b < batch; ++b) { - for (g = 0; g < groups; ++g) - softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g); + for (g = 0; g < groups; ++g) softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g); } } @@ -139,11 +129,9 @@ static void forward_region_layer(region_layer_t *rl) { int index; - for (index = 0; index < rl->output_number; index++) - rl->output[index] = rl->input[index]; + for (index = 0; index < rl->output_number; index++) rl->output[index] = rl->input[index]; - for (int n = 0; n < rl->anchor_number; ++n) - { + for (int n = 0; n < rl->anchor_number; ++n) { index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0); activate_array(rl, index, 2 * rl->layer_width * rl->layer_height); index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4); @@ -151,9 +139,8 @@ static void forward_region_layer(region_layer_t *rl) } index = entry_index(rl, 0, rl->coords + 1); - softmax_cpu(rl, rl->input + index, rl->classes, 
rl->anchor_number, - rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height, - rl->layer_width * rl->layer_height, rl->output + index); + softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number, rl->output_number / rl->anchor_number, + rl->layer_width * rl->layer_height, rl->layer_width * rl->layer_height, rl->output + index); } static void correct_region_boxes(region_layer_t *rl, box_t *boxes) @@ -166,8 +153,7 @@ static void correct_region_boxes(region_layer_t *rl, box_t *boxes) int new_w = 0; int new_h = 0; - if (((float)net_width / image_width) < - ((float)net_height / image_height)) { + if (((float)net_width / image_width) < ((float)net_height / image_height)) { new_w = net_width; new_h = (image_height * net_width) / image_width; } else { @@ -177,10 +163,8 @@ static void correct_region_boxes(region_layer_t *rl, box_t *boxes) for (int i = 0; i < boxes_number; ++i) { box_t b = boxes[i]; - b.x = (b.x - (net_width - new_w) / 2. / net_width) / - ((float)new_w / net_width); - b.y = (b.y - (net_height - new_h) / 2. / net_height) / - ((float)new_h / net_height); + b.x = (b.x - (net_width - new_w) / 2. / net_width) / ((float)new_w / net_width); + b.y = (b.y - (net_height - new_h) / 2. 
/ net_height) / ((float)new_h / net_height); b.w *= (float)net_width / new_w; b.h *= (float)net_height / new_h; boxes[i] = b; @@ -207,34 +191,29 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro uint32_t coords = rl->coords; float threshold = rl->threshold; - for (int i = 0; i < layer_width * layer_height; ++i) - { + for (int i = 0; i < layer_width * layer_height; ++i) { int row = i / layer_width; int col = i % layer_width; - for (int n = 0; n < anchor_number; ++n) - { + for (int n = 0; n < anchor_number; ++n) { int index = n * layer_width * layer_height + i; - for (int j = 0; j < classes; ++j) - probs[index][j] = 0; + for (int j = 0; j < classes; ++j) probs[index][j] = 0; int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords); int box_index = entry_index(rl, n * layer_width * layer_height + i, 0); - float scale = predictions[obj_index]; + float scale = predictions[obj_index]; - boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, - layer_width, layer_height, layer_width * layer_height); + boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, layer_width, layer_height, + layer_width * layer_height); float max = 0; - for (int j = 0; j < classes; ++j) - { + for (int j = 0; j < classes; ++j) { int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j); float prob = scale * predictions[class_index]; probs[index][j] = (prob > threshold) ? prob : 0; - if (prob > max) - max = prob; + if (prob > max) max = prob; } probs[index][classes] = max; } @@ -257,11 +236,11 @@ static int nms_comparator(void *pa, void *pb) static float overlap(float x1, float w1, float x2, float w2) { - float l1 = x1 - w1/2; - float l2 = x2 - w2/2; + float l1 = x1 - w1 / 2; + float l2 = x2 - w2 / 2; float left = l1 > l2 ? l1 : l2; - float r1 = x1 + w1/2; - float r2 = x2 + w2/2; + float r1 = x1 + w1 / 2; + float r2 = x2 + w2 / 2; float right = r1 < r2 ? 
r1 : r2; return right - left; @@ -272,8 +251,7 @@ static float box_intersection(box_t a, box_t b) float w = overlap(a.x, a.w, b.x, b.w); float h = overlap(a.y, a.h, b.y, b.h); - if (w < 0 || h < 0) - return 0; + if (w < 0 || h < 0) return 0; return w * h; } @@ -285,10 +263,7 @@ static float box_union(box_t a, box_t b) return u; } -static float box_iou(box_t a, box_t b) -{ - return box_intersection(a, b) / box_union(a, b); -} +static float box_iou(box_t a, box_t b) { return box_intersection(a, b) / box_union(a, b); } static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs) { @@ -298,30 +273,23 @@ static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs) int i, j, k; sortable_box_t s[boxes_number]; - for (i = 0; i < boxes_number; ++i) - { + for (i = 0; i < boxes_number; ++i) { s[i].index = i; s[i].class = 0; s[i].probs = probs; } - for (k = 0; k < classes; ++k) - { - for (i = 0; i < boxes_number; ++i) - s[i].class = k; + for (k = 0; k < classes; ++k) { + for (i = 0; i < boxes_number; ++i) s[i].class = k; qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator); - for (i = 0; i < boxes_number; ++i) - { - if (probs[s[i].index][k] == 0) - continue; + for (i = 0; i < boxes_number; ++i) { + if (probs[s[i].index][k] == 0) continue; box_t a = boxes[s[i].index]; - for (j = i + 1; j < boxes_number; ++j) - { + for (j = i + 1; j < boxes_number; ++j) { box_t b = boxes[s[j].index]; - if (box_iou(a, b) > nms_value) - probs[s[j].index][k] = 0; + if (box_iou(a, b) > nms_value) probs[s[j].index][k] = 0; } } } @@ -332,11 +300,9 @@ static int max_index(float *a, int n) int i, max_i = 0; float max = a[0]; - for (i = 1; i < n; ++i) - { - if (a[i] > max) - { - max = a[i]; + for (i = 1; i < n; ++i) { + if (a[i] > max) { + max = a[i]; max_i = i; } } @@ -351,14 +317,12 @@ static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info) uint32_t boxes_number = rl->boxes_number; float threshold = rl->threshold; box_t *boxes = (box_t *)rl->boxes; - 
- for (int i = 0; i < rl->boxes_number; ++i) - { - int class = max_index(rl->probs[i], rl->classes); + + for (int i = 0; i < rl->boxes_number; ++i) { + int class = max_index(rl->probs[i], rl->classes); float prob = rl->probs[i][class]; - if (prob > threshold) - { + if (prob > threshold) { box_t *b = boxes + i; obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2); obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2); @@ -380,7 +344,8 @@ void region_layer_run(region_layer_t *rl, obj_info_t *obj_info) region_layer_output(rl, obj_info); } -void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color) +void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color, uint16_t image_width, + uint16_t image_height) { uint32_t data = ((uint32_t)color << 16) | (uint32_t)color; uint32_t *addr1, *addr2, *addr3, *addr4, x1, y1, x2, y2; @@ -390,48 +355,41 @@ void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t co x2 = obj_info->obj[index].x2; y2 = obj_info->obj[index].y2; - if (x1 <= 0) - x1 = 1; - if (x2 >= 319) - x2 = 318; - if (y1 <= 0) - y1 = 1; - if (y2 >= 239) - y2 = 238; + if (x1 <= 0) x1 = 1; + if (x2 >= image_width - 1) x2 = image_width - 2; + if (y1 <= 0) y1 = 1; + if (y2 >= image_height - 1) y2 = image_height - 2; - addr1 = gram + (320 * y1 + x1) / 2; - addr2 = gram + (320 * y1 + x2 - 8) / 2; - addr3 = gram + (320 * (y2 - 1) + x1) / 2; - addr4 = gram + (320 * (y2 - 1) + x2 - 8) / 2; - for (uint32_t i = 0; i < 4; i++) - { + addr1 = gram + (image_width * y1 + x1) / 2; + addr2 = gram + (image_width * y1 + x2 - 8) / 2; + addr3 = gram + (image_width * (y2 - 1) + x1) / 2; + addr4 = gram + (image_width * (y2 - 1) + x2 - 8) / 2; + for (uint32_t i = 0; i < 4; i++) { *addr1 = data; - *(addr1 + 160) = data; + *(addr1 + image_width / 2) = data; *addr2 = data; - *(addr2 + 160) = data; + *(addr2 + image_width / 2) = data; *addr3 = data; - *(addr3 + 160) = data; + 
*(addr3 + image_width / 2) = data; *addr4 = data; - *(addr4 + 160) = data; + *(addr4 + image_width / 2) = data; addr1++; addr2++; addr3++; addr4++; } - addr1 = gram + (320 * y1 + x1) / 2; - addr2 = gram + (320 * y1 + x2 - 2) / 2; - addr3 = gram + (320 * (y2 - 8) + x1) / 2; - addr4 = gram + (320 * (y2 - 8) + x2 - 2) / 2; - for (uint32_t i = 0; i < 8; i++) - { + addr1 = gram + (image_width * y1 + x1) / 2; + addr2 = gram + (image_width * y1 + x2 - 2) / 2; + addr3 = gram + (image_width * (y2 - 8) + x1) / 2; + addr4 = gram + (image_width * (y2 - 8) + x2 - 2) / 2; + for (uint32_t i = 0; i < 8; i++) { *addr1 = data; *addr2 = data; *addr3 = data; *addr4 = data; - addr1 += 160; - addr2 += 160; - addr3 += 160; - addr4 += 160; + addr1 += image_width / 2; + addr2 += image_width / 2; + addr3 += image_width / 2; + addr4 += image_width / 2; } } - diff --git a/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.h b/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.h index 9b8a52197..70d0ea99d 100644 --- a/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.h +++ b/APP_Framework/Framework/knowing/kpu-postprocessing/yolov2/region_layer.h @@ -44,6 +44,6 @@ typedef struct int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height); void region_layer_deinit(region_layer_t *rl); void region_layer_run(region_layer_t *rl, obj_info_t *obj_info); -void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color); +void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color, uint16_t image_width, uint16_t image_height); #endif // _REGION_LAYER