forked from xuos/xiuos
				
			update knowing framework and application from Tian_Chunyu
it is perfect
This commit is contained in:
		
						commit
						c4daa1dc44
					
				| 
						 | 
					@ -1,14 +1,3 @@
 | 
				
			||||||
menu "connection app"
 | 
					menu "connection app"
 | 
				
			||||||
    menuconfig APPLICATION_CONNECTION
 | 
					 | 
				
			||||||
        bool "Using connection apps"
 | 
					 | 
				
			||||||
        default n
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    menuconfig CONNECTION_COMMUNICATION_ZIGBEE
 | 
					 | 
				
			||||||
        bool "enable zigbee demo"
 | 
					 | 
				
			||||||
        default n
 | 
					 | 
				
			||||||
        select CONFIG_CONNECTION_COMMUNICATION_ZIGBEE
 | 
					 | 
				
			||||||
        if CONNECTION_COMMUNICATION_ZIGBEE
 | 
					 | 
				
			||||||
            source "$KERNEL_DIR/framework/connection/Adapter/zigbee/Kconfig"
 | 
					 | 
				
			||||||
        endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
endmenu
 | 
					endmenu
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,7 +1,4 @@
 | 
				
			||||||
menu "knowing app"
 | 
					menu "knowing app"
 | 
				
			||||||
 | 
					    source "$APP_DIR/Applications/knowing_app/mnist/Kconfig" 
 | 
				
			||||||
    menuconfig APPLICATION_KNOWING
 | 
					    source "$APP_DIR/Applications/knowing_app/face_detect/Kconfig" 
 | 
				
			||||||
        bool "Using knowing apps"
 | 
					 | 
				
			||||||
        default n
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
endmenu
 | 
					endmenu
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					Import('RTT_ROOT')
 | 
				
			||||||
 | 
					from building import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Collect the build objects of every immediate sub-directory that ships
					# its own SConscript, and hand the combined list back to the caller.
					cwd = GetCurrentDir()
					objs = []

					for entry in os.listdir(cwd):
					    sub_script = os.path.join(cwd, entry, 'SConscript')
					    # Entries without a SConscript file (plain files included) are skipped.
					    if not os.path.isfile(sub_script):
					        continue
					    objs = objs + SConscript(sub_script)

					Return('objs')
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,7 @@
 | 
				
			||||||
 | 
					# Face detection demo application. Only selectable when the board, the
					# OV2640 camera driver, KPU post-processing and YOLOv2 are all enabled.
					config FACE_DETECT
					    bool "enable apps/face detect"
					    default n
					    depends on BOARD_K210_EVB && DRV_USING_OV2640 && USING_KPU_POSTPROCESSING && USING_YOLOV2
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,9 @@
 | 
				
			||||||
 | 
					from building import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Build every C/C++ source of this directory into the 'Applications'
					# group; the group is only active when FACE_DETECT is configured.
					here = GetCurrentDir()
					sources = Glob('*.c') + Glob('*.cpp')
					include_dirs = [here]

					app_group = DefineGroup('Applications',
					                        sources,
					                        depend = ['FACE_DETECT'],
					                        LOCAL_CPPPATH = include_dirs)

					Return('app_group')
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,253 @@
 | 
				
			||||||
 | 
					#include <transform.h>
 | 
				
			||||||
 | 
					#include"region_layer.h"
 | 
				
			||||||
 | 
					#define SHOW_RGB_BUF_SIZE (320*240*2)
 | 
				
			||||||
 | 
					#define AI_KPU_RGB_BUF_SIZE (320*240*3)
 | 
				
			||||||
 | 
					#define KMODEL_SIZE (388776)    //face model size
 | 
				
			||||||
 | 
					#define ANCHOR_NUM 5
 | 
				
			||||||
 | 
					#define KPUIMAGEWIDTH (320)
 | 
				
			||||||
 | 
					#define KPUIMAGEHEIGHT (240)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static float anchor[ANCHOR_NUM * 2] = {1.889,2.5245,  2.9465,3.94056, 3.99987,5.3658, 5.155437,6.92275, 6.718375,9.01025};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define THREAD_PRIORITY_FACE_D  (11)
 | 
				
			||||||
 | 
					static pthread_t  facetid = 0;
 | 
				
			||||||
 | 
					static void* thread_face_detcet_entry(void *parameter);
 | 
				
			||||||
 | 
					static int g_fd = 0;
 | 
				
			||||||
 | 
					static int kmodel_fd = 0;
 | 
				
			||||||
 | 
					static int  if_exit = 0;
 | 
				
			||||||
 | 
					static unsigned char  * showbuffer = NULL ;
 | 
				
			||||||
 | 
					static unsigned char  * kpurgbbuffer = NULL ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static _ioctl_shoot_para shoot_para_t = {0};
 | 
				
			||||||
 | 
					unsigned char  * model_data = NULL;     //kpu data  load memory
 | 
				
			||||||
 | 
					unsigned char *model_data_align  = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					kpu_model_context_t face_detect_task;
 | 
				
			||||||
 | 
					static region_layer_t face_detect_rl;
 | 
				
			||||||
 | 
					static obj_info_t face_detect_info;
 | 
				
			||||||
 | 
					volatile uint32_t g_ai_done_flag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ai_done(void *ctx)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    g_ai_done_flag = 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void face_detect()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int ret = 0;
 | 
				
			||||||
 | 
					    int result = 0;
 | 
				
			||||||
 | 
					    int size = 0;
 | 
				
			||||||
 | 
					    g_fd = open("/dev/ov2640",O_RDONLY);
 | 
				
			||||||
 | 
					    if(g_fd < 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        printf("open ov2640 fail !!");
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    showbuffer = (unsigned char*)malloc(SHOW_RGB_BUF_SIZE);
 | 
				
			||||||
 | 
					    if(NULL ==showbuffer)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        close(g_fd);
 | 
				
			||||||
 | 
					        printf("showbuffer apply memory fail !!");
 | 
				
			||||||
 | 
					        return ;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    kpurgbbuffer = (unsigned char*)malloc(AI_KPU_RGB_BUF_SIZE);
 | 
				
			||||||
 | 
					    if(NULL ==kpurgbbuffer)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        close(g_fd);
 | 
				
			||||||
 | 
					        free(showbuffer);
 | 
				
			||||||
 | 
					        printf("kpurgbbuffer apply memory fail !!");
 | 
				
			||||||
 | 
					        return ;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    model_data = (unsigned char *)malloc(KMODEL_SIZE + 255);
 | 
				
			||||||
 | 
					    if(NULL ==model_data)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        free(showbuffer);
 | 
				
			||||||
 | 
					        free(kpurgbbuffer);
 | 
				
			||||||
 | 
					        close(g_fd);
 | 
				
			||||||
 | 
					        printf("model_data apply memory fail !!");
 | 
				
			||||||
 | 
					        return ;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    memset(model_data,0,KMODEL_SIZE + 255);
 | 
				
			||||||
 | 
					    memset(showbuffer,0,SHOW_RGB_BUF_SIZE);
 | 
				
			||||||
 | 
					    memset(kpurgbbuffer,0,AI_KPU_RGB_BUF_SIZE);
 | 
				
			||||||
 | 
					    shoot_para_t.pdata = (unsigned int *)(showbuffer);
 | 
				
			||||||
 | 
					    shoot_para_t.length = SHOW_RGB_BUF_SIZE;
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					        load memory 
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					    kmodel_fd = open("/kmodel/detect.kmodel",O_RDONLY);
 | 
				
			||||||
 | 
					    if(kmodel_fd <0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					            printf("open kmodel fail");
 | 
				
			||||||
 | 
					            close(g_fd);
 | 
				
			||||||
 | 
					            free(showbuffer);
 | 
				
			||||||
 | 
					            free(kpurgbbuffer);
 | 
				
			||||||
 | 
					            free(model_data);
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					         size = read(kmodel_fd, model_data, KMODEL_SIZE);
 | 
				
			||||||
 | 
					         if(size != KMODEL_SIZE)
 | 
				
			||||||
 | 
					         {
 | 
				
			||||||
 | 
					            printf("read kmodel error size %d\n",size);
 | 
				
			||||||
 | 
					            close(g_fd);
 | 
				
			||||||
 | 
					            close(kmodel_fd);
 | 
				
			||||||
 | 
					            free(showbuffer);
 | 
				
			||||||
 | 
					            free(kpurgbbuffer);
 | 
				
			||||||
 | 
					            free(model_data);
 | 
				
			||||||
 | 
					            return;
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					         }
 | 
				
			||||||
 | 
					         else
 | 
				
			||||||
 | 
					         {
 | 
				
			||||||
 | 
					             printf("read kmodel success \n");
 | 
				
			||||||
 | 
					         }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    unsigned char *model_data_align = (unsigned char *)(((unsigned int)model_data+255)&(~255));
 | 
				
			||||||
 | 
					    dvp_set_ai_addr((uint32_t)kpurgbbuffer, (uint32_t)(kpurgbbuffer + 320 * 240), (uint32_t)(kpurgbbuffer + 320 * 240 * 2));
 | 
				
			||||||
 | 
					    if (kpu_load_kmodel(&face_detect_task, model_data_align) != 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        printf("\nmodel init error\n");
 | 
				
			||||||
 | 
					        close(g_fd);
 | 
				
			||||||
 | 
					        close(kmodel_fd);
 | 
				
			||||||
 | 
					        free(showbuffer);
 | 
				
			||||||
 | 
					        free(kpurgbbuffer);
 | 
				
			||||||
 | 
					        free(model_data);
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    face_detect_rl.anchor_number = ANCHOR_NUM;
 | 
				
			||||||
 | 
					    face_detect_rl.anchor = anchor;
 | 
				
			||||||
 | 
					    face_detect_rl.threshold = 0.7;
 | 
				
			||||||
 | 
					    face_detect_rl.nms_value = 0.3;
 | 
				
			||||||
 | 
					    result = region_layer_init(&face_detect_rl, 20, 15, 30, KPUIMAGEWIDTH, KPUIMAGEHEIGHT);
 | 
				
			||||||
 | 
					    printf("region_layer_init result %d \n\r",result);    
 | 
				
			||||||
 | 
					    size_t stack_size = 32*1024;
 | 
				
			||||||
 | 
					    pthread_attr_t attr;      /* 线程属性 */
 | 
				
			||||||
 | 
					    struct sched_param prio;  /* 线程优先级 */
 | 
				
			||||||
 | 
					    prio.sched_priority = 8;  /* 优先级设置为 8 */
 | 
				
			||||||
 | 
					    pthread_attr_init(&attr);  /* 先使用默认值初始化属性 */
 | 
				
			||||||
 | 
					    pthread_attr_setschedparam(&attr,&prio);  /* 修改属性对应的优先级 */
 | 
				
			||||||
 | 
					    pthread_attr_setstacksize(&attr, stack_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* 创建线程 1, 属性为 attr,入口函数是 thread_entry,入口函数参数是 1 */
 | 
				
			||||||
 | 
					    result = pthread_create(&facetid,&attr,thread_face_detcet_entry,NULL);
 | 
				
			||||||
 | 
					    if (0 == result)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        printf("thread_face_detcet_entry successfully!\n");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        printf("thread_face_detcet_entry failed! error code is %d\n",result);
 | 
				
			||||||
 | 
					        close(g_fd);
 | 
				
			||||||
 | 
					    }     
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#ifdef __RT_THREAD_H__
 | 
				
			||||||
 | 
					MSH_CMD_EXPORT(face_detect,face detect task);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					static void* thread_face_detcet_entry(void *parameter)
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    extern void lcd_draw_picture(uint16_t x1, uint16_t y1, uint16_t width, uint16_t height, uint32_t *ptr);
 | 
				
			||||||
 | 
					    printf("thread_face_detcet_entry start!\n");
 | 
				
			||||||
 | 
					    int ret = 0;
 | 
				
			||||||
 | 
					    //sysctl_enable_irq();
 | 
				
			||||||
 | 
					    while(1)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        //memset(showbuffer,0,320*240*2);
 | 
				
			||||||
 | 
					        g_ai_done_flag = 0;
 | 
				
			||||||
 | 
					        ret = ioctl(g_fd,IOCTRL_CAMERA_START_SHOT,&shoot_para_t);
 | 
				
			||||||
 | 
					        if(RT_ERROR == ret)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            printf("ov2640 can't wait event flag");
 | 
				
			||||||
 | 
					            rt_free(showbuffer);
 | 
				
			||||||
 | 
					            close(g_fd);
 | 
				
			||||||
 | 
					            pthread_exit(NULL);  
 | 
				
			||||||
 | 
					            return NULL;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        kpu_run_kmodel(&face_detect_task, kpurgbbuffer, DMAC_CHANNEL5, ai_done, NULL);
 | 
				
			||||||
 | 
					        while(!g_ai_done_flag);
 | 
				
			||||||
 | 
					        float *output;
 | 
				
			||||||
 | 
					        size_t output_size;
 | 
				
			||||||
 | 
					        kpu_get_output(&face_detect_task, 0, (uint8_t **)&output, &output_size);
 | 
				
			||||||
 | 
					        face_detect_rl.input = output;        
 | 
				
			||||||
 | 
					        region_layer_run(&face_detect_rl, &face_detect_info);
 | 
				
			||||||
 | 
					        /* display result */
 | 
				
			||||||
 | 
					        #ifdef BSP_USING_LCD
 | 
				
			||||||
 | 
					        for (int face_cnt = 0; face_cnt < face_detect_info.obj_number; face_cnt++)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            draw_edge((uint32_t *)showbuffer, &face_detect_info, face_cnt, 0xF800);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        lcd_draw_picture(0, 0, 320, 240, (unsigned int*)showbuffer); 
 | 
				
			||||||
 | 
					        #endif
 | 
				
			||||||
 | 
					       usleep(1);
 | 
				
			||||||
 | 
					       if(1 == if_exit)
 | 
				
			||||||
 | 
					       {
 | 
				
			||||||
 | 
					          if_exit = 0;
 | 
				
			||||||
 | 
					          printf("thread_face_detcet_entry exit");
 | 
				
			||||||
 | 
					          pthread_exit(NULL);  
 | 
				
			||||||
 | 
					       }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void face_detect_delete()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if(showbuffer != NULL)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        int ret = 0;
 | 
				
			||||||
 | 
					        close(g_fd);
 | 
				
			||||||
 | 
					        close(kmodel_fd);
 | 
				
			||||||
 | 
					        free(showbuffer);
 | 
				
			||||||
 | 
					        free(kpurgbbuffer);
 | 
				
			||||||
 | 
					        free(model_data);
 | 
				
			||||||
 | 
					        printf("face detect task cancel!!! ret %d ",ret);
 | 
				
			||||||
 | 
					        if_exit = 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#ifdef __RT_THREAD_H__
 | 
				
			||||||
 | 
					MSH_CMD_EXPORT(face_detect_delete,face detect task delete);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					void kmodel_load(unsigned char  * model_data)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int kmodel_fd = 0;
 | 
				
			||||||
 | 
					    int size = 0;
 | 
				
			||||||
 | 
					    kmodel_fd = open("/kmodel/detect.kmodel",O_RDONLY);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    model_data = (unsigned char *)malloc(KMODEL_SIZE + 255);
 | 
				
			||||||
 | 
					    if(NULL ==model_data)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        printf("model_data apply memory fail !!");
 | 
				
			||||||
 | 
					        return ;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    memset(model_data,0,KMODEL_SIZE + 255);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    if (kmodel_fd>= 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					         size = read(kmodel_fd, model_data, KMODEL_SIZE);
 | 
				
			||||||
 | 
					         if(size != KMODEL_SIZE)
 | 
				
			||||||
 | 
					         {
 | 
				
			||||||
 | 
					            printf("read kmodel error size %d\n",size);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					         }
 | 
				
			||||||
 | 
					         else
 | 
				
			||||||
 | 
					         {
 | 
				
			||||||
 | 
					              printf("read kmodel success");
 | 
				
			||||||
 | 
					         }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        free(model_data);
 | 
				
			||||||
 | 
					        printf("open kmodel fail");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#ifdef __RT_THREAD_H__
 | 
				
			||||||
 | 
					MSH_CMD_EXPORT(kmodel_load,kmodel load memory);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 32 KiB  | 
| 
						 | 
					@ -0,0 +1,4 @@
 | 
				
			||||||
 | 
					# MNIST demo application; requires the TensorFlow Lite Micro framework.
					config APP_MNIST
					    bool "enable apps/mnist"
					    default n
					    depends on USING_TENSORFLOWLITEMICRO
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,9 @@
 | 
				
			||||||
# MNIST 说明
 | 
					# MNIST 说明
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					要使用本例程,MCU RAM必须至少500K左右,所以本例程目前在K210上面验证过,stm32f407 目前在rtt上原则上只能采取dlmodule加载的方式。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## 使用
 | 
					## 使用
 | 
				
			||||||
 | 
					
 | 
				
			||||||
tools/mnist-train.py 训练生成 mnist 模型。
 | 
					tools/mnist-train.py 训练生成 mnist 模型。
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,9 @@
 | 
				
			||||||
 | 
					from building import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Build every C/C++ source of this directory into the 'Applications'
					# group; the group is only active when APP_MNIST is configured.
					here = GetCurrentDir()
					sources = Glob('*.c') + Glob('*.cpp')
					include_dirs = [here]

					app_group = DefineGroup('Applications',
					                        sources,
					                        depend = ['APP_MNIST'],
					                        LOCAL_CPPPATH = include_dirs)

					Return('app_group')
 | 
				
			||||||
| 
						 | 
					@ -1,23 +1,3 @@
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
* Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
* XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
* You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
* You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
*        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
* See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
* @file:    digit.h
 | 
					 | 
				
			||||||
* @brief:   store digits in this file
 | 
					 | 
				
			||||||
* @version: 1.0
 | 
					 | 
				
			||||||
* @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
* @date:    2021/4/30
 | 
					 | 
				
			||||||
*
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
const float mnist_digit[] = {
 | 
					const float mnist_digit[] = {
 | 
				
			||||||
  0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,
 | 
					  0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,
 | 
				
			||||||
  0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,
 | 
					  0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,
 | 
				
			||||||
| 
						 | 
					@ -1,24 +1,5 @@
 | 
				
			||||||
/*
 | 
					#include <transform.h>
 | 
				
			||||||
* Copyright (c) 2020 AIIT XUOS Lab
 | 
					#include <stdio.h>
 | 
				
			||||||
* XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
* You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
* You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
*        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
* See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
* @file:    mnistapp.cpp
 | 
					 | 
				
			||||||
* @brief:   mnist function
 | 
					 | 
				
			||||||
* @version: 1.0
 | 
					 | 
				
			||||||
* @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
* @date:    2021/4/30
 | 
					 | 
				
			||||||
*
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
#include <xiuos.h>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "tensorflow/lite/micro/all_ops_resolver.h"
 | 
					#include "tensorflow/lite/micro/all_ops_resolver.h"
 | 
				
			||||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
 | 
					#include "tensorflow/lite/micro/micro_error_reporter.h"
 | 
				
			||||||
| 
						 | 
					@ -36,8 +17,8 @@ tflite::MicroInterpreter* interpreter = nullptr;
 | 
				
			||||||
TfLiteTensor* input = nullptr;
 | 
					TfLiteTensor* input = nullptr;
 | 
				
			||||||
TfLiteTensor* output = nullptr;
 | 
					TfLiteTensor* output = nullptr;
 | 
				
			||||||
constexpr int kTensorArenaSize = 110 * 1024;
 | 
					constexpr int kTensorArenaSize = 110 * 1024;
 | 
				
			||||||
//uint8_t *tensor_arena = nullptr;
 | 
					uint8_t *tensor_arena = nullptr;
 | 
				
			||||||
uint8_t tensor_arena[kTensorArenaSize];
 | 
					//uint8_t tensor_arena[kTensorArenaSize];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern "C" void mnist_app() {
 | 
					extern "C" void mnist_app() {
 | 
				
			||||||
| 
						 | 
					@ -52,13 +33,12 @@ extern "C" void mnist_app() {
 | 
				
			||||||
                         model->version(), TFLITE_SCHEMA_VERSION);
 | 
					                         model->version(), TFLITE_SCHEMA_VERSION);
 | 
				
			||||||
    return;
 | 
					    return;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  /*
 | 
					
 | 
				
			||||||
  tensor_arena = (uint8_t *)rt_malloc(kTensorArenaSize);
 | 
					  tensor_arena = (uint8_t *)malloc(kTensorArenaSize);
 | 
				
			||||||
  if (tensor_arena == nullptr) {
 | 
					  if (tensor_arena == nullptr) {
 | 
				
			||||||
    TF_LITE_REPORT_ERROR(error_reporter, "malloc for tensor_arena failed");
 | 
					    TF_LITE_REPORT_ERROR(error_reporter, "malloc for tensor_arena failed");
 | 
				
			||||||
    return;
 | 
					    return;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  */
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  tflite::AllOpsResolver resolver;
 | 
					  tflite::AllOpsResolver resolver;
 | 
				
			||||||
  tflite::MicroInterpreter static_interpreter(
 | 
					  tflite::MicroInterpreter static_interpreter(
 | 
				
			||||||
| 
						 | 
					@ -75,15 +55,15 @@ extern "C" void mnist_app() {
 | 
				
			||||||
  input = interpreter->input(0);
 | 
					  input = interpreter->input(0);
 | 
				
			||||||
  output = interpreter->output(0);
 | 
					  output = interpreter->output(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  KPrintf("\n------- Input Digit -------\n");
 | 
					  printf("------- Input Digit -------\n");
 | 
				
			||||||
  for (int i = 0; i < 28; i++) {
 | 
					  for (int i = 0; i < 28; i++) {
 | 
				
			||||||
    for (int j = 0; j < 28; j++) {
 | 
					    for (int j = 0; j < 28; j++) {
 | 
				
			||||||
      if (mnist_digit[i*28+j] > 0.3)
 | 
					      if (mnist_digit[i*28+j] > 0.3)
 | 
				
			||||||
        KPrintf("#");
 | 
					        printf("#");
 | 
				
			||||||
      else
 | 
					      else
 | 
				
			||||||
        KPrintf(".");
 | 
					        printf(".");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    KPrintf("\n");
 | 
					    printf("\n");
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (int i = 0; i < 28*28; i++) {
 | 
					  for (int i = 0; i < 28*28; i++) {
 | 
				
			||||||
| 
						 | 
					@ -105,8 +85,12 @@ extern "C" void mnist_app() {
 | 
				
			||||||
           index = i;
 | 
					           index = i;
 | 
				
			||||||
         }
 | 
					         }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  printf("------- Output Result -------\n");
 | 
				
			||||||
  KPrintf("\n------- Output Result -------\n");
 | 
					  printf("result is %d\n", index);
 | 
				
			||||||
  KPrintf("result is %d\n\n", index);
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					#ifdef __RT_THREAD_H__
 | 
				
			||||||
 | 
					MSH_CMD_EXPORT(mnist_app, run mnist app);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1,23 +1,3 @@
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
* Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
* XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
* You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
* You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
*        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
* See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
* @file:    model.h
 | 
					 | 
				
			||||||
* @brief:   store model weights in this file
 | 
					 | 
				
			||||||
* @version: 1.0
 | 
					 | 
				
			||||||
* @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
* @date:    2021/4/30
 | 
					 | 
				
			||||||
*
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
unsigned char mnist_model[] = {
 | 
					unsigned char mnist_model[] = {
 | 
				
			||||||
  0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x14, 0x00, 0x20, 0x00,
 | 
					  0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x14, 0x00, 0x20, 0x00,
 | 
				
			||||||
  0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00,
 | 
					  0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x00, 0x00,
 | 
				
			||||||
| 
						 | 
					@ -1,22 +1,4 @@
 | 
				
			||||||
#!/usr/bin/env python3
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
# Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
# XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
# You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
# You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
#        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
# See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#  @file:    mnist-c-digit.py
 | 
					 | 
				
			||||||
#  @brief:   print image digit at command line
 | 
					 | 
				
			||||||
#  @version: 1.0
 | 
					 | 
				
			||||||
#  @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
#  @date:    2021/4/30
 | 
					 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import tensorflow as tf
 | 
					import tensorflow as tf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,23 @@
 | 
				
			||||||
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Convert a .tflite model into a C header that defines the byte array
					# `mnist_model` and its length `mnist_model_len`.

					#tflite_file_path = 'mnist-default-quan.tflite'
					tflite_file_path = 'mnist.tflite'
					model_file_path = 'model.h'

					# Context managers close the files even when reading or writing fails.
					with open(tflite_file_path, 'rb') as tflite_file:
					    tflite_data = tflite_file.read()
					tflite_array = ['0x%02x' % byte for byte in tflite_data]

					# Spelled with explicit newlines so the indentation of this script can
					# never leak into the generated header.
					model_content = (
					    'unsigned char mnist_model[] = {\n'
					    '  %s\n'
					    '};\n'
					    'unsigned int mnist_model_len = %d;\n'
					)

					# 12 bytes in a line, the same with xxd
					bytes_of_line = 12
					rows = [
					    ', '.join(tflite_array[i:i + bytes_of_line])
					    for i in range(0, len(tflite_array), bytes_of_line)
					]
					model_data = ',\n  '.join(rows)

					with open(model_file_path, 'w') as model_file:
					    model_file.write(model_content % (model_data, len(tflite_array)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,22 +1,4 @@
 | 
				
			||||||
#!/usr/bin/env python3
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
# Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
# XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
# You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
# You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
#        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
# See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#  @file:    mnist-inference.py
 | 
					 | 
				
			||||||
#  @brief:   load data and start model inference
 | 
					 | 
				
			||||||
#  @version: 1.0
 | 
					 | 
				
			||||||
#  @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
#  @date:    2021/4/30
 | 
					 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import tensorflow as tf
 | 
					import tensorflow as tf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										18
									
								
								APP_Framework/Framework/know/tflite_mnist/tools/mnist-train.py → APP_Framework/Applications/knowing_app/mnist/tools/mnist-train.py
								
								
								
								
									
									
									Executable file → Normal file
								
							
							
						
						
									
										18
									
								
								APP_Framework/Framework/know/tflite_mnist/tools/mnist-train.py → APP_Framework/Applications/knowing_app/mnist/tools/mnist-train.py
								
								
								
								
									
									
									Executable file → Normal file
								
							| 
						 | 
					@ -1,22 +1,4 @@
 | 
				
			||||||
#!/usr/bin/env python3
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
# Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
# XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
# You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
# You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
#        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
# See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#  @file:    mnist-train.py
 | 
					 | 
				
			||||||
#  @brief:   model training
 | 
					 | 
				
			||||||
#  @version: 1.0
 | 
					 | 
				
			||||||
#  @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
#  @date:    2021/4/30
 | 
					 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import tensorflow as tf
 | 
					import tensorflow as tf
 | 
				
			||||||
| 
						 | 
					@ -20,7 +20,7 @@ menu "Framework"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    source "$APP_DIR/Framework/sensor/Kconfig"
 | 
					    source "$APP_DIR/Framework/sensor/Kconfig"
 | 
				
			||||||
    source "$APP_DIR/Framework/connection/Kconfig"
 | 
					    source "$APP_DIR/Framework/connection/Kconfig"
 | 
				
			||||||
    source "$APP_DIR/Framework/know/Kconfig"
 | 
					    source "$APP_DIR/Framework/knowing/Kconfig"
 | 
				
			||||||
    source "$APP_DIR/Framework/control/Kconfig"
 | 
					    source "$APP_DIR/Framework/control/Kconfig"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,8 +0,0 @@
 | 
				
			||||||
# Master switch for the knowing framework. The demo entries pulled in below
# all sit inside the "if" block, so they depend on this symbol.
menuconfig SUPPORT_KNOWING_FRAMEWORK
    bool "support knowing framework"
    default y

if SUPPORT_KNOWING_FRAMEWORK
    source "$APP_DIR/Framework/know/tflite_sin/Kconfig"
    source "$APP_DIR/Framework/know/tflite_mnist/Kconfig"
endif
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,4 +0,0 @@
 | 
				
			||||||
# MNIST demo; selectable only when INTELLIGENT_TFLITE is enabled, off by default.
menuconfig USING_TFLITE_MNIST
    bool "mnist demo app for tflite micro"
    default n
    depends on INTELLIGENT_TFLITE
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,8 +0,0 @@
 | 
				
			||||||
# Add the MNIST demo sources and the local include path only when the demo
# is enabled in the configuration.
ifeq ($(CONFIG_USING_TFLITE_MNIST),y)
	SRC_FILES := mnistapp.cpp mnistmain.c
	CPPPATHS += -I.
endif

include $(KERNEL_ROOT)/compiler.mk
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,30 +0,0 @@
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
* Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
* XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
* You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
* You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
*        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
* See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
* @file:    mnistmain.c
 | 
					 | 
				
			||||||
* @brief:   start mnist function
 | 
					 | 
				
			||||||
* @version: 1.0
 | 
					 | 
				
			||||||
* @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
* @date:    2021/4/30
 | 
					 | 
				
			||||||
*
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <xiuos.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void mnist_app(void);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 * Shell entry point that runs the MNIST demo by calling mnist_app().
 *
 * @return always 0. The original fell off the end of a non-void function,
 *         leaving the value seen by the caller undefined.
 */
int tfmnist(void) {
    mnist_app();
    return 0;
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SHELL_EXPORT_CMD(SHELL_CMD_PERMISSION(0)|SHELL_CMD_TYPE(SHELL_TYPE_CMD_FUNC)|SHELL_CMD_PARAM_NUM(0), tfmnist, tfmnist, run mnist demo of tflite);
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,41 +0,0 @@
 | 
				
			||||||
#!/usr/bin/env python3
 | 
					 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
# Copyright (c) 2020 AIIT XUOS Lab
 | 
					 | 
				
			||||||
# XiOS is licensed under Mulan PSL v2.
 | 
					 | 
				
			||||||
# You can use this software according to the terms and conditions of the Mulan PSL v2.
 | 
					 | 
				
			||||||
# You may obtain a copy of Mulan PSL v2 at:
 | 
					 | 
				
			||||||
#        http://license.coscl.org.cn/MulanPSL2
 | 
					 | 
				
			||||||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
					 | 
				
			||||||
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
					 | 
				
			||||||
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
					 | 
				
			||||||
# See the Mulan PSL v2 for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#  @file:    mnist-c-model.py
 | 
					 | 
				
			||||||
#  @brief:   open file path and load model
 | 
					 | 
				
			||||||
#  @version: 1.0
 | 
					 | 
				
			||||||
#  @author:  AIIT XUOS Lab
 | 
					 | 
				
			||||||
#  @date:    2021/4/30
 | 
					 | 
				
			||||||
# ==========================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Converts a .tflite model file into a C header holding the model bytes as an
# unsigned char array plus its length.

#tflite_file_path = 'mnist-default-quan.tflite'
tflite_file_path = 'mnist.tflite'
model_file_path = 'model.h'

# The context manager closes the input even if read() raises.
with open(tflite_file_path, 'rb') as tflite_file:
    tflite_data = tflite_file.read()
tflite_array = [ '0x%02x' % byte for byte in tflite_data ]

# Template of the generated header: first %s is the byte list, %d the length.
model_content = '''unsigned char mnist_model[] = {
  %s
};
unsigned int mnist_model_len = %d;
'''
# 12 bytes in a line, the same with xxd
bytes_of_line = 12
model_data = (',\n  ').join([ (', ').join(tflite_array[i:i+bytes_of_line]) for i in range(0, len(tflite_array), bytes_of_line) ])

# The context manager flushes and closes the header even if write() raises.
with open(model_file_path, 'w') as model_file:
    model_file.write(model_content % (model_data, len(tflite_array)))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,4 +0,0 @@
 | 
				
			||||||
# sin(x) demo; selectable only when INTELLIGENT_TFLITE is enabled, off by default.
menuconfig USING_TFLITE_SIN
    bool "sin(x) demo app for tflite micro"
    default n
    depends on INTELLIGENT_TFLITE
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,11 +0,0 @@
 | 
				
			||||||
# Add the sin(x) demo sources and the local include path only when the demo
# is enabled in the configuration.
ifeq ($(CONFIG_USING_TFLITE_SIN),y)
	SRC_FILES := sinmain.c main_functions.cc model.cc output_handler.cc constants.cc
	CPPPATHS += -I.
endif

include $(KERNEL_ROOT)/compiler.mk
 | 
					 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,8 @@
 | 
				
			||||||
 | 
					# Master switch for the knowing framework. The entries pulled in below
					# all sit inside the "if" block, so they depend on this symbol.
					menuconfig SUPPORT_KNOWING_FRAMEWORK
					    bool "support knowing framework"
					    default y
					
					if SUPPORT_KNOWING_FRAMEWORK
					    source "$APP_DIR/Framework/knowing/tensorflow-lite/Kconfig"
					    source "$APP_DIR/Framework/knowing/kpu-postprocessing/Kconfig"
					endif
 | 
				
			||||||
							
								
								
									
										0
									
								
								APP_Framework/Framework/know/Makefile → APP_Framework/Framework/knowing/Makefile
								
								
								
								
									
									
									Executable file → Normal file
								
							
							
						
						
									
										0
									
								
								APP_Framework/Framework/know/Makefile → APP_Framework/Framework/knowing/Makefile
								
								
								
								
									
									
									Executable file → Normal file
								
							| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
					# Collects the build objects of every immediate sub-directory that ships
					# its own SConscript and returns them to the including script.
					import os
					Import('RTT_ROOT')
					from building import *
					
					cwd = GetCurrentDir()
					objs = []
					
					# "entries" instead of "list": the original name shadowed the builtin.
					entries = os.listdir(cwd)
					
					for d in entries:
					    # Join the path once and reuse it for both the check and the call.
					    script = os.path.join(cwd, d, 'SConscript')
					    if os.path.isfile(script):
					        objs = objs + SConscript(script)
					
					Return('objs')
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,5 @@
 | 
				
			||||||
 | 
					# Switch for the KPU model post-processing code, on by default.
					menuconfig USING_KPU_POSTPROCESSING
					    bool "kpu model postprocessing"
					    default y
					
					# Entries of the yolov2 post-processing component.
					source "$APP_DIR/Framework/knowing/kpu-postprocessing/yolov2/Kconfig"
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
					# Collects the build objects of every immediate sub-directory that ships
					# its own SConscript and returns them to the including script.
					import os
					Import('RTT_ROOT')
					from building import *
					
					cwd = GetCurrentDir()
					objs = []
					
					# "entries" instead of "list": the original name shadowed the builtin.
					entries = os.listdir(cwd)
					
					for d in entries:
					    # Join the path once and reuse it for both the check and the call.
					    script = os.path.join(cwd, d, 'SConscript')
					    if os.path.isfile(script):
					        objs = objs + SConscript(script)
					
					Return('objs')
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,7 @@
 | 
				
			||||||
 | 
					# yolov2 region layer; selectable only when USING_KPU_POSTPROCESSING is
					# enabled, off by default.
					menuconfig USING_YOLOV2
					    bool "yolov2 region layer"
					    default n
					    depends on USING_KPU_POSTPROCESSING
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,10 @@
 | 
				
			||||||
 | 
					# Registers every C file of this directory as the 'yolov2' build group,
					# built only when USING_YOLOV2 is defined.
					from building import *
					import os
					
					here = GetCurrentDir()
					sources = Glob('*.c')
					
					# The directory itself is added to the include path of the group.
					group = DefineGroup('yolov2', sources, depend = ['USING_YOLOV2'], CPPPATH = [here])
					
					Return('group')
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,437 @@
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include "region_layer.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Box described by its centre point and its extents. */
					typedef struct
					{
					    float x, y; /* centre coordinates */
					    float w, h; /* width and height */
					} box_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Sort record used by the NMS pass: identifies one box and the class
					 * column whose probability is compared. */
					typedef struct
					{
					    int index;     /* row of the box in the boxes / probs arrays */
					    int class;     /* class column looked up in probs */
					    float **probs; /* probability table, addressed as probs[index][class] */
					} sortable_box_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Fill in the geometry of a region layer and allocate its work buffers.
					 *
					 * rl->anchor_number is read (not written) here, so the caller must set it
					 * before calling. The image size is fixed to 320x240.
					 *
					 * @param rl            layer descriptor to initialise
					 * @param width         stored as layer_width
					 * @param height        stored as layer_height
					 * @param channels      used to derive the class count
					 * @param origin_width  stored as net_width
					 * @param origin_height stored as net_height
					 * @return 0 on success; -1, -2, -3 or -4 when allocating output, boxes,
					 *         probs_buf or probs respectively fails. On failure every buffer
					 *         pointer in rl is NULL.
					 */
					int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height)
					{
					    int flag = 0;
					
					    /* Start from a known state: the error path frees all four pointers,
					     * and freeing a pointer that was never assigned is undefined. */
					    rl->output = NULL;
					    rl->boxes = NULL;
					    rl->probs_buf = NULL;
					    rl->probs = NULL;
					
					    rl->coords = 4;
					    rl->image_width = 320;
					    rl->image_height = 240;
					
					    /* NOTE(review): presumably assumes 5 anchors with 5 non-class values
					     * (4 coords + 1 objectness) each - confirm against the model. */
					    rl->classes = channels / 5 - 5;
					    rl->net_width = origin_width;
					    rl->net_height = origin_height;
					    rl->layer_width = width;
					    rl->layer_height = height;
					    rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number);
					    rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1));
					
					    rl->output = malloc(rl->output_number * sizeof(float));
					    if (rl->output == NULL)
					    {
					        flag = -1;
					        goto malloc_error;
					    }
					    rl->boxes = malloc(rl->boxes_number * sizeof(box_t));
					    if (rl->boxes == NULL)
					    {
					        flag = -2;
					        goto malloc_error;
					    }
					    /* One extra slot per box holds the best class probability. */
					    rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float));
					    if (rl->probs_buf == NULL)
					    {
					        flag = -3;
					        goto malloc_error;
					    }
					    rl->probs = malloc(rl->boxes_number * sizeof(float *));
					    if (rl->probs == NULL)
					    {
					        flag = -4;
					        goto malloc_error;
					    }
					    /* probs[i] points at row i of the flat probs_buf table. */
					    for (uint32_t i = 0; i < rl->boxes_number; i++)
					        rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]);
					    return 0;
					malloc_error:
					    /* free(NULL) is a no-op, so buffers not yet allocated are skipped.
					     * Pointers are cleared so a later deinit cannot free them again. */
					    free(rl->output);
					    rl->output = NULL;
					    free(rl->boxes);
					    rl->boxes = NULL;
					    free(rl->probs_buf);
					    rl->probs_buf = NULL;
					    free(rl->probs);
					    rl->probs = NULL;
					    return flag;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Release the buffers allocated by region_layer_init().
					 *
					 * Each pointer is reset to NULL after it is freed, so calling this twice
					 * on the same descriptor no longer frees the same memory twice. A NULL
					 * descriptor is ignored.
					 *
					 * @param rl layer descriptor whose buffers are released
					 */
					void region_layer_deinit(region_layer_t *rl)
					{
					    if (rl == NULL)
					        return;
					
					    free(rl->output);
					    rl->output = NULL;
					    free(rl->boxes);
					    rl->boxes = NULL;
					    free(rl->probs_buf);
					    rl->probs_buf = NULL;
					    free(rl->probs);
					    rl->probs = NULL;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline float sigmoid(float x)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    return 1.f / (1.f + expf(-x));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Write sigmoid(input[k]) to output[k] for the n values starting at index.
					 *
					 * @param rl    layer whose input is read and whose output is written
					 * @param index offset of the first value in both buffers
					 * @param n     number of consecutive values to process
					 */
					static void activate_array(region_layer_t *rl, int index, int n)
					{
					    const float *src = &rl->input[index];
					    float *dst = &rl->output[index];
					
					    for (int remaining = n; remaining > 0; --remaining)
					        *dst++ = sigmoid(*src++);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Translate a box location and an entry number into a flat buffer offset.
					 *
					 * The location is split into an anchor part (location / cells) and a cell
					 * part (location % cells). Each anchor owns (coords + classes + 1) planes
					 * of `cells` values, and `entry` selects the plane.
					 *
					 * @param rl       layer providing the geometry
					 * @param location anchor * cells + cell
					 * @param entry    plane number inside the anchor
					 * @return flat offset of the addressed value
					 */
					static int entry_index(region_layer_t *rl, int location, int entry)
					{
					    const int cells = rl->layer_width * rl->layer_height;
					    const int anchor = location / cells;
					    const int cell = location % cells;
					
					    return anchor * cells * (rl->coords + rl->classes + 1) + entry * cells + cell;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Softmax over n values spaced `stride` elements apart.
					 *
					 * The largest input is subtracted before exponentiation, the
					 * exponentials are stored in output and then divided by their sum.
					 *
					 * @param rl     unused
					 * @param input  first input value (read even when n is 0)
					 * @param n      number of values
					 * @param stride distance between consecutive values
					 * @param output first output value, same spacing as the input
					 */
					static void softmax(region_layer_t *rl, float *input, int n, int stride, float *output)
					{
					    float peak = input[0];
					    float total = 0;
					
					    /* Pass 1: find the maximum. */
					    for (int k = 0; k < n; ++k)
					    {
					        const float v = input[k * stride];
					
					        if (v > peak)
					            peak = v;
					    }
					
					    /* Pass 2: exponentiate relative to the maximum and accumulate. */
					    for (int k = 0; k < n; ++k)
					    {
					        const float e = expf(input[k * stride] - peak);
					
					        total += e;
					        output[k * stride] = e;
					    }
					
					    /* Pass 3: normalise. */
					    for (int k = 0; k < n; ++k)
					        output[k * stride] /= total;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Run softmax() once per (batch, group) pair.
					 *
					 * For batch b and group g the softmax starts at offset
					 * b * batch_offset + g in both input and output.
					 *
					 * @param rl           forwarded to softmax()
					 * @param input        base of the input values
					 * @param n            values per softmax
					 * @param batch        number of batches
					 * @param batch_offset distance between two batches
					 * @param groups       number of groups per batch
					 * @param stride       spacing of the values inside one softmax
					 * @param output       base of the output values
					 */
					static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride, float *output)
					{
					    for (int b = 0; b < batch; ++b)
					    {
					        float *batch_in = input + b * batch_offset;
					        float *batch_out = output + b * batch_offset;
					
					        for (int g = 0; g < groups; ++g)
					            softmax(rl, batch_in + g, n, stride, batch_out + g);
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Turn the raw layer input into activated output values.
					 *
					 * Steps: copy input to output; for every anchor apply the sigmoid to the
					 * two planes starting at entry 0 and to the plane at entry 4; finally
					 * run the softmax over the class planes starting at entry coords + 1.
					 *
					 * @param rl layer whose input is read and whose output is written
					 */
					static void forward_region_layer(region_layer_t *rl)
					{
					    int offset;
					
					    for (int pos = 0; pos < rl->output_number; pos++)
					        rl->output[pos] = rl->input[pos];
					
					    for (int n = 0; n < rl->anchor_number; ++n)
					    {
					        const int first_cell = n * rl->layer_width * rl->layer_height;
					
					        /* Entries 0 and 1 are contiguous, hence two planes at once. */
					        offset = entry_index(rl, first_cell, 0);
					        activate_array(rl, offset, 2 * rl->layer_width * rl->layer_height);
					
					        offset = entry_index(rl, first_cell, 4);
					        activate_array(rl, offset, rl->layer_width * rl->layer_height);
					    }
					
					    offset = entry_index(rl, 0, rl->coords + 1);
					    softmax_cpu(rl,
					                rl->input + offset,
					                rl->classes,
					                rl->anchor_number,
					                rl->output_number / rl->anchor_number,
					                rl->layer_width * rl->layer_height,
					                rl->layer_width * rl->layer_height,
					                rl->output + offset);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Rescale every box from network coordinates to image proportions.
					 *
					 * A size (new_w, new_h) with the image's aspect ratio is fitted into the
					 * network size; each box is then shifted by the resulting border and
					 * scaled by the ratio between network size and fitted size.
					 *
					 * @param rl    layer providing network size, image size and box count
					 * @param boxes boxes_number boxes, updated in place
					 */
					static void correct_region_boxes(region_layer_t *rl, box_t *boxes)
					{
					    uint32_t net_width = rl->net_width;
					    uint32_t net_height = rl->net_height;
					    uint32_t image_width = rl->image_width;
					    uint32_t image_height = rl->image_height;
					    uint32_t boxes_number = rl->boxes_number;
					    int new_w = 0;
					    int new_h = 0;
					
					    /* Pick the dimension that limits the fit. */
					    if (((float)net_width / image_width) < ((float)net_height / image_height))
					    {
					        new_w = net_width;
					        new_h = (image_height * net_width) / image_width;
					    }
					    else
					    {
					        new_h = net_height;
					        new_w = (image_width * net_height) / image_height;
					    }
					
					    /* Loop-invariant factors, kept in the same types the per-box
					     * expressions evaluate to. */
					    const double x_border = (net_width - new_w) / 2. / net_width;
					    const double y_border = (net_height - new_h) / 2. / net_height;
					    const float x_ratio = (float)new_w / net_width;
					    const float y_ratio = (float)new_h / net_height;
					    const float w_gain = (float)net_width / new_w;
					    const float h_gain = (float)net_height / new_h;
					
					    for (uint32_t i = 0; i < boxes_number; ++i)
					    {
					        box_t *b = &boxes[i];
					
					        b->x = (b->x - x_border) / x_ratio;
					        b->y = (b->y - y_border) / y_ratio;
					        b->w *= w_gain;
					        b->h *= h_gain;
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    volatile box_t b;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    b.x = (i + x[index + 0 * stride]) / w;
 | 
				
			||||||
 | 
					    b.y = (j + x[index + 1 * stride]) / h;
 | 
				
			||||||
 | 
					    b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w;
 | 
				
			||||||
 | 
					    b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h;
 | 
				
			||||||
 | 
					    return b;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Decode all boxes and their class probabilities from the predictions.
					 *
					 * For every cell and anchor: decode the box, multiply each class value
					 * by the value stored at entry `coords`, store the product in
					 * probs[index][class] when it exceeds the threshold (0 otherwise), and
					 * store the largest product in the extra slot probs[index][classes].
					 * Finally all boxes are passed to correct_region_boxes().
					 *
					 * Compared with the original, the separate zero-fill of probs is gone
					 * (every slot is assigned in the class loop anyway), the location is
					 * computed once per box, and loop counters match the unsigned bounds.
					 *
					 * @param rl          layer providing geometry, anchors and threshold
					 * @param predictions activated layer output
					 * @param probs       per-box probability rows, classes + 1 slots each
					 * @param boxes       receives one decoded box per location
					 */
					static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes)
					{
					    const uint32_t layer_width = rl->layer_width;
					    const uint32_t layer_height = rl->layer_height;
					    const uint32_t anchor_number = rl->anchor_number;
					    const uint32_t classes = rl->classes;
					    const uint32_t coords = rl->coords;
					    const float threshold = rl->threshold;
					    const uint32_t cells = layer_width * layer_height;
					
					    for (uint32_t i = 0; i < cells; ++i)
					    {
					        const int row = i / layer_width;
					        const int col = i % layer_width;
					
					        for (uint32_t n = 0; n < anchor_number; ++n)
					        {
					            const int index = n * cells + i;
					            const int obj_index = entry_index(rl, index, coords);
					            const int box_index = entry_index(rl, index, 0);
					            const float scale = predictions[obj_index];
					            float max = 0;
					
					            boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row,
					                layer_width, layer_height, cells);
					
					            for (uint32_t j = 0; j < classes; ++j)
					            {
					                const int class_index = entry_index(rl, index, coords + 1 + j);
					                const float prob = scale * predictions[class_index];
					
					                probs[index][j] = (prob > threshold) ? prob : 0;
					                if (prob > max)
					                    max = prob;
					            }
					            /* Extra slot: best product, regardless of the threshold. */
					            probs[index][classes] = max;
					        }
					    }
					    correct_region_boxes(rl, boxes);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int nms_comparator(void *pa, void *pb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    sortable_box_t a = *(sortable_box_t *)pa;
 | 
				
			||||||
 | 
					    sortable_box_t b = *(sortable_box_t *)pb;
 | 
				
			||||||
 | 
					    float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (diff < 0)
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    else if (diff > 0)
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Signed overlap of two 1-D intervals given by centre and width.
					 *
					 * @param x1 centre of the first interval
					 * @param w1 width of the first interval
					 * @param x2 centre of the second interval
					 * @param w2 width of the second interval
					 * @return smaller right edge minus larger left edge; negative when the
					 *         intervals do not touch
					 */
					static float overlap(float x1, float w1, float x2, float w2)
					{
					    const float lo1 = x1 - w1/2;
					    const float lo2 = x2 - w2/2;
					    const float hi1 = x1 + w1/2;
					    const float hi2 = x2 + w2/2;
					    float left = lo2;
					    float right = hi2;
					
					    if (lo1 > lo2)
					        left = lo1;
					    if (hi1 < hi2)
					        right = hi1;
					
					    return right - left;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Area shared by two boxes.
					 *
					 * @param a first box
					 * @param b second box
					 * @return product of the horizontal and vertical overlap, or 0 when
					 *         either overlap is negative
					 */
					static float box_intersection(box_t a, box_t b)
					{
					    const float span_x = overlap(a.x, a.w, b.x, b.w);
					    const float span_y = overlap(a.y, a.h, b.y, b.h);
					
					    return (span_x < 0 || span_y < 0) ? 0 : span_x * span_y;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Area covered by two boxes together.
					 *
					 * @param a first box
					 * @param b second box
					 * @return sum of both areas minus their intersection
					 */
					static float box_union(box_t a, box_t b)
					{
					    const float shared = box_intersection(a, b);
					
					    return a.w * a.h + b.w * b.h - shared;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Intersection over union of two boxes.
					 *
					 * @param a first box
					 * @param b second box
					 * @return box_intersection(a, b) divided by box_union(a, b)
					 */
					static float box_iou(box_t a, box_t b)
					{
					    const float shared = box_intersection(a, b);
					    const float covered = box_union(a, b);
					
					    return shared / covered;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Per-class non-maximum suppression.
					 *
					 * For every class the boxes are sorted by descending probability; each
					 * box that still has a non-zero probability then zeroes the probability
					 * of every later box whose IoU with it exceeds rl->nms_value.
					 *
					 * The sort records live on the heap instead of in a variable-length
					 * array: the array size is boxes_number, which is not bounded here, and
					 * a zero-length VLA is undefined. If the records cannot be allocated the
					 * function returns and probs is left unsuppressed.
					 *
					 * @param rl    layer providing box count, class count and NMS threshold
					 * @param boxes decoded boxes
					 * @param probs per-box probability rows, updated in place
					 */
					static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs)
					{
					    const uint32_t boxes_number = rl->boxes_number;
					    const uint32_t classes = rl->classes;
					    const float nms_value = rl->nms_value;
					    sortable_box_t *s;
					
					    if (boxes_number == 0)
					        return;
					
					    s = malloc(boxes_number * sizeof(sortable_box_t));
					    if (s == NULL)
					        return;
					
					    for (uint32_t i = 0; i < boxes_number; ++i)
					    {
					        s[i].index = i;
					        s[i].class = 0;
					        s[i].probs = probs;
					    }
					
					    for (uint32_t k = 0; k < classes; ++k)
					    {
					        /* The comparator reads the class from the records. */
					        for (uint32_t i = 0; i < boxes_number; ++i)
					            s[i].class = k;
					        qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator);
					
					        for (uint32_t i = 0; i < boxes_number; ++i)
					        {
					            if (probs[s[i].index][k] == 0)
					                continue;
					
					            const box_t a = boxes[s[i].index];
					
					            for (uint32_t j = i + 1; j < boxes_number; ++j)
					            {
					                /* Already suppressed: zeroing again would change nothing. */
					                if (probs[s[j].index][k] == 0)
					                    continue;
					                if (box_iou(a, boxes[s[j].index]) > nms_value)
					                    probs[s[j].index][k] = 0;
					            }
					        }
					    }
					
					    free(s);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Position of the largest value in an array.
					 *
					 * @param a values to scan
					 * @param n number of values
					 * @return index of the first occurrence of the maximum; 0 when n <= 1
					 */
					static int max_index(float *a, int n)
					{
					    int best = 0;
					    int pos = 1;
					
					    while (pos < n)
					    {
					        /* Strict comparison keeps the earliest of equal values. */
					        if (a[pos] > a[best])
					            best = pos;
					        ++pos;
					    }
					    return best;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Convert a pixel coordinate computed in floating point to uint32_t.
					 *
					 * Converting a negative (or too large) float to an unsigned integer is
					 * undefined behaviour in C, so the value is saturated first. Anything that
					 * is not strictly positive, including NaN, becomes 0.
					 */
					static uint32_t region_coord_to_pixel(float coord)
					{
					    if (!(coord > 0.0f))
					        return 0u;
					    if (coord >= 4294967296.0f)
					        return UINT32_MAX;
					    return (uint32_t)coord;
					}

					/**
					 * Collect the detections of the region layer into obj_info.
					 *
					 * For every candidate box the most probable class is looked up with
					 * max_index(); boxes whose best probability exceeds rl->threshold are
					 * written to obj_info->obj as corner coordinates scaled by
					 * rl->image_width / rl->image_height.
					 *
					 * obj_info->obj is a fixed-size array, so collection stops once it is full;
					 * boxes are visited in index order, so later boxes are the ones dropped.
					 * Coordinates that would be negative are stored as 0.
					 *
					 * @param rl        region layer holding boxes, probs and thresholds
					 * @param obj_info  receives the detections and their count (obj_number)
					 */
					static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info)
					{
					    /* Capacity of the result array as declared in obj_info_t. */
					    const uint32_t obj_capacity = (uint32_t)(sizeof(obj_info->obj) / sizeof(obj_info->obj[0]));
					    const uint32_t image_width = rl->image_width;
					    const uint32_t image_height = rl->image_height;
					    const uint32_t boxes_number = rl->boxes_number;
					    const float threshold = rl->threshold;
					    const box_t *boxes = (const box_t *)rl->boxes;
					    uint32_t obj_number = 0;

					    for (uint32_t i = 0; i < boxes_number && obj_number < obj_capacity; ++i)
					    {
					        const int class_id = max_index(rl->probs[i], rl->classes);
					        const float prob = rl->probs[i][class_id];

					        if (prob > threshold)
					        {
					            const box_t *b = boxes + i;
					            /* NOTE(review): assumes the box_t fields are float - confirm against its definition. */
					            const float center_x = b->x * image_width;
					            const float center_y = b->y * image_height;
					            const float half_w = b->w * image_width / 2;
					            const float half_h = b->h * image_height / 2;

					            obj_info->obj[obj_number].x1 = region_coord_to_pixel(center_x - half_w);
					            obj_info->obj[obj_number].y1 = region_coord_to_pixel(center_y - half_h);
					            obj_info->obj[obj_number].x2 = region_coord_to_pixel(center_x + half_w);
					            obj_info->obj[obj_number].y2 = region_coord_to_pixel(center_y + half_h);
					            obj_info->obj[obj_number].class_id = class_id;
					            obj_info->obj[obj_number].prob = prob;
					            obj_number++;
					        }
					    }

					    obj_info->obj_number = obj_number;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Run the complete region-layer post-processing for one frame.
					 *
					 * The four stages are executed strictly in this order, each one consuming
					 * the state left in rl by the previous one:
					 * forward pass, box extraction, NMS sort, result export to obj_info.
					 *
					 * @param rl        region layer state (inputs, scratch buffers, settings)
					 * @param obj_info  receives the final detections
					 */
					void region_layer_run(region_layer_t *rl, obj_info_t *obj_info)
					{
					    /* Stage 1: forward pass over the layer. */
					    forward_region_layer(rl);

					    /* Stage 2: fill rl->boxes and rl->probs from rl->output. */
					    get_region_boxes(rl, rl->output, rl->probs, rl->boxes);

					    /* Stage 3: sort per class and suppress overlapping boxes. */
					    do_nms_sort(rl, rl->boxes, rl->probs);

					    /* Stage 4: export boxes above the threshold to the caller. */
					    region_layer_output(rl, obj_info);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Limit v to the inclusive range [lo, hi]. */
					static uint32_t draw_edge_clamp(uint32_t v, uint32_t lo, uint32_t hi)
					{
					    if (v < lo)
					        return lo;
					    if (v > hi)
					        return hi;
					    return v;
					}

					/**
					 * Draw corner markers for detection `index` into the frame buffer `gram`.
					 *
					 * The code treats gram as a 320x240 frame with two 16-bit pixels packed in
					 * each 32-bit word (160 words per row); these dimensions are hard-coded.
					 * At each of the four box corners it draws a horizontal stroke (8 pixels
					 * long, 2 rows thick) and a vertical stroke (2 pixels wide, 8 rows tall).
					 *
					 * All four coordinates are clamped so that every stroke stays inside the
					 * frame: x1 to 1..312, x2 to 8..318, y1 to 1..232, y2 to 8..238. Without the
					 * extra limits, y2 < 8 made `y2 - 8` wrap around and y1 > 232 ran past the
					 * last row, both of which wrote outside the buffer.
					 *
					 * The call is ignored when gram or obj_info is null or when index lies
					 * outside the obj array.
					 *
					 * @param gram      frame buffer to draw into
					 * @param obj_info  detections produced by region_layer_run()
					 * @param index     which entry of obj_info->obj to draw
					 * @param color     16-bit pixel value used for the markers
					 */
					void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color)
					{
					    const uint32_t frame_w = 320u;
					    const uint32_t frame_h = 240u;
					    const uint32_t row_words = frame_w / 2u; /* two pixels per word */
					    const uint32_t mark_len = 8u;            /* stroke length in pixels / rows */
					    /* Same colour in both halves of the word, i.e. in both packed pixels. */
					    const uint32_t data = ((uint32_t)color << 16) | (uint32_t)color;
					    uint32_t x1, y1, x2, y2;
					    uint32_t *top_left, *top_right, *bottom_left, *bottom_right;

					    if (!gram || !obj_info || index >= sizeof(obj_info->obj) / sizeof(obj_info->obj[0]))
					        return;

					    x1 = draw_edge_clamp(obj_info->obj[index].x1, 1u, frame_w - mark_len);
					    x2 = draw_edge_clamp(obj_info->obj[index].x2, mark_len, frame_w - 2u);
					    y1 = draw_edge_clamp(obj_info->obj[index].y1, 1u, frame_h - mark_len);
					    y2 = draw_edge_clamp(obj_info->obj[index].y2, mark_len, frame_h - 2u);

					    /* Horizontal strokes: 4 words (8 pixels) wide, on two consecutive rows. */
					    top_left = gram + (frame_w * y1 + x1) / 2u;
					    top_right = gram + (frame_w * y1 + x2 - mark_len) / 2u;
					    bottom_left = gram + (frame_w * (y2 - 1u) + x1) / 2u;
					    bottom_right = gram + (frame_w * (y2 - 1u) + x2 - mark_len) / 2u;
					    for (uint32_t i = 0; i < mark_len / 2u; i++)
					    {
					        top_left[i] = data;
					        top_left[i + row_words] = data;
					        top_right[i] = data;
					        top_right[i + row_words] = data;
					        bottom_left[i] = data;
					        bottom_left[i + row_words] = data;
					        bottom_right[i] = data;
					        bottom_right[i + row_words] = data;
					    }

					    /* Vertical strokes: one word (2 pixels) wide, 8 rows tall. */
					    top_left = gram + (frame_w * y1 + x1) / 2u;
					    top_right = gram + (frame_w * y1 + x2 - 2u) / 2u;
					    bottom_left = gram + (frame_w * (y2 - mark_len) + x1) / 2u;
					    bottom_right = gram + (frame_w * (y2 - mark_len) + x2 - 2u) / 2u;
					    for (uint32_t row = 0; row < mark_len; row++)
					    {
					        const uint32_t offset = row * row_words;

					        top_left[offset] = data;
					        top_right[offset] = data;
					        bottom_left[offset] = data;
					        bottom_right[offset] = data;
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,49 @@
 | 
				
			||||||
 | 
					#ifndef _REGION_LAYER
 | 
				
			||||||
 | 
					#define _REGION_LAYER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdint.h>
 | 
				
			||||||
 | 
					#include "kpu.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Detection results filled in by region_layer_run().
					 * At most 10 objects can be stored.
					 */
					typedef struct
					{
					    uint32_t obj_number; /* number of valid entries in obj[] */
					    struct
					    {
					        uint32_t x1;       /* first corner, x */
					        uint32_t y1;       /* first corner, y */
					        uint32_t x2;       /* opposite corner, x */
					        uint32_t y2;       /* opposite corner, y */
					        uint32_t class_id; /* index of the most probable class */
					        float prob;        /* probability of that class */
					    } obj[10];
					} obj_info_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    float threshold;
 | 
				
			||||||
 | 
					    float nms_value;
 | 
				
			||||||
 | 
					    uint32_t coords;
 | 
				
			||||||
 | 
					    uint32_t anchor_number;
 | 
				
			||||||
 | 
					    float *anchor;
 | 
				
			||||||
 | 
					    uint32_t image_width;
 | 
				
			||||||
 | 
					    uint32_t image_height;
 | 
				
			||||||
 | 
					    uint32_t classes;
 | 
				
			||||||
 | 
					    uint32_t net_width;
 | 
				
			||||||
 | 
					    uint32_t net_height;
 | 
				
			||||||
 | 
					    uint32_t layer_width;
 | 
				
			||||||
 | 
					    uint32_t layer_height;
 | 
				
			||||||
 | 
					    uint32_t boxes_number;
 | 
				
			||||||
 | 
					    uint32_t output_number;
 | 
				
			||||||
 | 
					    void *boxes;
 | 
				
			||||||
 | 
					    float *input;
 | 
				
			||||||
 | 
					    float *output;
 | 
				
			||||||
 | 
					    float *probs_buf;
 | 
				
			||||||
 | 
					    float **probs;
 | 
				
			||||||
 | 
					} region_layer_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height);
 | 
				
			||||||
 | 
					void region_layer_deinit(region_layer_t *rl);
 | 
				
			||||||
 | 
					void region_layer_run(region_layer_t *rl, obj_info_t *obj_info);
 | 
				
			||||||
 | 
					void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif // _REGION_LAYER
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,24 @@
 | 
				
			||||||
 | 
					# TensorFlow Lite for Microcontrollers: enabling it also selects C++ support.
					menuconfig USING_TENSORFLOWLITEMICRO
					    bool "Tensorflow Lite for Micro"
					    default n
					    select RT_USING_CPLUSPLUS

					if USING_TENSORFLOWLITEMICRO

					    # Exactly one operator implementation is chosen; the reference kernels are the default.
					    choice
					        prompt "Select Tensorflow Lite Operators Type"
					        default USING_TENSORFLOWLITEMICRO_NORMAL

					        config USING_TENSORFLOWLITEMICRO_NORMAL
					            bool "Using Tensorflow Lite normal operations"

					        config USING_TENSORFLOWLITEMICRO_CMSISNN
					            bool "Using Tensorflow Lite CMSIS NN operations"
					    endchoice

					    # Optional demo application built together with the library.
					    config USING_TENSORFLOWLITEMICRO_DEMOAPP
					        bool "Using tensorflow lite for micro demo app"
					        default n

					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,207 @@
 | 
				
			||||||
 | 
					from building import *
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Directory of this SConscript; include paths are built relative to it.
					cwd = GetCurrentDir()

					# Sources compiled for every operator variant. The paths are relative to this
					# directory and keep their original order.
					_tflite_root = 'tensorflow-lite-for-mcu/source/tensorflow/lite/'
					common = [_tflite_root + _rel for _rel in Split('''
					    micro/all_ops_resolver.cc
					    micro/debug_log.cc
					    micro/memory_helpers.cc
					    micro/micro_allocator.cc
					    micro/micro_error_reporter.cc
					    micro/micro_interpreter.cc
					    micro/micro_profiler.cc
					    micro/micro_string.cc
					    micro/micro_time.cc
					    micro/micro_utils.cc
					    micro/recording_micro_allocator.cc
					    micro/recording_simple_memory_allocator.cc
					    micro/simple_memory_allocator.cc
					    micro/test_helpers.cc
					    micro/benchmarks/keyword_scrambled_model_data.cc
					    micro/memory_planner/greedy_memory_planner.cc
					    micro/memory_planner/linear_memory_planner.cc
					    micro/testing/test_conv_model.cc
					    c/common.c
					    core/api/error_reporter.cc
					    core/api/flatbuffer_conversions.cc
					    core/api/op_resolver.cc
					    core/api/tensor_utils.cc
					    kernels/internal/quantization_util.cc
					    kernels/kernel_util.cc
					    schema/schema_utils.cc
					    micro/kernels/activations.cc
					    micro/kernels/arg_min_max.cc
					    micro/kernels/ceil.cc
					    micro/kernels/circular_buffer.cc
					    micro/kernels/comparisons.cc
					    micro/kernels/concatenation.cc
					    micro/kernels/conv_test_common.cc
					    micro/kernels/dequantize.cc
					    micro/kernels/detection_postprocess.cc
					    micro/kernels/elementwise.cc
					    micro/kernels/ethosu.cc
					    micro/kernels/flexbuffers_generated_data.cc
					    micro/kernels/floor.cc
					    micro/kernels/hard_swish.cc
					    micro/kernels/kernel_runner.cc
					    micro/kernels/kernel_util.cc
					    micro/kernels/l2norm.cc
					    micro/kernels/logical.cc
					    micro/kernels/logistic.cc
					    micro/kernels/maximum_minimum.cc
					    micro/kernels/neg.cc
					    micro/kernels/pack.cc
					    micro/kernels/pad.cc
					    micro/kernels/prelu.cc
					    micro/kernels/quantize.cc
					    micro/kernels/quantize_common.cc
					    micro/kernels/reduce.cc
					    micro/kernels/reshape.cc
					    micro/kernels/resize_nearest_neighbor.cc
					    micro/kernels/round.cc
					    micro/kernels/shape.cc
					    micro/kernels/split.cc
					    micro/kernels/split_v.cc
					    micro/kernels/strided_slice.cc
					    micro/kernels/sub.cc
					    micro/kernels/svdf_common.cc
					    micro/kernels/tanh.cc
					    micro/kernels/transpose_conv.cc
					    micro/kernels/unpack.cc
					''')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Sources of the hello_world demo application.
					_hello_world_dir = 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/examples/hello_world/'
					app = [_hello_world_dir + _demo_file for _demo_file in Split('''
					    main.cc
					    main_functions.cc
					    model.cc
					    output_handler.cc
					    constants.cc
					''')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Operator kernels from the "normal" kernel directory.
					_normal_kernel_dir = 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/normal/'
					normal_ops = [_normal_kernel_dir + _normal_file for _normal_file in Split('''
					    add.cc
					    conv.cc
					    depthwise_conv.cc
					    fully_connected.cc
					    mul.cc
					    pooling.cc
					    softmax.cc
					    svdf.cc
					''')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Operator kernels from the "cmsis-nn" kernel directory.
					_cmsis_kernel_dir = 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/kernels/cmsis-nn/'
					cmsis_ops = [_cmsis_kernel_dir + _cmsis_op_file for _cmsis_op_file in Split('''
					    add.cc
					    conv.cc
					    depthwise_conv.cc
					    fully_connected.cc
					    mul.cc
					    pooling.cc
					    softmax.cc
					    svdf.cc
					''')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CMSIS-NN library sources, grouped by their sub-directory below NN/Source.
					# Iterating the groups in order reproduces the original flat list.
					_cmsis_nn_source = 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/'
					_cmsis_nn_groups = [
					    ('ActivationFunctions', '''
					        arm_nn_activations_q15.c
					        arm_nn_activations_q7.c
					        arm_relu6_s8.c
					        arm_relu_q15.c
					        arm_relu_q7.c
					    '''),
					    ('BasicMathFunctions', '''
					        arm_elementwise_add_s8.c
					        arm_elementwise_mul_s8.c
					    '''),
					    ('ConcatenationFunctions', '''
					        arm_concatenation_s8_w.c
					        arm_concatenation_s8_x.c
					        arm_concatenation_s8_y.c
					        arm_concatenation_s8_z.c
					    '''),
					    ('ConvolutionFunctions', '''
					        arm_convolve_1_x_n_s8.c
					        arm_convolve_1x1_HWC_q7_fast_nonsquare.c
					        arm_convolve_1x1_s8_fast.c
					        arm_convolve_HWC_q15_basic.c
					        arm_convolve_HWC_q15_fast.c
					        arm_convolve_HWC_q15_fast_nonsquare.c
					        arm_convolve_HWC_q7_RGB.c
					        arm_convolve_HWC_q7_basic.c
					        arm_convolve_HWC_q7_basic_nonsquare.c
					        arm_convolve_HWC_q7_fast.c
					        arm_convolve_HWC_q7_fast_nonsquare.c
					        arm_convolve_s8.c
					        arm_convolve_wrapper_s8.c
					        arm_depthwise_conv_3x3_s8.c
					        arm_depthwise_conv_s8.c
					        arm_depthwise_conv_s8_opt.c
					        arm_depthwise_conv_u8_basic_ver1.c
					        arm_depthwise_conv_wrapper_s8.c
					        arm_depthwise_separable_conv_HWC_q7.c
					        arm_depthwise_separable_conv_HWC_q7_nonsquare.c
					        arm_nn_depthwise_conv_s8_core.c
					        arm_nn_mat_mult_kernel_q7_q15.c
					        arm_nn_mat_mult_kernel_q7_q15_reordered.c
					        arm_nn_mat_mult_kernel_s8_s16.c
					        arm_nn_mat_mult_kernel_s8_s16_reordered.c
					        arm_nn_mat_mult_s8.c
					    '''),
					    ('FullyConnectedFunctions', '''
					        arm_fully_connected_mat_q7_vec_q15.c
					        arm_fully_connected_mat_q7_vec_q15_opt.c
					        arm_fully_connected_q15.c
					        arm_fully_connected_q15_opt.c
					        arm_fully_connected_q7.c
					        arm_fully_connected_q7_opt.c
					        arm_fully_connected_s8.c
					    '''),
					    ('NNSupportFunctions', '''
					        arm_nn_accumulate_q7_to_q15.c
					        arm_nn_add_q7.c
					        arm_nn_depthwise_conv_nt_t_padded_s8.c
					        arm_nn_depthwise_conv_nt_t_s8.c
					        arm_nn_mat_mul_core_1x_s8.c
					        arm_nn_mat_mul_core_4x_s8.c
					        arm_nn_mat_mult_nt_t_s8.c
					        arm_nn_mult_q15.c
					        arm_nn_mult_q7.c
					        arm_nn_vec_mat_mult_t_s8.c
					        arm_nntables.c
					        arm_q7_to_q15_no_shift.c
					        arm_q7_to_q15_reordered_no_shift.c
					        arm_q7_to_q15_reordered_with_offset.c
					        arm_q7_to_q15_with_offset.c
					    '''),
					    ('PoolingFunctions', '''
					        arm_avgpool_s8.c
					        arm_max_pool_s8.c
					        arm_pool_q7_HWC.c
					    '''),
					    ('ReshapeFunctions', '''
					        arm_reshape_s8.c
					    '''),
					    ('SVDFunctions', '''
					        arm_svdf_s8.c
					    '''),
					    ('SoftmaxFunctions', '''
					        arm_softmax_q15.c
					        arm_softmax_q7.c
					        arm_softmax_s8.c
					        arm_softmax_u8.c
					        arm_softmax_with_batch_q7.c
					    '''),
					]
					cmsis = []
					for _cmsis_subdir, _cmsis_names in _cmsis_nn_groups:
					    for _cmsis_name in Split(_cmsis_names):
					        cmsis.append(_cmsis_nn_source + _cmsis_subdir + '/' + _cmsis_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CPPPATH = [
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/patch'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/third_party/gemmlowp'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/third_party/flatbuffers/include'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/third_party/ruy'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include'),
 | 
				
			||||||
 | 
					    os.path.join(cwd, 'tensorflow-lite-for-mcu/source/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include'),
 | 
				
			||||||
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# the embedded C++ standard library lacks some std:: math functions, so use the global math functions instead
 | 
				
			||||||
 | 
					CPPDEFINES = ['TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS', 'TF_LITE_USE_GLOBAL_MAX', 'TF_LITE_USE_GLOBAL_MIN']
 | 
				
			||||||
 | 
					src = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if GetDepend(['USING_TENSORFLOWLITEMICRO_NORMAL']):
 | 
				
			||||||
 | 
					    src += common + normal_ops
 | 
				
			||||||
 | 
					elif GetDepend(['USING_TENSORFLOWLITEMICRO_CMSISNN']):
 | 
				
			||||||
 | 
					    CPPDEFINES += ['CMSIS_NN']
 | 
				
			||||||
 | 
					    src += common + cmsis_ops + cmsis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if GetDepend(['USING_TENSORFLOWLITEMICRO_DEMOAPP']):
 | 
				
			||||||
 | 
					    src += app
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# the build scripts do not currently support building a LIBRARY with LOCAL flags (like LOCAL_CPPDEFINES),
 | 
				
			||||||
 | 
					# so a LIBRARY has to use GLOBAL flags (like CPPDEFINES)
 | 
				
			||||||
 | 
					#group = DefineGroup('tensorflow-lite-for-mcu', src, depend = ['USING_TENSORFLOWLITEMICRO'], CPPPATH = CPPPATH, CPPDEFINES = CPPDEFINES, LIBRARY = True)
 | 
				
			||||||
 | 
					# static library link order matters: apps/mnist/main.o must be placed before libtensorflow.a. TODO: fix this later
 | 
				
			||||||
 | 
					group = DefineGroup('tensorflow-lite-for-mcu', src, depend = ['USING_TENSORFLOWLITEMICRO'], CPPPATH = CPPPATH, CPPDEFINES = CPPDEFINES)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Return('group')
 | 
				
			||||||
							
								
								
									
										1
									
								
								APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/.gitignore
								
								
								
									vendored
								
								
									Normal file
								
							
							
						
						
									
										1
									
								
								APP_Framework/Framework/knowing/tensorflow-lite/tensorflow-lite-for-mcu/.gitignore
								
								
								
									vendored
								
								
									Normal file
								
							| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					*.o
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,203 @@
 | 
				
			||||||
 | 
					# Tensorflow Lite for MCU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## 仓库说明
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					该仓库是由 tensorflow 仓库生成:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					# 生成源码项目
 | 
				
			||||||
 | 
					# 不指定 'hello_world' 则生成 hello_world/image_recognition/micro_speech/... 等多个项目
 | 
				
			||||||
 | 
					# 不指定 'make' 则生成 arduino、esp-idf、keil、make、mbed 等多个工程
 | 
				
			||||||
 | 
					# 生成结果在 tensorflow/tensorflow/lite/micro/tools/make/gen/osx_x86_64/prj/hello_world/make
 | 
				
			||||||
 | 
					# 以下指令生成 hello_world 的源码项目,其中算子使用常规算子
 | 
				
			||||||
 | 
					make -f tensorflow/lite/micro/tools/make/Makefile generate_hello_world_make_project
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 以下指令生成针对 arm cortex-m 平台使用 cmsis 算子优化的源码项目,详见 tensorflow 仓库的 tensorflow/tensorflow/lite/micro/kernels/cmsis-nn/README.md
 | 
				
			||||||
 | 
					make -f tensorflow/lite/micro/tools/make/Makefile TAGS=cmsis-nn generate_hello_world_make_project
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					当前使用的 tensorflow 版本为:`e30e1d1aedb` (2021-01-07)。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					我们在其他平台使用常规算子,在 cortex-m 平台使用 cmsis 优化算子,需要将上述生成的两个源码项目整合起来,具体代码如何融合,见本文最后文件列表。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## 问题说明
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### 问题一:缺少头文件路径
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					tensorflow生成的源码项目中的 makefile 的头文件路径是不全的,需要添加以下头文件路径:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					/tensorflow/lite/micro/tools/make/downloads/cmsis
 | 
				
			||||||
 | 
					/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include
 | 
				
			||||||
 | 
					/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Include
 | 
				
			||||||
 | 
					/tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### 问题二:cmsis 缺少头文件
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					tensorflow生成的项目缺少 cmsis_gcc.h 头文件,我们将该文件放在了 patch/cmsis_gcc.h。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## 文件列表
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					# 共有文件
 | 
				
			||||||
 | 
					tensorflow/lite/micro/all_ops_resolver.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/debug_log.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/memory_helpers.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_allocator.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_error_reporter.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_interpreter.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_profiler.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_string.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_time.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/micro_utils.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/recording_micro_allocator.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/recording_simple_memory_allocator.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/simple_memory_allocator.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/test_helpers.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/memory_planner/linear_memory_planner.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/testing/test_conv_model.cc
 | 
				
			||||||
 | 
					tensorflow/lite/c/common.c
 | 
				
			||||||
 | 
					tensorflow/lite/core/api/error_reporter.cc
 | 
				
			||||||
 | 
					tensorflow/lite/core/api/flatbuffer_conversions.cc
 | 
				
			||||||
 | 
					tensorflow/lite/core/api/op_resolver.cc
 | 
				
			||||||
 | 
					tensorflow/lite/core/api/tensor_utils.cc
 | 
				
			||||||
 | 
					tensorflow/lite/kernels/internal/quantization_util.cc
 | 
				
			||||||
 | 
					tensorflow/lite/kernels/kernel_util.cc
 | 
				
			||||||
 | 
					tensorflow/lite/schema/schema_utils.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/activations.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/arg_min_max.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/ceil.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/circular_buffer.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/comparisons.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/concatenation.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/conv_test_common.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/dequantize.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/detection_postprocess.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/elementwise.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/ethosu.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/flexbuffers_generated_data.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/floor.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/hard_swish.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/kernel_runner.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/kernel_util.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/l2norm.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/logical.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/logistic.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/maximum_minimum.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/neg.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/pack.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/pad.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/prelu.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/quantize.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/quantize_common.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/reduce.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/reshape.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/round.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/shape.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/split.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/split_v.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/strided_slice.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/sub.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/svdf_common.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/tanh.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/transpose_conv.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/unpack.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/examples/hello_world/main.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/examples/hello_world/main_functions.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/examples/hello_world/model.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/examples/hello_world/output_handler.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/examples/hello_world/constants.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 常规算子,将这些文件移入 tensorflow/lite/micro/kernels/normal 中
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/add.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/conv.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/depthwise_conv.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/fully_connected.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/mul.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/pooling.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/softmax.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/svdf.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# cmsis 优化算子
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/add.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/conv.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/mul.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/kernels/cmsis-nn/svdf.cc
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c
 | 
				
			||||||
 | 
					tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -0,0 +1,203 @@
 | 
				
			||||||
 | 
					Copyright 2019 The TensorFlow Authors.  All rights reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                 Apache License
 | 
				
			||||||
 | 
					                           Version 2.0, January 2004
 | 
				
			||||||
 | 
					                        http://www.apache.org/licenses/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   1. Definitions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "License" shall mean the terms and conditions for use, reproduction,
 | 
				
			||||||
 | 
					      and distribution as defined by Sections 1 through 9 of this document.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Licensor" shall mean the copyright owner or entity authorized by
 | 
				
			||||||
 | 
					      the copyright owner that is granting the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Legal Entity" shall mean the union of the acting entity and all
 | 
				
			||||||
 | 
					      other entities that control, are controlled by, or are under common
 | 
				
			||||||
 | 
					      control with that entity. For the purposes of this definition,
 | 
				
			||||||
 | 
					      "control" means (i) the power, direct or indirect, to cause the
 | 
				
			||||||
 | 
					      direction or management of such entity, whether by contract or
 | 
				
			||||||
 | 
					      otherwise, or (ii) ownership of fifty percent (50%) or more of the
 | 
				
			||||||
 | 
					      outstanding shares, or (iii) beneficial ownership of such entity.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "You" (or "Your") shall mean an individual or Legal Entity
 | 
				
			||||||
 | 
					      exercising permissions granted by this License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Source" form shall mean the preferred form for making modifications,
 | 
				
			||||||
 | 
					      including but not limited to software source code, documentation
 | 
				
			||||||
 | 
					      source, and configuration files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Object" form shall mean any form resulting from mechanical
 | 
				
			||||||
 | 
					      transformation or translation of a Source form, including but
 | 
				
			||||||
 | 
					      not limited to compiled object code, generated documentation,
 | 
				
			||||||
 | 
					      and conversions to other media types.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Work" shall mean the work of authorship, whether in Source or
 | 
				
			||||||
 | 
					      Object form, made available under the License, as indicated by a
 | 
				
			||||||
 | 
					      copyright notice that is included in or attached to the work
 | 
				
			||||||
 | 
					      (an example is provided in the Appendix below).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Derivative Works" shall mean any work, whether in Source or Object
 | 
				
			||||||
 | 
					      form, that is based on (or derived from) the Work and for which the
 | 
				
			||||||
 | 
					      editorial revisions, annotations, elaborations, or other modifications
 | 
				
			||||||
 | 
					      represent, as a whole, an original work of authorship. For the purposes
 | 
				
			||||||
 | 
					      of this License, Derivative Works shall not include works that remain
 | 
				
			||||||
 | 
					      separable from, or merely link (or bind by name) to the interfaces of,
 | 
				
			||||||
 | 
					      the Work and Derivative Works thereof.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Contribution" shall mean any work of authorship, including
 | 
				
			||||||
 | 
					      the original version of the Work and any modifications or additions
 | 
				
			||||||
 | 
					      to that Work or Derivative Works thereof, that is intentionally
 | 
				
			||||||
 | 
					      submitted to Licensor for inclusion in the Work by the copyright owner
 | 
				
			||||||
 | 
					      or by an individual or Legal Entity authorized to submit on behalf of
 | 
				
			||||||
 | 
					      the copyright owner. For the purposes of this definition, "submitted"
 | 
				
			||||||
 | 
					      means any form of electronic, verbal, or written communication sent
 | 
				
			||||||
 | 
					      to the Licensor or its representatives, including but not limited to
 | 
				
			||||||
 | 
					      communication on electronic mailing lists, source code control systems,
 | 
				
			||||||
 | 
					      and issue tracking systems that are managed by, or on behalf of, the
 | 
				
			||||||
 | 
					      Licensor for the purpose of discussing and improving the Work, but
 | 
				
			||||||
 | 
					      excluding communication that is conspicuously marked or otherwise
 | 
				
			||||||
 | 
					      designated in writing by the copyright owner as "Not a Contribution."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      "Contributor" shall mean Licensor and any individual or Legal Entity
 | 
				
			||||||
 | 
					      on behalf of whom a Contribution has been received by Licensor and
 | 
				
			||||||
 | 
					      subsequently incorporated within the Work.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   2. Grant of Copyright License. Subject to the terms and conditions of
 | 
				
			||||||
 | 
					      this License, each Contributor hereby grants to You a perpetual,
 | 
				
			||||||
 | 
					      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 | 
				
			||||||
 | 
					      copyright license to reproduce, prepare Derivative Works of,
 | 
				
			||||||
 | 
					      publicly display, publicly perform, sublicense, and distribute the
 | 
				
			||||||
 | 
					      Work and such Derivative Works in Source or Object form.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   3. Grant of Patent License. Subject to the terms and conditions of
 | 
				
			||||||
 | 
					      this License, each Contributor hereby grants to You a perpetual,
 | 
				
			||||||
 | 
					      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 | 
				
			||||||
 | 
					      (except as stated in this section) patent license to make, have made,
 | 
				
			||||||
 | 
					      use, offer to sell, sell, import, and otherwise transfer the Work,
 | 
				
			||||||
 | 
					      where such license applies only to those patent claims licensable
 | 
				
			||||||
 | 
					      by such Contributor that are necessarily infringed by their
 | 
				
			||||||
 | 
					      Contribution(s) alone or by combination of their Contribution(s)
 | 
				
			||||||
 | 
					      with the Work to which such Contribution(s) was submitted. If You
 | 
				
			||||||
 | 
					      institute patent litigation against any entity (including a
 | 
				
			||||||
 | 
					      cross-claim or counterclaim in a lawsuit) alleging that the Work
 | 
				
			||||||
 | 
					      or a Contribution incorporated within the Work constitutes direct
 | 
				
			||||||
 | 
					      or contributory patent infringement, then any patent licenses
 | 
				
			||||||
 | 
					      granted to You under this License for that Work shall terminate
 | 
				
			||||||
 | 
					      as of the date such litigation is filed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   4. Redistribution. You may reproduce and distribute copies of the
 | 
				
			||||||
 | 
					      Work or Derivative Works thereof in any medium, with or without
 | 
				
			||||||
 | 
					      modifications, and in Source or Object form, provided that You
 | 
				
			||||||
 | 
					      meet the following conditions:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      (a) You must give any other recipients of the Work or
 | 
				
			||||||
 | 
					          Derivative Works a copy of this License; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      (b) You must cause any modified files to carry prominent notices
 | 
				
			||||||
 | 
					          stating that You changed the files; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      (c) You must retain, in the Source form of any Derivative Works
 | 
				
			||||||
 | 
					          that You distribute, all copyright, patent, trademark, and
 | 
				
			||||||
 | 
					          attribution notices from the Source form of the Work,
 | 
				
			||||||
 | 
					          excluding those notices that do not pertain to any part of
 | 
				
			||||||
 | 
					          the Derivative Works; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      (d) If the Work includes a "NOTICE" text file as part of its
 | 
				
			||||||
 | 
					          distribution, then any Derivative Works that You distribute must
 | 
				
			||||||
 | 
					          include a readable copy of the attribution notices contained
 | 
				
			||||||
 | 
					          within such NOTICE file, excluding those notices that do not
 | 
				
			||||||
 | 
					          pertain to any part of the Derivative Works, in at least one
 | 
				
			||||||
 | 
					          of the following places: within a NOTICE text file distributed
 | 
				
			||||||
 | 
					          as part of the Derivative Works; within the Source form or
 | 
				
			||||||
 | 
					          documentation, if provided along with the Derivative Works; or,
 | 
				
			||||||
 | 
					          within a display generated by the Derivative Works, if and
 | 
				
			||||||
 | 
					          wherever such third-party notices normally appear. The contents
 | 
				
			||||||
 | 
					          of the NOTICE file are for informational purposes only and
 | 
				
			||||||
 | 
					          do not modify the License. You may add Your own attribution
 | 
				
			||||||
 | 
					          notices within Derivative Works that You distribute, alongside
 | 
				
			||||||
 | 
					          or as an addendum to the NOTICE text from the Work, provided
 | 
				
			||||||
 | 
					          that such additional attribution notices cannot be construed
 | 
				
			||||||
 | 
					          as modifying the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      You may add Your own copyright statement to Your modifications and
 | 
				
			||||||
 | 
					      may provide additional or different license terms and conditions
 | 
				
			||||||
 | 
					      for use, reproduction, or distribution of Your modifications, or
 | 
				
			||||||
 | 
					      for any such Derivative Works as a whole, provided Your use,
 | 
				
			||||||
 | 
					      reproduction, and distribution of the Work otherwise complies with
 | 
				
			||||||
 | 
					      the conditions stated in this License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   5. Submission of Contributions. Unless You explicitly state otherwise,
 | 
				
			||||||
 | 
					      any Contribution intentionally submitted for inclusion in the Work
 | 
				
			||||||
 | 
					      by You to the Licensor shall be under the terms and conditions of
 | 
				
			||||||
 | 
					      this License, without any additional terms or conditions.
 | 
				
			||||||
 | 
					      Notwithstanding the above, nothing herein shall supersede or modify
 | 
				
			||||||
 | 
					      the terms of any separate license agreement you may have executed
 | 
				
			||||||
 | 
					      with Licensor regarding such Contributions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   6. Trademarks. This License does not grant permission to use the trade
 | 
				
			||||||
 | 
					      names, trademarks, service marks, or product names of the Licensor,
 | 
				
			||||||
 | 
					      except as required for reasonable and customary use in describing the
 | 
				
			||||||
 | 
					      origin of the Work and reproducing the content of the NOTICE file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   7. Disclaimer of Warranty. Unless required by applicable law or
 | 
				
			||||||
 | 
					      agreed to in writing, Licensor provides the Work (and each
 | 
				
			||||||
 | 
					      Contributor provides its Contributions) on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 | 
				
			||||||
 | 
					      implied, including, without limitation, any warranties or conditions
 | 
				
			||||||
 | 
					      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
 | 
				
			||||||
 | 
					      PARTICULAR PURPOSE. You are solely responsible for determining the
 | 
				
			||||||
 | 
					      appropriateness of using or redistributing the Work and assume any
 | 
				
			||||||
 | 
					      risks associated with Your exercise of permissions under this License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   8. Limitation of Liability. In no event and under no legal theory,
 | 
				
			||||||
 | 
					      whether in tort (including negligence), contract, or otherwise,
 | 
				
			||||||
 | 
					      unless required by applicable law (such as deliberate and grossly
 | 
				
			||||||
 | 
					      negligent acts) or agreed to in writing, shall any Contributor be
 | 
				
			||||||
 | 
					      liable to You for damages, including any direct, indirect, special,
 | 
				
			||||||
 | 
					      incidental, or consequential damages of any character arising as a
 | 
				
			||||||
 | 
					      result of this License or out of the use or inability to use the
 | 
				
			||||||
 | 
					      Work (including but not limited to damages for loss of goodwill,
 | 
				
			||||||
 | 
					      work stoppage, computer failure or malfunction, or any and all
 | 
				
			||||||
 | 
					      other commercial damages or losses), even if such Contributor
 | 
				
			||||||
 | 
					      has been advised of the possibility of such damages.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   9. Accepting Warranty or Additional Liability. While redistributing
 | 
				
			||||||
 | 
					      the Work or Derivative Works thereof, You may choose to offer,
 | 
				
			||||||
 | 
					      and charge a fee for, acceptance of support, warranty, indemnity,
 | 
				
			||||||
 | 
					      or other liability obligations and/or rights consistent with this
 | 
				
			||||||
 | 
					      License. However, in accepting such obligations, You may act only
 | 
				
			||||||
 | 
					      on Your own behalf and on Your sole responsibility, not on behalf
 | 
				
			||||||
 | 
					      of any other Contributor, and only if You agree to indemnify,
 | 
				
			||||||
 | 
					      defend, and hold each Contributor harmless for any liability
 | 
				
			||||||
 | 
					      incurred by, or claims asserted against, such Contributor by reason
 | 
				
			||||||
 | 
					      of your accepting any such warranty or additional liability.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   END OF TERMS AND CONDITIONS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   APPENDIX: How to apply the Apache License to your work.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      To apply the Apache License to your work, attach the following
 | 
				
			||||||
 | 
					      boilerplate notice, with the fields enclosed by brackets "[]"
 | 
				
			||||||
 | 
					      replaced with your own identifying information. (Don't include
 | 
				
			||||||
 | 
					      the brackets!)  The text should be enclosed in the appropriate
 | 
				
			||||||
 | 
					      comment syntax for the file format. We also recommend that a
 | 
				
			||||||
 | 
					      file or class name and description of purpose be included on the
 | 
				
			||||||
 | 
					      same "printed page" as the copyright notice for easier
 | 
				
			||||||
 | 
					      identification within third-party archives.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Copyright [yyyy] [name of copyright owner]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					   you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					   You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					       http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					   distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					   See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					   limitations under the License.
 | 
				
			||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| 
						 | 
					@ -0,0 +1,29 @@
 | 
				
			||||||
 | 
					# TensorFlow Lite Micro Make Project
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This folder has been autogenerated by TensorFlow, and contains source, header,
 | 
				
			||||||
 | 
					and project files needed to build a single TensorFlow Lite Micro target using
 | 
				
			||||||
 | 
					the make tool.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Usage
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To build this, run:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					make
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This should attempt to build the target locally on your platform, using the
 | 
				
			||||||
 | 
					standard Makefile variables like CFLAGS, CC, CXX, and so on.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Project Generation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See
 | 
				
			||||||
 | 
					[tensorflow/lite/micro](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro)
 | 
				
			||||||
 | 
					for details on how projects like this can be generated from the main source
 | 
				
			||||||
 | 
					tree.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## License
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TensorFlow's code is covered by the Apache 2.0 License included in the repository,
 | 
				
			||||||
 | 
					and third party dependencies are covered by their respective licenses, in the
 | 
				
			||||||
 | 
					third_party folder of this package.
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,139 @@
 | 
				
			||||||
 | 
					/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TensorFlow uses semantic versioning, see http://semver.org/.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Also update tensorflow/tensorflow.bzl and
 | 
				
			||||||
 | 
					// tensorflow/tools/pip_package/setup.py
 | 
				
			||||||
 | 
					#define TF_MAJOR_VERSION 2
 | 
				
			||||||
 | 
					#define TF_MINOR_VERSION 5
 | 
				
			||||||
 | 
					#define TF_PATCH_VERSION 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 | 
				
			||||||
 | 
					// "-beta", "-rc", "-rc.1")
 | 
				
			||||||
 | 
					#define TF_VERSION_SUFFIX ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TF_STR_HELPER(x) #x
 | 
				
			||||||
 | 
					#define TF_STR(x) TF_STR_HELPER(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// e.g. "0.5.0" or "0.6.0-alpha".
 | 
				
			||||||
 | 
					#define TF_VERSION_STRING                                            \
 | 
				
			||||||
 | 
					  (TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." TF_STR( \
 | 
				
			||||||
 | 
					      TF_PATCH_VERSION) TF_VERSION_SUFFIX)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// GraphDef compatibility versions (the versions field in graph.proto).
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Each graph has producer and min_consumer versions, and each
 | 
				
			||||||
 | 
					// consumer has its own version and a min_producer.  In addition, graphs can
 | 
				
			||||||
 | 
					// mark specific consumer versions as bad (to prevent bugs from executing).
 | 
				
			||||||
 | 
					// A consumer will execute a graph if the consumer's version is at least the
 | 
				
			||||||
 | 
					// graph's min_consumer, the graph's producer version is at least the consumer's
 | 
				
			||||||
 | 
					// min_producer, and the consumer version isn't specifically disallowed by the
 | 
				
			||||||
 | 
					// graph.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION,
 | 
				
			||||||
 | 
					// min_consumer TF_GRAPH_DEF_VERSION_MIN_CONSUMER, and no other bad consumer versions.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Version history:
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// 0. Graphs created before GraphDef versioning
 | 
				
			||||||
 | 
					// 1. First real version (2dec2015)
 | 
				
			||||||
 | 
					// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
 | 
				
			||||||
 | 
					// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
 | 
				
			||||||
 | 
					// 4. When support for this version is removed, we can safely make AttrValue
 | 
				
			||||||
 | 
					//    parsing more strict with respect to empty list values (see
 | 
				
			||||||
 | 
					//    111635679, 7jan2016).
 | 
				
			||||||
 | 
					// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
 | 
				
			||||||
 | 
					// 6. TensorFlow is scalar strict within Google (27jan2016).
 | 
				
			||||||
 | 
					// 7. Remove TopK in favor of TopKV2 (5feb2016).
 | 
				
			||||||
 | 
					// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
 | 
				
			||||||
 | 
					// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
 | 
				
			||||||
 | 
					// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
 | 
				
			||||||
 | 
					// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
 | 
				
			||||||
 | 
					// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
 | 
				
			||||||
 | 
					// 13. Deprecate multiple batch linear algebra ops (9sep2016).
 | 
				
			||||||
 | 
					// 14. Deprecate batch_matrix_* ops. (10sep2016).
 | 
				
			||||||
 | 
					// 15. Deprecate batch_fft_* ops. (14sep2016).
 | 
				
			||||||
 | 
					// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
 | 
				
			||||||
 | 
					// 17. Deprecate inv (11nov2016).
 | 
				
			||||||
 | 
					// 17. Expose reverse_v2 (10nov2016)
 | 
				
			||||||
 | 
					// 18. Add VariableV2 (30nov2016)
 | 
				
			||||||
 | 
					// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
 | 
				
			||||||
 | 
					//     (08dec2016)
 | 
				
			||||||
 | 
					// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
 | 
				
			||||||
 | 
					//     used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
 | 
				
			||||||
 | 
					//     now used by tf.concat. Graphs use flooring
 | 
				
			||||||
 | 
					//     division and mod semantics. TensorArrayV3. (12dec2016)
 | 
				
			||||||
 | 
					//     Also considered the version for when it is required for reduction
 | 
				
			||||||
 | 
					//     ops' indices to be scalar or vector, and not higher rank.
 | 
				
			||||||
 | 
					//     Some earlier graph def versions allowed this.
 | 
				
			||||||
 | 
					// 21. Dropped FunctionDef.Node support, switched to node_def introduced
 | 
				
			||||||
 | 
					//     in version 12. (11jan2017)
 | 
				
			||||||
 | 
					// 22. Placeholder now can specify and enforce scalar and partial
 | 
				
			||||||
 | 
					//     shapes, particularly when restoring a graph from GraphDef
 | 
				
			||||||
 | 
					//     produced at version 22 or later.  (04/10/2016)
 | 
				
			||||||
 | 
					// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
 | 
				
			||||||
 | 
					// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
 | 
				
			||||||
 | 
					// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
 | 
				
			||||||
 | 
					// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
 | 
				
			||||||
 | 
					// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
 | 
				
			||||||
 | 
					//     whether default-valued attrs have been stripped from the nodes in the
 | 
				
			||||||
 | 
					//     GraphDef. (7dec2017)
 | 
				
			||||||
 | 
					// 27. Deprecate TensorArray ops v2 in favor of v3; io_ops are likewise
 | 
				
			||||||
 | 
					//     deprecated in favor of V2 ops. (2018/01/23)
 | 
				
			||||||
 | 
					// 28. Deprecate MatrixExponential op in favor of Python implementation.
 | 
				
			||||||
 | 
					//     (2018/08/21).
 | 
				
			||||||
 | 
					// (2019/02/15). Added `control_ret` field to FunctionDef proto, and
 | 
				
			||||||
 | 
					//     `control_output` field to OpDef proto.
 | 
				
			||||||
 | 
					// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
 | 
				
			||||||
 | 
					//     (2019/03/25).
 | 
				
			||||||
 | 
					// (2019/04/17). Added `arg_attr` field to FunctionDefProto.
 | 
				
			||||||
 | 
					// 30. (2019/05/09) First date based GraphDef version. GraphDef
 | 
				
			||||||
 | 
					//     versions advance by 1 each day after this point.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 | 
				
			||||||
 | 
					#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
 | 
				
			||||||
 | 
					#define TF_GRAPH_DEF_VERSION 639  // Updated: 2021/1/7
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// The checkpoint versions have the same semantics as GraphDef versions, but the
 | 
				
			||||||
 | 
					// numbering scheme is separate.  We have no plans to ever deprecate checkpoint
 | 
				
			||||||
 | 
					// versions, but it's good to have this in place in case we ever need to.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Version history:
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// 0. Checkpoints saved before checkpoint versioning.
 | 
				
			||||||
 | 
					// 1. First real version (10feb2015).
 | 
				
			||||||
 | 
					#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
 | 
				
			||||||
 | 
					#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
 | 
				
			||||||
 | 
					#define TF_CHECKPOINT_VERSION 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Version query functions (defined in generated version_info.cc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Host compiler version (declared elsewhere to be __VERSION__)
 | 
				
			||||||
 | 
					extern const char* tf_compiler_version();
 | 
				
			||||||
 | 
					// The git commit designator when tensorflow was built
 | 
				
			||||||
 | 
					// If no git repository, this will be "internal".
 | 
				
			||||||
 | 
					extern const char* tf_git_version();
 | 
				
			||||||
 | 
					// Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
 | 
				
			||||||
 | 
					extern int tf_cxx11_abi_flag();
 | 
				
			||||||
 | 
					// Returns 1 if build is monolithic, or 0 otherwise.
 | 
				
			||||||
 | 
					extern int tf_monolithic_build();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,484 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdint.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __cplusplus
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					#endif  // __cplusplus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
 | 
				
			||||||
 | 
					// number of dimensions.
 | 
				
			||||||
 | 
					#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(aselle): Consider using "if this then that" for testing.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Useful placeholder to put in otherwise empty structs to avoid size warnings.
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  char dummy;
 | 
				
			||||||
 | 
					} EmptyStructPlaceholder;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// IMPORTANT: All new members of structs must be added at the end to ensure
 | 
				
			||||||
 | 
					// backwards compatibility.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Possible padding types (for convolutions)
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					  kTfLitePaddingUnknown = 0,
 | 
				
			||||||
 | 
					  kTfLitePaddingSame,
 | 
				
			||||||
 | 
					  kTfLitePaddingValid,
 | 
				
			||||||
 | 
					} TfLitePadding;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					  kTfLiteMirrorPaddingUnknown = 0,
 | 
				
			||||||
 | 
					  kTfLiteMirrorPaddingReflect,
 | 
				
			||||||
 | 
					  kTfLiteMirrorPaddingSymmetric,
 | 
				
			||||||
 | 
					} TfLiteMirrorPaddingMode;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(b/130259536): We should move this out of builtin_op_data.
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  int width;
 | 
				
			||||||
 | 
					  int height;
 | 
				
			||||||
 | 
					  int width_offset;
 | 
				
			||||||
 | 
					  int height_offset;
 | 
				
			||||||
 | 
					} TfLitePaddingValues;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteMirrorPaddingMode mode;
 | 
				
			||||||
 | 
					} TfLiteMirrorPaddingParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Possible fused activation functions.
 | 
				
			||||||
 | 
					// TODO(aselle): rename to TfLiteActivation
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					  kTfLiteActNone = 0,
 | 
				
			||||||
 | 
					  kTfLiteActRelu,
 | 
				
			||||||
 | 
					  kTfLiteActReluN1To1,                    // min(max(-1, x), 1)
 | 
				
			||||||
 | 
					  kTfLiteActRelu6,                        // min(max(0, x), 6)
 | 
				
			||||||
 | 
					  kTfLiteActTanh,
 | 
				
			||||||
 | 
					  kTfLiteActSignBit,
 | 
				
			||||||
 | 
					  kTfLiteActSigmoid,
 | 
				
			||||||
 | 
					} TfLiteFusedActivation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // Parameters for CONV_2D version 1.
 | 
				
			||||||
 | 
					  TfLitePadding padding;
 | 
				
			||||||
 | 
					  int stride_width;
 | 
				
			||||||
 | 
					  int stride_height;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters for CONV_2D version 2.
 | 
				
			||||||
 | 
					  // Note: Version 2 supports dilation values not equal to 1.
 | 
				
			||||||
 | 
					  int dilation_width_factor;
 | 
				
			||||||
 | 
					  int dilation_height_factor;
 | 
				
			||||||
 | 
					} TfLiteConvParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLitePadding padding;
 | 
				
			||||||
 | 
					  int stride_width;
 | 
				
			||||||
 | 
					  int stride_height;
 | 
				
			||||||
 | 
					  int filter_width;
 | 
				
			||||||
 | 
					  int filter_height;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  struct {
 | 
				
			||||||
 | 
					    TfLitePaddingValues padding;
 | 
				
			||||||
 | 
					  } computed;
 | 
				
			||||||
 | 
					} TfLitePoolParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // Parameters for DepthwiseConv version 1 or above.
 | 
				
			||||||
 | 
					  TfLitePadding padding;
 | 
				
			||||||
 | 
					  int stride_width;
 | 
				
			||||||
 | 
					  int stride_height;
 | 
				
			||||||
 | 
					  // `depth_multiplier` is redundant. It's used by CPU kernels in
 | 
				
			||||||
 | 
					  // TensorFlow 2.0 or below, but ignored in versions above.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // The information can be deduced from the shape of input and the shape of
 | 
				
			||||||
 | 
					  // weights. Since the TFLiteConverter toolchain doesn't support partially
 | 
				
			||||||
 | 
					  // specified shapes, relying on `depth_multiplier` stops us from supporting
 | 
				
			||||||
 | 
					  // graphs with dynamic shape tensors.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
 | 
				
			||||||
 | 
					  // field.
 | 
				
			||||||
 | 
					  int depth_multiplier;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  // Parameters for DepthwiseConv version 2 or above.
 | 
				
			||||||
 | 
					  int dilation_width_factor;
 | 
				
			||||||
 | 
					  int dilation_height_factor;
 | 
				
			||||||
 | 
					} TfLiteDepthwiseConvParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  int rank;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameter for SVDF version 4.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteSVDFParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameter for RNN version 3.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteRNNParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  bool time_major;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameter for Sequence RNN version 3.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteSequenceRNNParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  bool time_major;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  bool merge_outputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameter for Bidirectional RNN version 3.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteBidirectionalSequenceRNNParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					  kTfLiteFullyConnectedWeightsFormatDefault = 0,
 | 
				
			||||||
 | 
					  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
 | 
				
			||||||
 | 
					} TfLiteFullyConnectedWeightsFormat;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // Parameters for FullyConnected version 1 or above.
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters for FullyConnected version 2 or above.
 | 
				
			||||||
 | 
					  TfLiteFullyConnectedWeightsFormat weights_format;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters for FullyConnected version 5 or above.
 | 
				
			||||||
 | 
					  // If set to true, then the number of dimensions in the input and the output
 | 
				
			||||||
 | 
					  // tensors are the same. Furthermore, all but the last dimension of the input
 | 
				
			||||||
 | 
					  // and output shapes will be equal.
 | 
				
			||||||
 | 
					  bool keep_num_dims;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters for FullyConnected version 7 or above.
 | 
				
			||||||
 | 
					  // If set to true and the weights are quantized, then non constant inputs
 | 
				
			||||||
 | 
					  // are quantized at evaluation time with asymmetric quantization.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteFullyConnectedParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					  kTfLiteLshProjectionUnknown = 0,
 | 
				
			||||||
 | 
					  kTfLiteLshProjectionSparse = 1,
 | 
				
			||||||
 | 
					  kTfLiteLshProjectionDense = 2,
 | 
				
			||||||
 | 
					} TfLiteLSHProjectionType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteLSHProjectionType type;
 | 
				
			||||||
 | 
					} TfLiteLSHProjectionParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  float beta;
 | 
				
			||||||
 | 
					} TfLiteSoftmaxParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  int axis;
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					} TfLiteConcatenationParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  // Parameter added in version 4.
 | 
				
			||||||
 | 
					  bool pot_scale_int16;
 | 
				
			||||||
 | 
					} TfLiteAddParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLiteSpaceToBatchNDParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLiteBatchToSpaceNDParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  bool adj_x;
 | 
				
			||||||
 | 
					  bool adj_y;
 | 
				
			||||||
 | 
					  // Parameters for BatchMatMul version 4 or above.
 | 
				
			||||||
 | 
					  // If set to true and the weights are quantized, then non constant inputs
 | 
				
			||||||
 | 
					  // are quantized at evaluation time with asymmetric quantization.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteBatchMatMulParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					} TfLiteMulParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  // Parameter added in version 5.
 | 
				
			||||||
 | 
					  bool pot_scale_int16;
 | 
				
			||||||
 | 
					} TfLiteSubParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					} TfLiteDivParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					} TfLiteL2NormParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// LocalResponseNorm op parameters: an integer radius plus three float
					// coefficients (bias, alpha, beta).
					typedef struct {
					  int radius;
					  float bias;
					  float alpha;
					  float beta;
					} TfLiteLocalResponseNormParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Selects which LSTM kernel variant is used. The full kernel is pinned to
					// 0; the basic kernel takes the next value.
					typedef enum {
					  kTfLiteLSTMFullKernel = 0,
					  kTfLiteLSTMBasicKernel
					} TfLiteLSTMKernelType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // Parameters for LSTM version 1.
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  float cell_clip;
 | 
				
			||||||
 | 
					  float proj_clip;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters for LSTM version 2.
 | 
				
			||||||
 | 
					  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
 | 
				
			||||||
 | 
					  TfLiteLSTMKernelType kernel_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters for LSTM version 4.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteLSTMParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // Parameters needed for the underlying LSTM.
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  float cell_clip;
 | 
				
			||||||
 | 
					  float proj_clip;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // If set to true then the first dimension is time, otherwise batch.
 | 
				
			||||||
 | 
					  bool time_major;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameter for unidirectional sequence RNN version 3.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteUnidirectionalSequenceLSTMParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // Parameters supported by version 1:
 | 
				
			||||||
 | 
					  // Parameters inherited for the LSTM kernel.
 | 
				
			||||||
 | 
					  TfLiteFusedActivation activation;
 | 
				
			||||||
 | 
					  float cell_clip;
 | 
				
			||||||
 | 
					  float proj_clip;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // If true, store the outputs of both directions in the first output.
 | 
				
			||||||
 | 
					  bool merge_outputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters supported by version 2:
 | 
				
			||||||
 | 
					  // If set to true then the first dimension is time, otherwise batch.
 | 
				
			||||||
 | 
					  bool time_major;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters supported by version 4:
 | 
				
			||||||
 | 
					  // If set to true, then hybrid ops use asymmetric quantization for inputs.
 | 
				
			||||||
 | 
					  bool asymmetric_quantize_inputs;
 | 
				
			||||||
 | 
					} TfLiteBidirectionalSequenceLSTMParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ResizeBilinear op parameters.
					typedef struct {
					  bool align_corners;
					  // half_pixel_centers assumes pixels are of half the actual dimensions and
					  // yields more accurate resizes. It corresponds to the argument of the
					  // same name on the original TensorFlow op in TF2.0.
					  bool half_pixel_centers;
					} TfLiteResizeBilinearParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ResizeNearestNeighbor op parameters: two boolean flags.
					typedef struct {
					  bool align_corners;
					  bool half_pixel_centers;
					} TfLiteResizeNearestNeighborParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLitePadParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLitePadV2Params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
 | 
				
			||||||
 | 
					  // For now we will fix the maximum possible number of dimensions.
 | 
				
			||||||
 | 
					  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
 | 
				
			||||||
 | 
					  int num_dimensions;
 | 
				
			||||||
 | 
					} TfLiteReshapeParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// SkipGram op parameters: two integer sizes and one boolean flag.
					typedef struct {
					  int ngram_size;
					  int max_skip_size;
					  bool include_all_ngrams;
					} TfLiteSkipGramParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// SpaceToDepth op parameters: a single integer block size.
					typedef struct {
					  int block_size;
					} TfLiteSpaceToDepthParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// DepthToSpace op parameters: a single integer block size.
					typedef struct {
					  int block_size;
					} TfLiteDepthToSpaceParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteType in_data_type;
 | 
				
			||||||
 | 
					  TfLiteType out_data_type;
 | 
				
			||||||
 | 
					} TfLiteCastParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Combiner selector with explicitly pinned values: sum, mean and sqrtn.
					typedef enum {
					  kTfLiteCombinerTypeSum = 0,
					  kTfLiteCombinerTypeMean = 1,
					  kTfLiteCombinerTypeSqrtn = 2,
					} TfLiteCombinerType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteCombinerType combiner;
 | 
				
			||||||
 | 
					} TfLiteEmbeddingLookupSparseParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Gather op parameters: a single integer axis.
					typedef struct {
					  int axis;
					} TfLiteGatherParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLiteTransposeParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Reducer op parameters: a single keep_dims flag.
					typedef struct {
					  bool keep_dims;
					} TfLiteReducerParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Split op parameters: the number of splits.
					typedef struct {
					  int num_splits;
					} TfLiteSplitParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// SplitV op parameters: the number of splits.
					typedef struct {
					  int num_splits;
					} TfLiteSplitVParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Squeeze op parameters: a fixed-capacity list of dimensions and a count.
					typedef struct {
					  // TODO(ahentz): We can't have dynamic data in this struct, at least not
					  // yet. Until then the capacity is fixed at the maximum possible number of
					  // dimensions (8 entries).
					  int squeeze_dims[8];
					  int num_squeeze_dims;
					} TfLiteSqueezeParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// StridedSlice op parameters: five integer masks.
					typedef struct {
					  int begin_mask;
					  int end_mask;
					  int ellipsis_mask;
					  int new_axis_mask;
					  int shrink_axis_mask;
					} TfLiteStridedSliceParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteType output_type;
 | 
				
			||||||
 | 
					} TfLiteArgMaxParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteType output_type;
 | 
				
			||||||
 | 
					} TfLiteArgMinParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLitePadding padding;
 | 
				
			||||||
 | 
					  int stride_width;
 | 
				
			||||||
 | 
					  int stride_height;
 | 
				
			||||||
 | 
					} TfLiteTransposeConvParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// SparseToDense op parameters: a single validate_indices flag.
					typedef struct {
					  bool validate_indices;
					} TfLiteSparseToDenseParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteType out_type;
 | 
				
			||||||
 | 
					} TfLiteShapeParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLiteRankParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// FakeQuant op parameters, grouped by the op version that supports them.
					typedef struct {
					  // Version 1 fields.
					  float min;
					  float max;
					  int num_bits;
					  // Version 2 field.
					  bool narrow_range;
					} TfLiteFakeQuantParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Pack op parameters: a value count and an axis.
					typedef struct {
					  int values_count;
					  int axis;
					} TfLitePackParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// OneHot op parameters: a single integer axis.
					typedef struct {
					  int axis;
					} TfLiteOneHotParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Unpack op parameters: a count and an axis.
					typedef struct {
					  int num;
					  int axis;
					} TfLiteUnpackParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// LeakyRelu op parameters: a single float alpha.
					typedef struct {
					  float alpha;
					} TfLiteLeakyReluParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  TfLiteType index_out_type;
 | 
				
			||||||
 | 
					} TfLiteUniqueParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ReverseSequence op parameters: the sequence and batch dimension indices.
					typedef struct {
					  int seq_dim;
					  int batch_dim;
					} TfLiteReverseSequenceParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLiteMatrixDiagParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  EmptyStructPlaceholder placeholder;
 | 
				
			||||||
 | 
					} TfLiteMatrixSetDiagParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// If op parameters: the subgraph indices for the then and else branches.
					typedef struct {
					  int then_subgraph_index;
					  int else_subgraph_index;
					} TfLiteIfParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// While op parameters: the subgraph indices for the condition and body.
					typedef struct {
					  int cond_subgraph_index;
					  int body_subgraph_index;
					} TfLiteWhileParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Cumsum op parameters: two boolean flags.
					typedef struct {
					  bool exclusive;
					  bool reverse;
					} TfLiteCumsumParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CallOnce op parameters: the index of the init subgraph.
					typedef struct {
					  int init_subgraph_index;
					} TfLiteCallOnceParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __cplusplus
 | 
				
			||||||
 | 
					}  // extern "C"
 | 
				
			||||||
 | 
					#endif  // __cplusplus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,92 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This file declares types used by the pure C inference API defined in c_api.h,
 | 
				
			||||||
 | 
					// some of which are also used in the C++ and C kernel and interpreter APIs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_C_C_API_TYPES_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdint.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __cplusplus
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
 | 
				
			||||||
 | 
					// library.
 | 
				
			||||||
 | 
					#ifdef SWIG
 | 
				
			||||||
 | 
					#define TFL_CAPI_EXPORT
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#if defined(_WIN32)
 | 
				
			||||||
 | 
					#ifdef TFL_COMPILE_LIBRARY
 | 
				
			||||||
 | 
					#define TFL_CAPI_EXPORT __declspec(dllexport)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define TFL_CAPI_EXPORT __declspec(dllimport)
 | 
				
			||||||
 | 
					#endif  // TFL_COMPILE_LIBRARY
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
 | 
				
			||||||
 | 
					#endif  // _WIN32
 | 
				
			||||||
 | 
					#endif  // SWIG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Status codes with explicitly pinned numeric values.
					typedef enum TfLiteStatus {
					  kTfLiteOk = 0,
					  // Generally an error in the runtime (i.e. the interpreter).
					  kTfLiteError = 1,
					  // Generally an error coming from a TfLiteDelegate itself.
					  kTfLiteDelegateError = 2,
					  // Generally an error in applying a delegate because the runtime and the
					  // delegate are incompatible, e.g. returned when trying to apply a TfLite
					  // delegate onto a model graph that is already immutable.
					  kTfLiteApplicationError = 3
					} TfLiteStatus;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Element types a tensor can hold. Every enumerator has an explicitly
					// pinned numeric value.
					typedef enum {
					  kTfLiteNoType = 0,
					  kTfLiteFloat32 = 1,
					  kTfLiteInt32 = 2,
					  kTfLiteUInt8 = 3,
					  kTfLiteInt64 = 4,
					  kTfLiteString = 5,
					  kTfLiteBool = 6,
					  kTfLiteInt16 = 7,
					  kTfLiteComplex64 = 8,
					  kTfLiteInt8 = 9,
					  kTfLiteFloat16 = 10,
					  kTfLiteFloat64 = 11,
					  kTfLiteComplex128 = 12,
					  kTfLiteUInt64 = 13,
					} TfLiteType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Legacy quantization parameters, to be deprecated in favor of
					// TfLiteAffineQuantization. When per-layer quantization is specified this
					// field is still populated in addition to TfLiteAffineQuantization.
					//
					// These are the parameters for asymmetric quantization. A quantized value
					// converts back to float as:
					//     real_value = scale * (quantized_value - zero_point)
					typedef struct TfLiteQuantizationParams {
					  float scale;
					  int32_t zero_point;
					} TfLiteQuantizationParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __cplusplus
 | 
				
			||||||
 | 
					}  // extern C
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_C_C_API_TYPES_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,236 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/c_api_types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int TfLiteIntArrayGetSizeInBytes(int size) {
 | 
				
			||||||
 | 
					  static TfLiteIntArray dummy;
 | 
				
			||||||
 | 
					  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
 | 
				
			||||||
 | 
					  if (a == b) return 1;
 | 
				
			||||||
 | 
					  if (a == NULL || b == NULL) return 0;
 | 
				
			||||||
 | 
					  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
 | 
				
			||||||
 | 
					                              const int b_data[]) {
 | 
				
			||||||
 | 
					  if (a == NULL) return (b_size == 0);
 | 
				
			||||||
 | 
					  if (a->size != b_size) return 0;
 | 
				
			||||||
 | 
					  int i = 0;
 | 
				
			||||||
 | 
					  for (; i < a->size; i++)
 | 
				
			||||||
 | 
					    if (a->data[i] != b_data[i]) return 0;
 | 
				
			||||||
 | 
					  return 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteIntArray* TfLiteIntArrayCreate(int size) {
 | 
				
			||||||
 | 
					  TfLiteIntArray* ret =
 | 
				
			||||||
 | 
					      (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
 | 
				
			||||||
 | 
					  ret->size = size;
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
 | 
				
			||||||
 | 
					  if (!src) return NULL;
 | 
				
			||||||
 | 
					  TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
 | 
				
			||||||
 | 
					  if (ret) {
 | 
				
			||||||
 | 
					    memcpy(ret->data, src->data, src->size * sizeof(int));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Releases `a` by passing it to free().
					void TfLiteIntArrayFree(TfLiteIntArray* a) {
					  free(a);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int TfLiteFloatArrayGetSizeInBytes(int size) {
 | 
				
			||||||
 | 
					  static TfLiteFloatArray dummy;
 | 
				
			||||||
 | 
					  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
 | 
				
			||||||
 | 
					  TfLiteFloatArray* ret =
 | 
				
			||||||
 | 
					      (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
 | 
				
			||||||
 | 
					  ret->size = size;
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Releases `a` by passing it to free().
					void TfLiteFloatArrayFree(TfLiteFloatArray* a) {
					  free(a);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Frees the tensor's data buffer when its allocation type is
					// kTfLiteDynamic or kTfLitePersistentRo, then clears the data pointer
					// regardless of allocation type.
					void TfLiteTensorDataFree(TfLiteTensor* t) {
					  switch (t->allocation_type) {
					    case kTfLiteDynamic:
					    case kTfLitePersistentRo:
					      free(t->data.raw);
					      break;
					    default:
					      // Every other allocation type: the buffer is not freed here.
					      break;
					  }
					  t->data.raw = NULL;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Releases the parameters attached to `quantization` and resets it to
					// kTfLiteNoQuantization with a NULL params pointer.
					//
					// For kTfLiteAffineQuantization the scale and zero_point arrays and the
					// params struct itself are freed. For any other type nothing is freed and
					// the params pointer is only cleared.
					void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
					  if (quantization->type == kTfLiteAffineQuantization) {
					    TfLiteAffineQuantization* q_params =
					        (TfLiteAffineQuantization*)(quantization->params);
					    // Guard against an affine-typed entry whose params were never set;
					    // without this check the member accesses below dereference NULL.
					    if (q_params != NULL) {
					      if (q_params->scale) {
					        TfLiteFloatArrayFree(q_params->scale);
					        q_params->scale = NULL;
					      }
					      if (q_params->zero_point) {
					        TfLiteIntArrayFree(q_params->zero_point);
					        q_params->zero_point = NULL;
					      }
					      free(q_params);
					    }
					  }
					  quantization->params = NULL;
					  quantization->type = kTfLiteNoQuantization;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Frees `sparsity` and everything it points to: the traversal_order and
					// block_map arrays, the segment and index arrays of every
					// kTfLiteDimSparseCSR dimension, the dim_metadata array, and finally the
					// struct itself. A NULL `sparsity` is a no-op.
					void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
					  if (sparsity == NULL) {
					    return;
					  }
					  if (sparsity->traversal_order) {
					    TfLiteIntArrayFree(sparsity->traversal_order);
					    sparsity->traversal_order = NULL;
					  }
					  if (sparsity->block_map) {
					    TfLiteIntArrayFree(sparsity->block_map);
					    sparsity->block_map = NULL;
					  }
					  if (sparsity->dim_metadata) {
					    int i = 0;
					    for (; i < sparsity->dim_metadata_size; i++) {
					      // Work on the stored entry through a pointer. The previous code
					      // copied each entry by value, so the NULL assignments below only
					      // touched a temporary.
					      TfLiteDimensionMetadata* metadata = &sparsity->dim_metadata[i];
					      if (metadata->format == kTfLiteDimSparseCSR) {
					        TfLiteIntArrayFree(metadata->array_segments);
					        metadata->array_segments = NULL;
					        TfLiteIntArrayFree(metadata->array_indices);
					        metadata->array_indices = NULL;
					      }
					    }
					    free(sparsity->dim_metadata);
					    sparsity->dim_metadata = NULL;
					  }
					  free(sparsity);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Releases what the tensor holds: its data buffer (via
					// TfLiteTensorDataFree), dims, dims_signature, quantization parameters
					// and sparsity. The dims, dims_signature and sparsity pointers are set to
					// NULL afterwards. The TfLiteTensor struct itself is not freed.
					void TfLiteTensorFree(TfLiteTensor* t) {
					  TfLiteTensorDataFree(t);
					  if (t->dims != NULL) {
					    TfLiteIntArrayFree(t->dims);
					  }
					  t->dims = NULL;
					  if (t->dims_signature != NULL) {
					    // The cast is presumably needed because dims_signature is declared
					    // const -- confirm against the TfLiteTensor definition.
					    TfLiteIntArrayFree((TfLiteIntArray*)t->dims_signature);
					  }
					  t->dims_signature = NULL;
					  TfLiteQuantizationFree(&t->quantization);
					  TfLiteSparsityFree(t->sparsity);
					  t->sparsity = NULL;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
 | 
				
			||||||
 | 
					                       TfLiteQuantizationParams quantization, char* buffer,
 | 
				
			||||||
 | 
					                       size_t size, TfLiteAllocationType allocation_type,
 | 
				
			||||||
 | 
					                       const void* allocation, bool is_variable,
 | 
				
			||||||
 | 
					                       TfLiteTensor* tensor) {
 | 
				
			||||||
 | 
					  TfLiteTensorFree(tensor);
 | 
				
			||||||
 | 
					  tensor->type = type;
 | 
				
			||||||
 | 
					  tensor->name = name;
 | 
				
			||||||
 | 
					  tensor->dims = dims;
 | 
				
			||||||
 | 
					  tensor->params = quantization;
 | 
				
			||||||
 | 
					  tensor->data.raw = buffer;
 | 
				
			||||||
 | 
					  tensor->bytes = size;
 | 
				
			||||||
 | 
					  tensor->allocation_type = allocation_type;
 | 
				
			||||||
 | 
					  tensor->allocation = allocation;
 | 
				
			||||||
 | 
					  tensor->is_variable = is_variable;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  tensor->quantization.type = kTfLiteNoQuantization;
 | 
				
			||||||
 | 
					  tensor->quantization.params = NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Sizes the tensor's data buffer for `num_bytes`.
					//
					// Does nothing unless the allocation type is kTfLiteDynamic or
					// kTfLitePersistentRo. A tensor with no buffer gets a fresh malloc; an
					// existing buffer is grown with realloc only when `num_bytes` exceeds the
					// current size. `tensor->bytes` is set to `num_bytes` in every case that
					// passes the allocation-type check.
					//
					// If an allocation fails, data.raw ends up NULL (as before); callers can
					// detect failure by checking it. Unlike before, a failed realloc no longer
					// leaks the previous buffer.
					void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
					  if (tensor->allocation_type != kTfLiteDynamic &&
					      tensor->allocation_type != kTfLitePersistentRo) {
					    return;
					  }
					  // TODO(b/145340303): Tensor data should be aligned.
					  if (!tensor->data.raw) {
					    tensor->data.raw = malloc(num_bytes);
					  } else if (num_bytes > tensor->bytes) {
					    void* grown = realloc(tensor->data.raw, num_bytes);
					    if (grown == NULL) {
					      // realloc leaves the old block allocated when it fails. Free it
					      // here, because data.raw is about to be overwritten with NULL and
					      // the block would otherwise be unreachable.
					      free(tensor->data.raw);
					    }
					    tensor->data.raw = grown;
					  }
					  tensor->bytes = num_bytes;
					}
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Maps a TfLiteType value to a constant upper-case name string. Values
					// with no matching case yield "Unknown type". Cases are listed in the
					// numeric order of the enum.
					const char* TfLiteTypeGetName(TfLiteType type) {
					  switch (type) {
					    case kTfLiteNoType:
					      return "NOTYPE";
					    case kTfLiteFloat32:
					      return "FLOAT32";
					    case kTfLiteInt32:
					      return "INT32";
					    case kTfLiteUInt8:
					      return "UINT8";
					    case kTfLiteInt64:
					      return "INT64";
					    case kTfLiteString:
					      return "STRING";
					    case kTfLiteBool:
					      return "BOOL";
					    case kTfLiteInt16:
					      return "INT16";
					    case kTfLiteComplex64:
					      return "COMPLEX64";
					    case kTfLiteInt8:
					      return "INT8";
					    case kTfLiteFloat16:
					      return "FLOAT16";
					    case kTfLiteFloat64:
					      return "FLOAT64";
					    case kTfLiteComplex128:
					      return "COMPLEX128";
					    case kTfLiteUInt64:
					      return "UINT64";
					  }
					  // Reached only for values outside the cases above.
					  return "Unknown type";
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteDelegate TfLiteDelegateCreate() {
 | 
				
			||||||
 | 
					  TfLiteDelegate d = {
 | 
				
			||||||
 | 
					      .data_ = NULL,
 | 
				
			||||||
 | 
					      .Prepare = NULL,
 | 
				
			||||||
 | 
					      .CopyFromBufferHandle = NULL,
 | 
				
			||||||
 | 
					      .CopyToBufferHandle = NULL,
 | 
				
			||||||
 | 
					      .FreeBufferHandle = NULL,
 | 
				
			||||||
 | 
					      .flags = kTfLiteDelegateFlagsNone,
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  return d;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,913 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This file defines common C types and APIs for implementing operations,
 | 
				
			||||||
 | 
					// delegates and other constructs in TensorFlow Lite. The actual operations and
 | 
				
			||||||
 | 
					// delegates can be defined using C++, but the interface between the interpreter
 | 
				
			||||||
 | 
					// and the operations are C.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Summary of abstractions
 | 
				
			||||||
 | 
					// TF_LITE_ENSURE - Self-sufficient error checking
 | 
				
			||||||
 | 
					// TfLiteStatus - Status reporting
 | 
				
			||||||
 | 
					// TfLiteIntArray - stores tensor shapes (dims),
 | 
				
			||||||
 | 
					// TfLiteContext - allows an op to access the tensors
 | 
				
			||||||
 | 
					// TfLiteTensor - tensor (a multidimensional array)
 | 
				
			||||||
 | 
					// TfLiteNode - a single node or operation
 | 
				
			||||||
 | 
					// TfLiteRegistration - the implementation of a conceptual operation.
 | 
				
			||||||
 | 
					// TfLiteDelegate - allows delegation of nodes to alternative backends.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Some abstractions in this file are created and managed by Interpreter.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// NOTE: The order of values in these structs is "semi-ABI stable". New values
 | 
				
			||||||
 | 
					// should be added only to the end of structs and never reordered.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_C_COMMON_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_C_COMMON_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdbool.h>
 | 
				
			||||||
 | 
					#include <stddef.h>
 | 
				
			||||||
 | 
					#include <stdint.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/c_api_types.h"  // IWYU pragma: export
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __cplusplus
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					#endif  // __cplusplus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// The list of external context types known to TF Lite. This list exists solely
 | 
				
			||||||
 | 
					// to avoid conflicts and to ensure ops can share the external contexts they
 | 
				
			||||||
 | 
					// need. Access to the external contexts is controlled by one of the
 | 
				
			||||||
 | 
					// corresponding support files.
 | 
				
			||||||
 | 
					typedef enum TfLiteExternalContextType {
 | 
				
			||||||
 | 
					  kTfLiteEigenContext = 0,       // include eigen_support.h to use.
 | 
				
			||||||
 | 
					  kTfLiteGemmLowpContext = 1,    // include gemm_support.h to use.
 | 
				
			||||||
 | 
					  kTfLiteEdgeTpuContext = 2,     // Placeholder for Edge TPU support.
 | 
				
			||||||
 | 
					  kTfLiteCpuBackendContext = 3,  // include cpu_backend_context.h to use.
 | 
				
			||||||
 | 
					  kTfLiteMaxExternalContexts = 4
 | 
				
			||||||
 | 
					} TfLiteExternalContextType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Forward declare so dependent structs and methods can reference these types
 | 
				
			||||||
 | 
					// prior to the struct definitions.
 | 
				
			||||||
 | 
					struct TfLiteContext;
 | 
				
			||||||
 | 
					struct TfLiteDelegate;
 | 
				
			||||||
 | 
					struct TfLiteRegistration;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// An external context is a collection of information unrelated to the TF Lite
 | 
				
			||||||
 | 
					// framework, but useful to a subset of the ops. TF Lite knows very little
 | 
				
			||||||
 | 
					// about the actual contexts, but it keeps a list of them, and is able to
 | 
				
			||||||
 | 
					// refresh them if configurations like the number of recommended threads
 | 
				
			||||||
 | 
					// change.
 | 
				
			||||||
 | 
					typedef struct TfLiteExternalContext {
 | 
				
			||||||
 | 
					  TfLiteExternalContextType type;
 | 
				
			||||||
 | 
					  TfLiteStatus (*Refresh)(struct TfLiteContext* context);
 | 
				
			||||||
 | 
					} TfLiteExternalContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define kTfLiteOptionalTensor (-1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
 | 
				
			||||||
 | 
					// indices
 | 
				
			||||||
 | 
					typedef struct TfLiteIntArray {
 | 
				
			||||||
 | 
					  int size;
 | 
				
			||||||
 | 
					// gcc 6.1+ have a bug where flexible members aren't properly handled
 | 
				
			||||||
 | 
					// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
 | 
				
			||||||
 | 
					#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
 | 
				
			||||||
 | 
					     __GNUC_MINOR__ >= 1) ||                                      \
 | 
				
			||||||
 | 
					    defined(HEXAGON) || (__clang_major__ == 7 && __clang_minor__ == 1)
 | 
				
			||||||
 | 
					  int data[0];
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					  int data[];
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					} TfLiteIntArray;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Given the size (number of elements) in a TfLiteIntArray, calculate its size
 | 
				
			||||||
 | 
					// in bytes.
 | 
				
			||||||
 | 
					int TfLiteIntArrayGetSizeInBytes(int size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					// Create an array of a given `size` (uninitialized entries).
 | 
				
			||||||
 | 
					// This returns a pointer, that you must free using TfLiteIntArrayFree().
 | 
				
			||||||
 | 
					TfLiteIntArray* TfLiteIntArrayCreate(int size);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
 | 
				
			||||||
 | 
					int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
 | 
				
			||||||
 | 
					int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
 | 
				
			||||||
 | 
					                              const int b_data[]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					// Create a copy of an array passed as `src`.
 | 
				
			||||||
 | 
					// You are expected to free memory with TfLiteIntArrayFree
 | 
				
			||||||
 | 
					TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Free memory of array `a`.
 | 
				
			||||||
 | 
					void TfLiteIntArrayFree(TfLiteIntArray* a);
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fixed size list of floats. Used for per-channel quantization.
					// `size` is the number of elements held in the trailing `data` member.
					typedef struct TfLiteFloatArray {
					  int size;
					// gcc 6.1+ have a bug where flexible members aren't properly handled
					// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
					// This also applies to the toolchain used for Qualcomm Hexagon DSPs, so the
					// condition below checks the same toolchains as the one in TfLiteIntArray.
					#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
					     __GNUC_MINOR__ >= 1) ||                                      \
					    defined(HEXAGON) ||                                           \
					    (defined(__clang__) && __clang_major__ == 7 && __clang_minor__ == 1)
					  float data[0];
					#else
					  float data[];
					#endif
					} TfLiteFloatArray;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
 | 
				
			||||||
 | 
					// in bytes.
 | 
				
			||||||
 | 
					int TfLiteFloatArrayGetSizeInBytes(int size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					// Create an array of a given `size` (uninitialized entries).
 | 
				
			||||||
 | 
					// This returns a pointer, that you must free using TfLiteFloatArrayFree().
 | 
				
			||||||
 | 
					TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Free memory of array `a`.
 | 
				
			||||||
 | 
					void TfLiteFloatArrayFree(TfLiteFloatArray* a);
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Since we must not depend on any libraries, define a minimal subset of
 | 
				
			||||||
 | 
					// error macros while avoiding names that have pre-conceived meanings like
 | 
				
			||||||
 | 
					// assert and check.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than
 | 
				
			||||||
 | 
					// calling the context->ReportError function directly, so that message strings
 | 
				
			||||||
 | 
					// can be stripped out if the binary size needs to be severely optimized.
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STRIP_ERROR_STRINGS
 | 
				
			||||||
 | 
					#define TF_LITE_KERNEL_LOG(context, ...)            \
 | 
				
			||||||
 | 
					  do {                                              \
 | 
				
			||||||
 | 
					    (context)->ReportError((context), __VA_ARGS__); \
 | 
				
			||||||
 | 
					  } while (false)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
 | 
				
			||||||
 | 
					  do {                                                \
 | 
				
			||||||
 | 
					    if ((context) != nullptr) {                       \
 | 
				
			||||||
 | 
					      (context)->ReportError((context), __VA_ARGS__); \
 | 
				
			||||||
 | 
					    }                                                 \
 | 
				
			||||||
 | 
					  } while (false)
 | 
				
			||||||
 | 
					#else  // TF_LITE_STRIP_ERROR_STRINGS
 | 
				
			||||||
 | 
					#define TF_LITE_KERNEL_LOG(context, ...)
 | 
				
			||||||
 | 
					#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STRIP_ERROR_STRINGS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check whether value is true, and if not return kTfLiteError from
 | 
				
			||||||
 | 
					// the current function (and report the error string msg).
 | 
				
			||||||
 | 
					#define TF_LITE_ENSURE_MSG(context, value, msg)        \
 | 
				
			||||||
 | 
					  do {                                                 \
 | 
				
			||||||
 | 
					    if (!(value)) {                                    \
 | 
				
			||||||
 | 
					      TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \
 | 
				
			||||||
 | 
					      return kTfLiteError;                             \
 | 
				
			||||||
 | 
					    }                                                  \
 | 
				
			||||||
 | 
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check whether the value `a` is true, and if not return kTfLiteError from
 | 
				
			||||||
 | 
					// the current function, while also reporting the location of the error.
 | 
				
			||||||
 | 
					#define TF_LITE_ENSURE(context, a)                                      \
 | 
				
			||||||
 | 
					  do {                                                                  \
 | 
				
			||||||
 | 
					    if (!(a)) {                                                         \
 | 
				
			||||||
 | 
					      TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
 | 
				
			||||||
 | 
					                         __LINE__, #a);                                 \
 | 
				
			||||||
 | 
					      return kTfLiteError;                                              \
 | 
				
			||||||
 | 
					    }                                                                   \
 | 
				
			||||||
 | 
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TF_LITE_ENSURE_STATUS(a) \
 | 
				
			||||||
 | 
					  do {                           \
 | 
				
			||||||
 | 
					    const TfLiteStatus s = (a);  \
 | 
				
			||||||
 | 
					    if (s != kTfLiteOk) {        \
 | 
				
			||||||
 | 
					      return s;                  \
 | 
				
			||||||
 | 
					    }                            \
 | 
				
			||||||
 | 
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check whether the value `a == b` is true, and if not return kTfLiteError from
 | 
				
			||||||
 | 
					// the current function, while also reporting the location of the error.
 | 
				
			||||||
 | 
					// `a` and `b` may be evaluated more than once, so no side effects or
 | 
				
			||||||
 | 
					// extremely expensive computations should be done.
 | 
				
			||||||
 | 
					// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
 | 
				
			||||||
 | 
					#define TF_LITE_ENSURE_EQ(context, a, b)                                   \
 | 
				
			||||||
 | 
					  do {                                                                     \
 | 
				
			||||||
 | 
					    if ((a) != (b)) {                                                      \
 | 
				
			||||||
 | 
					      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
 | 
				
			||||||
 | 
					                         __LINE__, #a, #b, (a), (b));                      \
 | 
				
			||||||
 | 
					      return kTfLiteError;                                                 \
 | 
				
			||||||
 | 
					    }                                                                      \
 | 
				
			||||||
 | 
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TF_LITE_ENSURE_TYPES_EQ(context, a, b)                             \
 | 
				
			||||||
 | 
					  do {                                                                     \
 | 
				
			||||||
 | 
					    if ((a) != (b)) {                                                      \
 | 
				
			||||||
 | 
					      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
 | 
				
			||||||
 | 
					                         __LINE__, #a, #b, TfLiteTypeGetName(a),           \
 | 
				
			||||||
 | 
					                         TfLiteTypeGetName(b));                            \
 | 
				
			||||||
 | 
					      return kTfLiteError;                                                 \
 | 
				
			||||||
 | 
					    }                                                                      \
 | 
				
			||||||
 | 
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check whether `a` and `b` differ by at most `epsilon`, and if not return
					// kTfLiteError from the current function, while also reporting the location
					// of the error. `a` and `b` may be evaluated more than once, so they should
					// have no side effects. `epsilon` is parenthesized so that low-precedence
					// argument expressions (e.g. a conditional expression) compare as a whole.
					// Uses `auto` and `static_cast`, so it can only be expanded in C++ code.
					#define TF_LITE_ENSURE_NEAR(context, a, b, epsilon)                          \
					  do {                                                                       \
					    auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a));                    \
					    if (delta > (epsilon)) {                                                 \
					      TF_LITE_KERNEL_LOG((context), "%s:%d %s not near %s (%f != %f)",       \
					                         __FILE__, __LINE__, #a, #b, static_cast<double>(a), \
					                         static_cast<double>(b));                            \
					      return kTfLiteError;                                                   \
					    }                                                                        \
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TF_LITE_ENSURE_OK(context, status) \
 | 
				
			||||||
 | 
					  do {                                     \
 | 
				
			||||||
 | 
					    const TfLiteStatus s = (status);       \
 | 
				
			||||||
 | 
					    if ((s) != kTfLiteOk) {                \
 | 
				
			||||||
 | 
					      return s;                            \
 | 
				
			||||||
 | 
					    }                                      \
 | 
				
			||||||
 | 
					  } while (0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Single-precision complex data type compatible with the C99 definition.
 | 
				
			||||||
 | 
					typedef struct TfLiteComplex64 {
 | 
				
			||||||
 | 
					  float re, im;  // real and imaginary parts, respectively.
 | 
				
			||||||
 | 
					} TfLiteComplex64;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Double-precision complex data type compatible with the C99 definition.
 | 
				
			||||||
 | 
					typedef struct TfLiteComplex128 {
 | 
				
			||||||
 | 
					  double re, im;  // real and imaginary parts, respectively.
 | 
				
			||||||
 | 
					} TfLiteComplex128;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Half precision data type compatible with the C99 definition.
 | 
				
			||||||
 | 
					typedef struct TfLiteFloat16 {
 | 
				
			||||||
 | 
					  uint16_t data;
 | 
				
			||||||
 | 
					} TfLiteFloat16;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Return the name of a given type, for error reporting purposes.
 | 
				
			||||||
 | 
					const char* TfLiteTypeGetName(TfLiteType type);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// SupportedQuantizationTypes.
 | 
				
			||||||
 | 
					typedef enum TfLiteQuantizationType {
 | 
				
			||||||
 | 
					  // No quantization.
 | 
				
			||||||
 | 
					  kTfLiteNoQuantization = 0,
 | 
				
			||||||
 | 
					  // Affine quantization (with support for per-channel quantization).
 | 
				
			||||||
 | 
					  // Corresponds to TfLiteAffineQuantization.
 | 
				
			||||||
 | 
					  kTfLiteAffineQuantization = 1,
 | 
				
			||||||
 | 
					} TfLiteQuantizationType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Structure specifying the quantization used by the tensor, if-any.
 | 
				
			||||||
 | 
					typedef struct TfLiteQuantization {
 | 
				
			||||||
 | 
					  // The type of quantization held by params.
 | 
				
			||||||
 | 
					  TfLiteQuantizationType type;
 | 
				
			||||||
 | 
					  // Holds an optional reference to a quantization param structure. The actual
 | 
				
			||||||
 | 
					  // type depends on the value of the `type` field (see the comment there for
 | 
				
			||||||
 | 
					  // the values and corresponding types).
 | 
				
			||||||
 | 
					  void* params;
 | 
				
			||||||
 | 
					} TfLiteQuantization;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Parameters for asymmetric quantization across a dimension (i.e per output
 | 
				
			||||||
 | 
					// channel quantization).
 | 
				
			||||||
 | 
					// quantized_dimension specifies which dimension the scales and zero_points
 | 
				
			||||||
 | 
					// correspond to.
 | 
				
			||||||
 | 
					// For a particular value in quantized_dimension, quantized values can be
 | 
				
			||||||
 | 
					// converted back to float using:
 | 
				
			||||||
 | 
					//     real_value = scale * (quantized_value - zero_point)
 | 
				
			||||||
 | 
					typedef struct TfLiteAffineQuantization {
 | 
				
			||||||
 | 
					  TfLiteFloatArray* scale;
 | 
				
			||||||
 | 
					  TfLiteIntArray* zero_point;
 | 
				
			||||||
 | 
					  int32_t quantized_dimension;
 | 
				
			||||||
 | 
					} TfLiteAffineQuantization;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* A union of pointers that points to memory for a given tensor. */
 | 
				
			||||||
 | 
					typedef union TfLitePtrUnion {
 | 
				
			||||||
 | 
					  /* Do not access these members directly, if possible, use
 | 
				
			||||||
 | 
					   * GetTensorData<TYPE>(tensor) instead, otherwise only access .data, as other
 | 
				
			||||||
 | 
					   * members are deprecated. */
 | 
				
			||||||
 | 
					  int32_t* i32;
 | 
				
			||||||
 | 
					  int64_t* i64;
 | 
				
			||||||
 | 
					  uint64_t* u64;
 | 
				
			||||||
 | 
					  float* f;
 | 
				
			||||||
 | 
					  TfLiteFloat16* f16;
 | 
				
			||||||
 | 
					  double* f64;
 | 
				
			||||||
 | 
					  char* raw;
 | 
				
			||||||
 | 
					  const char* raw_const;
 | 
				
			||||||
 | 
					  uint8_t* uint8;
 | 
				
			||||||
 | 
					  bool* b;
 | 
				
			||||||
 | 
					  int16_t* i16;
 | 
				
			||||||
 | 
					  TfLiteComplex64* c64;
 | 
				
			||||||
 | 
					  TfLiteComplex128* c128;
 | 
				
			||||||
 | 
					  int8_t* int8;
 | 
				
			||||||
 | 
					  /* Only use this member. */
 | 
				
			||||||
 | 
					  void* data;
 | 
				
			||||||
 | 
					} TfLitePtrUnion;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Memory allocation strategies.
 | 
				
			||||||
 | 
					//  * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
 | 
				
			||||||
 | 
					//  * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
 | 
				
			||||||
 | 
					//        and available during eval.
 | 
				
			||||||
 | 
					//  * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
 | 
				
			||||||
 | 
					//        only available during eval.
 | 
				
			||||||
 | 
					//  * kTfLiteDynamic: Allocated during eval, or for string tensors.
 | 
				
			||||||
 | 
					//  * kTfLitePersistentRo: Allocated and populated during prepare. This is
 | 
				
			||||||
 | 
					//        useful for tensors that can be computed during prepare and treated
 | 
				
			||||||
 | 
					//        as constant inputs for downstream ops (also in prepare).
 | 
				
			||||||
 | 
					//  * kTfLiteCustom: Custom memory allocation provided by the user. See
 | 
				
			||||||
 | 
					//        TfLiteCustomAllocation below.
 | 
				
			||||||
 | 
					typedef enum TfLiteAllocationType {
 | 
				
			||||||
 | 
					  kTfLiteMemNone = 0,
 | 
				
			||||||
 | 
					  kTfLiteMmapRo,
 | 
				
			||||||
 | 
					  kTfLiteArenaRw,
 | 
				
			||||||
 | 
					  kTfLiteArenaRwPersistent,
 | 
				
			||||||
 | 
					  kTfLiteDynamic,
 | 
				
			||||||
 | 
					  kTfLitePersistentRo,
 | 
				
			||||||
 | 
					  kTfLiteCustom,
 | 
				
			||||||
 | 
					} TfLiteAllocationType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// The delegates should use zero or positive integers to represent handles.
 | 
				
			||||||
 | 
					// -1 is reserved for unallocated status.
 | 
				
			||||||
 | 
					typedef int TfLiteBufferHandle;
 | 
				
			||||||
 | 
					enum {
 | 
				
			||||||
 | 
					  kTfLiteNullBufferHandle = -1,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Storage format of each dimension in a sparse tensor.
 | 
				
			||||||
 | 
					typedef enum TfLiteDimensionType {
 | 
				
			||||||
 | 
					  kTfLiteDimDense = 0,
 | 
				
			||||||
 | 
					  kTfLiteDimSparseCSR,
 | 
				
			||||||
 | 
					} TfLiteDimensionType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Metadata to encode each dimension in a sparse tensor.
 | 
				
			||||||
 | 
					typedef struct TfLiteDimensionMetadata {
 | 
				
			||||||
 | 
					  TfLiteDimensionType format;
 | 
				
			||||||
 | 
					  int dense_size;
 | 
				
			||||||
 | 
					  TfLiteIntArray* array_segments;
 | 
				
			||||||
 | 
					  TfLiteIntArray* array_indices;
 | 
				
			||||||
 | 
					} TfLiteDimensionMetadata;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Parameters used to encode a sparse tensor. For detailed explanation of each
 | 
				
			||||||
 | 
					// field please refer to lite/schema/schema.fbs.
 | 
				
			||||||
 | 
					typedef struct TfLiteSparsity {
 | 
				
			||||||
 | 
					  TfLiteIntArray* traversal_order;
 | 
				
			||||||
 | 
					  TfLiteIntArray* block_map;
 | 
				
			||||||
 | 
					  TfLiteDimensionMetadata* dim_metadata;
 | 
				
			||||||
 | 
					  int dim_metadata_size;
 | 
				
			||||||
 | 
					} TfLiteSparsity;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Defines a custom memory allocation not owned by the runtime.
 | 
				
			||||||
 | 
					// `data` should be aligned to kDefaultTensorAlignment defined in
 | 
				
			||||||
 | 
					// lite/util.h. (Currently 64 bytes)
 | 
				
			||||||
 | 
					// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage.
 | 
				
			||||||
 | 
					typedef struct TfLiteCustomAllocation {
 | 
				
			||||||
 | 
					  void* data;
 | 
				
			||||||
 | 
					  size_t bytes;
 | 
				
			||||||
 | 
					} TfLiteCustomAllocation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// A tensor in the interpreter system which is a wrapper around a buffer of
 | 
				
			||||||
 | 
					// data including a dimensionality (or NULL if not currently defined).
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					typedef struct TfLiteTensor {
 | 
				
			||||||
 | 
					  // The data type specification for data stored in `data`. This affects
 | 
				
			||||||
 | 
					  // what member of `data` union should be used.
 | 
				
			||||||
 | 
					  TfLiteType type;
 | 
				
			||||||
 | 
					  // A union of data pointers. The appropriate type should be used for a typed
 | 
				
			||||||
 | 
					  // tensor based on `type`.
 | 
				
			||||||
 | 
					  TfLitePtrUnion data;
 | 
				
			||||||
 | 
					  // A pointer to a structure representing the dimensionality interpretation
 | 
				
			||||||
 | 
					  // that the buffer should have. NOTE: the product of elements of `dims`
 | 
				
			||||||
 | 
					  // and the element datatype size should be equal to `bytes` below.
 | 
				
			||||||
 | 
					  TfLiteIntArray* dims;
 | 
				
			||||||
 | 
					  // Quantization information.
 | 
				
			||||||
 | 
					  TfLiteQuantizationParams params;
 | 
				
			||||||
 | 
					  // How memory is mapped
 | 
				
			||||||
 | 
					  //  kTfLiteMmapRo: Memory mapped read only.
 | 
				
			||||||
 | 
					  //  i.e. weights
 | 
				
			||||||
 | 
					  //  kTfLiteArenaRw: Arena allocated read write memory
 | 
				
			||||||
 | 
					  //  (i.e. temporaries, outputs).
 | 
				
			||||||
 | 
					  TfLiteAllocationType allocation_type;
 | 
				
			||||||
 | 
					  // The number of bytes required to store the data of this Tensor. I.e.
 | 
				
			||||||
 | 
					  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
 | 
				
			||||||
 | 
					  // type is kTfLiteFloat32 and dims = {3, 2} then
 | 
				
			||||||
 | 
					  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
 | 
				
			||||||
 | 
					  size_t bytes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // An opaque pointer to a tflite::MMapAllocation
 | 
				
			||||||
 | 
					  const void* allocation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Null-terminated name of this tensor.
 | 
				
			||||||
 | 
					  const char* name;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The delegate which knows how to handle `buffer_handle`.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  struct TfLiteDelegate* delegate;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // An integer buffer handle that can be handled by `delegate`.
 | 
				
			||||||
 | 
					  // The value is valid only when delegate is not null.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteBufferHandle buffer_handle;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // If the delegate uses its own buffer (e.g. GPU memory), the delegate is
 | 
				
			||||||
 | 
					  // responsible to set data_is_stale to true.
 | 
				
			||||||
 | 
					  // `delegate->CopyFromBufferHandle` can be called to copy the data from
 | 
				
			||||||
 | 
					  // delegate buffer.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  bool data_is_stale;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // True if the tensor is a variable.
 | 
				
			||||||
 | 
					  bool is_variable;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Quantization information. Replaces params field above.
 | 
				
			||||||
 | 
					  TfLiteQuantization quantization;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Parameters used to encode a sparse tensor.
 | 
				
			||||||
 | 
					  // This is optional. The field is NULL if a tensor is dense.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteSparsity* sparsity;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Optional. Encodes shapes with unknown dimensions with -1. This field is
 | 
				
			||||||
 | 
					  // only populated when unknown dimensions exist in a read-write tensor (i.e.
 | 
				
			||||||
 | 
					  // an input or output tensor). (e.g.  `dims` contains [1, 1, 1, 3] and
 | 
				
			||||||
 | 
					  // `dims_signature` contains [1, -1, -1, 3]).
 | 
				
			||||||
 | 
					  const TfLiteIntArray* dims_signature;
 | 
				
			||||||
 | 
					} TfLiteTensor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// A structure representing an instance of a node.
 | 
				
			||||||
 | 
					// This structure only exhibits the inputs, outputs and user defined data, not
 | 
				
			||||||
 | 
					// other features like the type.
 | 
				
			||||||
 | 
					typedef struct TfLiteNode {
 | 
				
			||||||
 | 
					  // Inputs to this node expressed as indices into the simulator's tensors.
 | 
				
			||||||
 | 
					  TfLiteIntArray* inputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Outputs to this node expressed as indices into the simulator's tensors.
 | 
				
			||||||
 | 
					  TfLiteIntArray* outputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // intermediate tensors to this node expressed as indices into the simulator's
 | 
				
			||||||
 | 
					  // tensors.
 | 
				
			||||||
 | 
					  TfLiteIntArray* intermediates;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Temporary tensors used during the computations. This usually contains no
 | 
				
			||||||
 | 
					  // tensors, but ops are allowed to change that if they need scratch space of
 | 
				
			||||||
 | 
					  // any sort.
 | 
				
			||||||
 | 
					  TfLiteIntArray* temporaries;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Opaque data provided by the node implementer through `Registration.init`.
 | 
				
			||||||
 | 
					  void* user_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Opaque data provided to the node if the node is a builtin. This is usually
 | 
				
			||||||
 | 
					  // a structure defined in builtin_op_data.h
 | 
				
			||||||
 | 
					  void* builtin_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Custom initial data. This is the opaque data provided in the flatbuffer.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  const void* custom_initial_data;
 | 
				
			||||||
 | 
					  int custom_initial_data_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The pointer to the delegate. This is non-null only when the node is
 | 
				
			||||||
 | 
					  // created by calling `interpreter.ModifyGraphWithDelegate`.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  struct TfLiteDelegate* delegate;
 | 
				
			||||||
 | 
					} TfLiteNode;
 | 
				
			||||||
 | 
					#else   // defined(TF_LITE_STATIC_MEMORY)?
 | 
				
			||||||
 | 
					// NOTE: This flag is opt-in only at compile time.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
 | 
				
			||||||
 | 
					// contains only the minimum fields required to initialize and prepare a micro
 | 
				
			||||||
 | 
					// inference graph. The fields in this struct have been ordered from
 | 
				
			||||||
 | 
					// largest-to-smallest for optimal struct sizeof.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// This struct does not use:
 | 
				
			||||||
 | 
					// - allocation
 | 
				
			||||||
 | 
					// - buffer_handle
 | 
				
			||||||
 | 
					// - data_is_stale
 | 
				
			||||||
 | 
					// - delegate
 | 
				
			||||||
 | 
					// - dims_signature
 | 
				
			||||||
 | 
					// - name
 | 
				
			||||||
 | 
					// - sparsity
 | 
				
			||||||
 | 
					typedef struct TfLiteTensor {
 | 
				
			||||||
 | 
					  // TODO(b/155784997): Consider consolidating these quantization fields:
 | 
				
			||||||
 | 
					  // Quantization information. Replaces the `params` field below.
 | 
				
			||||||
 | 
					  TfLiteQuantization quantization;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Quantization information.
 | 
				
			||||||
 | 
					  TfLiteQuantizationParams params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // A union of data pointers. The appropriate type should be used for a typed
 | 
				
			||||||
 | 
					  // tensor based on `type`.
 | 
				
			||||||
 | 
					  TfLitePtrUnion data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // A pointer to a structure representing the dimensionality interpretation
 | 
				
			||||||
 | 
					  // that the buffer should have. NOTE: the product of elements of `dims`
 | 
				
			||||||
 | 
					  // and the element datatype size should be equal to `bytes` below.
 | 
				
			||||||
 | 
					  TfLiteIntArray* dims;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The number of bytes required to store the data of this Tensor. I.e.
 | 
				
			||||||
 | 
					  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
 | 
				
			||||||
 | 
					  // type is kTfLiteFloat32 and dims = {3, 2} then
 | 
				
			||||||
 | 
					  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
 | 
				
			||||||
 | 
					  size_t bytes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The data type specification for data stored in `data`. This affects
 | 
				
			||||||
 | 
					  // what member of `data` union should be used.
 | 
				
			||||||
 | 
					  TfLiteType type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // How memory is mapped
 | 
				
			||||||
 | 
					  //  kTfLiteMmapRo: Memory mapped read only.
 | 
				
			||||||
 | 
					  //  i.e. weights
 | 
				
			||||||
 | 
					  //  kTfLiteArenaRw: Arena allocated read write memory
 | 
				
			||||||
 | 
					  //  (i.e. temporaries, outputs).
 | 
				
			||||||
 | 
					  TfLiteAllocationType allocation_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // True if the tensor is a variable.
 | 
				
			||||||
 | 
					  bool is_variable;
 | 
				
			||||||
 | 
					} TfLiteTensor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
 | 
				
			||||||
 | 
					// only the minimum fields required to represent a node.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// This struct does not use:
 | 
				
			||||||
 | 
					// - delegate
 | 
				
			||||||
 | 
					// - intermediates
 | 
				
			||||||
 | 
					// - temporaries
 | 
				
			||||||
 | 
					typedef struct TfLiteNode {
 | 
				
			||||||
 | 
					  // Inputs to this node expressed as indices into the simulator's tensors.
 | 
				
			||||||
 | 
					  TfLiteIntArray* inputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Outputs to this node expressed as indices into the simulator's tensors.
 | 
				
			||||||
 | 
					  TfLiteIntArray* outputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Opaque data provided by the node implementer through `Registration.init`.
 | 
				
			||||||
 | 
					  void* user_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Opaque data provided to the node if the node is a builtin. This is usually
 | 
				
			||||||
 | 
					  // a structure defined in builtin_op_data.h
 | 
				
			||||||
 | 
					  void* builtin_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Custom initial data. This is the opaque data provided in the flatbuffer.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  const void* custom_initial_data;
 | 
				
			||||||
 | 
					  int custom_initial_data_size;
 | 
				
			||||||
 | 
					} TfLiteNode;
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
 | 
				
			||||||
 | 
					// of information required for a kernel to run during TfLiteRegistration::Eval.
 | 
				
			||||||
 | 
					// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
 | 
				
			||||||
 | 
					// builds with this flag by default internally.
 | 
				
			||||||
 | 
					typedef struct TfLiteEvalTensor {
 | 
				
			||||||
 | 
					  // A union of data pointers. The appropriate type should be used for a typed
 | 
				
			||||||
 | 
					  // tensor based on `type`.
 | 
				
			||||||
 | 
					  TfLitePtrUnion data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // A pointer to a structure representing the dimensionality interpretation
 | 
				
			||||||
 | 
					  // that the buffer should have.
 | 
				
			||||||
 | 
					  TfLiteIntArray* dims;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The data type specification for data stored in `data`. This affects
 | 
				
			||||||
 | 
					  // what member of `data` union should be used.
 | 
				
			||||||
 | 
					  TfLiteType type;
 | 
				
			||||||
 | 
					} TfLiteEvalTensor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					// Free data memory of tensor `t`.
 | 
				
			||||||
 | 
					void TfLiteTensorDataFree(TfLiteTensor* t);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Free quantization data.
 | 
				
			||||||
 | 
					void TfLiteQuantizationFree(TfLiteQuantization* quantization);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Free sparsity parameters.
 | 
				
			||||||
 | 
					void TfLiteSparsityFree(TfLiteSparsity* sparsity);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Free memory of tensor `t`.
 | 
				
			||||||
 | 
					void TfLiteTensorFree(TfLiteTensor* t);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Set all of a tensor's fields (and free any previously allocated data).
 | 
				
			||||||
 | 
					void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
 | 
				
			||||||
 | 
					                       TfLiteQuantizationParams quantization, char* buffer,
 | 
				
			||||||
 | 
					                       size_t size, TfLiteAllocationType allocation_type,
 | 
				
			||||||
 | 
					                       const void* allocation, bool is_variable,
 | 
				
			||||||
 | 
					                       TfLiteTensor* tensor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
 | 
				
			||||||
 | 
					// types other than kTfLiteDynamic will be ignored.
 | 
				
			||||||
 | 
					void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
 | 
				
			||||||
 | 
					#endif  // TF_LITE_STATIC_MEMORY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Currently, TfLiteDelegateParams has to be allocated in a way that it's
 | 
				
			||||||
 | 
					// trivially destructible. It will be stored as the `builtin_data` field in
 | 
				
			||||||
 | 
					// `TfLiteNode` of the delegate node.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// See also the `CreateDelegateParams` function in `interpreter.cc` for details.
 | 
				
			||||||
 | 
					typedef struct TfLiteDelegateParams {
 | 
				
			||||||
 | 
					  struct TfLiteDelegate* delegate;
 | 
				
			||||||
 | 
					  TfLiteIntArray* nodes_to_replace;
 | 
				
			||||||
 | 
					  TfLiteIntArray* input_tensors;
 | 
				
			||||||
 | 
					  TfLiteIntArray* output_tensors;
 | 
				
			||||||
 | 
					} TfLiteDelegateParams;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct TfLiteContext {
 | 
				
			||||||
 | 
					  // Number of tensors in the context.
 | 
				
			||||||
 | 
					  size_t tensors_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The execution plan contains a list of the node indices in execution
 | 
				
			||||||
 | 
					  // order. execution_plan->size is the current number of nodes. And,
 | 
				
			||||||
 | 
					  // execution_plan->data[0] is the first node that needs to be run.
 | 
				
			||||||
 | 
					  // TfLiteDelegates can traverse the current execution plan by iterating
 | 
				
			||||||
 | 
					  // through each member of this array and using GetNodeAndRegistration() to
 | 
				
			||||||
 | 
					  // access details about a node. i.e.
 | 
				
			||||||
 | 
					  // TfLiteIntArray* execution_plan;
 | 
				
			||||||
 | 
					  // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
 | 
				
			||||||
 | 
					  // for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
 | 
				
			||||||
 | 
					  //    int node_index = execution_plan->data[exec_index];
 | 
				
			||||||
 | 
					  //    TfLiteNode* node;
 | 
				
			||||||
 | 
					  //    TfLiteRegistration* reg;
 | 
				
			||||||
 | 
					  //    context->GetNodeAndRegistration(context, node_index, &node, &reg);
 | 
				
			||||||
 | 
					  // }
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
 | 
				
			||||||
 | 
					                                   TfLiteIntArray** execution_plan);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // An array of tensors in the interpreter context (of length `tensors_size`)
 | 
				
			||||||
 | 
					  TfLiteTensor* tensors;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // opaque full context ptr (an opaque c++ data structure)
 | 
				
			||||||
 | 
					  void* impl_;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Request memory pointer be resized. Updates dimensions on the tensor.
 | 
				
			||||||
 | 
					  // NOTE: ResizeTensor takes ownership of `new_size`.
 | 
				
			||||||
 | 
					  TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
 | 
				
			||||||
 | 
					                               TfLiteIntArray* new_size);
 | 
				
			||||||
 | 
					  // Request that an error be reported with format string msg.
 | 
				
			||||||
 | 
					  void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries.  If
 | 
				
			||||||
 | 
					  // non-null, the value pointed to by `first_new_tensor_index` will be set to
 | 
				
			||||||
 | 
					  // the index of the first new tensor.
 | 
				
			||||||
 | 
					  TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
 | 
				
			||||||
 | 
					                             int* first_new_tensor_index);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Get a node and its registration by node_index.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteStatus (*GetNodeAndRegistration)(
 | 
				
			||||||
 | 
					      struct TfLiteContext*, int node_index, TfLiteNode** node,
 | 
				
			||||||
 | 
					      struct TfLiteRegistration** registration);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Replace ops with one or more stub delegate operations. This function
 | 
				
			||||||
 | 
					  // does not take ownership of `nodes_to_replace`.
 | 
				
			||||||
 | 
					  TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
 | 
				
			||||||
 | 
					      struct TfLiteContext*, struct TfLiteRegistration registration,
 | 
				
			||||||
 | 
					      const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Number of threads that are recommended to subsystems like gemmlowp and
 | 
				
			||||||
 | 
					  // eigen.
 | 
				
			||||||
 | 
					  int recommended_num_threads;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Access external contexts by type.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
 | 
				
			||||||
 | 
					                                               TfLiteExternalContextType);
 | 
				
			||||||
 | 
					  // Set the value of an external context. Does not take ownership of the
 | 
				
			||||||
 | 
					  // pointer.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
 | 
				
			||||||
 | 
					                             TfLiteExternalContext*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Flag for allowing float16 precision for FP32 calculation.
 | 
				
			||||||
 | 
					  // default: false.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental API and subject to change.
 | 
				
			||||||
 | 
					  bool allow_fp32_relax_to_fp16;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Pointer to the op-level profiler, if set; nullptr otherwise.
 | 
				
			||||||
 | 
					  void* profiler;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Allocate persistent buffer which has the same life time as the interpreter.
 | 
				
			||||||
 | 
					  // Returns nullptr on failure.
 | 
				
			||||||
 | 
					  // The memory is allocated from heap for TFL, and from tail in TFLM.
 | 
				
			||||||
 | 
					  // This method is only available in Init or Prepare stage.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Allocate a buffer which will be deallocated right after invoke phase.
 | 
				
			||||||
 | 
					  // The memory is allocated from heap in TFL, and from volatile arena in TFLM.
 | 
				
			||||||
 | 
					  // This method is only available in invoke stage.
 | 
				
			||||||
 | 
					  // NOTE: If possible use RequestScratchBufferInArena method to avoid memory
 | 
				
			||||||
 | 
					  // allocation during inference time.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
 | 
				
			||||||
 | 
					                                        void** ptr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Request a scratch buffer in the arena through static memory planning.
 | 
				
			||||||
 | 
					  // This method is only available in Prepare stage and the buffer is allocated
 | 
				
			||||||
 | 
					  // by the interpreter between Prepare and Eval stage. In Eval stage,
 | 
				
			||||||
 | 
					  // GetScratchBuffer API can be used to fetch the address.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
 | 
				
			||||||
 | 
					                                              size_t bytes, int* buffer_idx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Get the scratch buffer pointer.
 | 
				
			||||||
 | 
					  // This method is only available in Eval stage.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Resize the memory pointer of the `tensor`. This method behaves the same as
 | 
				
			||||||
 | 
					  // `ResizeTensor`, except that it makes a copy of the shape array internally
 | 
				
			||||||
 | 
					  // so the shape array could be deallocated right afterwards.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
 | 
				
			||||||
 | 
					                                       TfLiteTensor* tensor, int dims,
 | 
				
			||||||
 | 
					                                       const int* shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // This method provides a preview of post-delegation partitioning. Each
 | 
				
			||||||
 | 
					  // TfLiteDelegateParams in the referenced array corresponds to one instance of
 | 
				
			||||||
 | 
					  // the delegate kernel.
 | 
				
			||||||
 | 
					  // Example usage:
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // TfLiteIntArray* nodes_to_replace = ...;
 | 
				
			||||||
 | 
					  // TfLiteDelegateParams* params_array;
 | 
				
			||||||
 | 
					  // int num_partitions = 0;
 | 
				
			||||||
 | 
					  // TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
 | 
				
			||||||
 | 
					  //    context, delegate, nodes_to_replace, &params_array, &num_partitions));
 | 
				
			||||||
 | 
					  // for (int idx = 0; idx < num_partitions; idx++) {
 | 
				
			||||||
 | 
					  //    const auto& partition_params = params_array[idx];
 | 
				
			||||||
 | 
					  //    ...
 | 
				
			||||||
 | 
					  // }
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // NOTE: The context owns the memory referenced by partition_params_array. It
 | 
				
			||||||
 | 
					  // will be cleared with another call to PreviewDelegatePartitioning, or after
 | 
				
			||||||
 | 
					  // TfLiteDelegate::Prepare returns.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  TfLiteStatus (*PreviewDelegatePartitioning)(
 | 
				
			||||||
 | 
					      struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
 | 
				
			||||||
 | 
					      TfLiteDelegateParams** partition_params_array, int* num_partitions);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Returns a TfLiteTensor struct for a given index.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  // WARNING: This method may not be available on all platforms.
 | 
				
			||||||
 | 
					  TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
 | 
				
			||||||
 | 
					                             int tensor_idx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Returns a TfLiteEvalTensor struct for a given index.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  // WARNING: This method may not be available on all platforms.
 | 
				
			||||||
 | 
					  TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
 | 
				
			||||||
 | 
					                                     int tensor_idx);
 | 
				
			||||||
 | 
					} TfLiteContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct TfLiteRegistration {
 | 
				
			||||||
 | 
					  // Initializes the op from serialized data.
 | 
				
			||||||
 | 
					  // If a built-in op:
 | 
				
			||||||
 | 
					  //   `buffer` is the op's params data (TfLiteLSTMParams*).
 | 
				
			||||||
 | 
					  //   `length` is zero.
 | 
				
			||||||
 | 
					  // If custom op:
 | 
				
			||||||
 | 
					  //   `buffer` is the op's `custom_options`.
 | 
				
			||||||
 | 
					  //   `length` is the size of the buffer.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
 | 
				
			||||||
 | 
					  // or an instance of a struct).
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // The returned pointer will be stored with the node in the `user_data` field,
 | 
				
			||||||
 | 
					  // accessible within prepare and invoke functions below.
 | 
				
			||||||
 | 
					  // NOTE: if the data is already in the desired format, simply implement this
 | 
				
			||||||
 | 
					  // function to return `nullptr` and implement the free function to be a no-op.
 | 
				
			||||||
 | 
					  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The pointer `buffer` is the data previously returned by an init invocation.
 | 
				
			||||||
 | 
					  void (*free)(TfLiteContext* context, void* buffer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // prepare is called when the inputs this node depends on have been resized.
 | 
				
			||||||
 | 
					  // context->ResizeTensor() can be called to request output tensors to be
 | 
				
			||||||
 | 
					  // resized.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // Returns kTfLiteOk on success.
 | 
				
			||||||
 | 
					  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Execute the node (should read node->inputs and output to node->outputs).
 | 
				
			||||||
 | 
					  // Returns kTfLiteOk on success.
 | 
				
			||||||
 | 
					  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // profiling_string is called during summarization of profiling information
 | 
				
			||||||
 | 
					  // in order to group executions together. Providing a value here will cause a
 | 
				
			||||||
 | 
					  // given op to appear multiple times in the profiling report. This is
 | 
				
			||||||
 | 
					  // particularly useful for custom ops that can perform significantly
 | 
				
			||||||
 | 
					  // different calculations depending on their `user_data`.
 | 
				
			||||||
 | 
					  const char* (*profiling_string)(const TfLiteContext* context,
 | 
				
			||||||
 | 
					                                  const TfLiteNode* node);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Builtin codes. If this kernel refers to a builtin this is the code
 | 
				
			||||||
 | 
					  // of the builtin. This is so we can do marshaling to other frameworks like
 | 
				
			||||||
 | 
					  // NN API.
 | 
				
			||||||
 | 
					  // Note: It is the responsibility of the registration binder to set this
 | 
				
			||||||
 | 
					  // properly.
 | 
				
			||||||
 | 
					  int32_t builtin_code;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Custom op name. If the op is a builtin, this will be null.
 | 
				
			||||||
 | 
					  // Note: It is the responsibility of the registration binder to set this
 | 
				
			||||||
 | 
					  // properly.
 | 
				
			||||||
 | 
					  // WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					  const char* custom_name;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The version of the op.
 | 
				
			||||||
 | 
					  // Note: It is the responsibility of the registration binder to set this
 | 
				
			||||||
 | 
					  // properly.
 | 
				
			||||||
 | 
					  int version;
 | 
				
			||||||
 | 
					} TfLiteRegistration;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
 | 
				
			||||||
 | 
					// values should be 1, 2, 4, 8, ...etc.
 | 
				
			||||||
 | 
					typedef enum TfLiteDelegateFlags {
 | 
				
			||||||
 | 
					  kTfLiteDelegateFlagsNone = 0,
 | 
				
			||||||
 | 
					  // The flag is set if the delegate can handle dynamic sized tensors.
 | 
				
			||||||
 | 
					  // For example, the output shape of a `Resize` op with non-constant shape
 | 
				
			||||||
 | 
					  // can only be inferred when the op is invoked.
 | 
				
			||||||
 | 
					  // In this case, the Delegate is responsible for calling
 | 
				
			||||||
 | 
					  // `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
 | 
				
			||||||
 | 
					  // `ResizeTensor` when invoking the op.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // If the delegate isn't capable of handling dynamic tensors, this flag needs
 | 
				
			||||||
 | 
					  // to be set to false.
 | 
				
			||||||
 | 
					  kTfLiteDelegateFlagsAllowDynamicTensors = 1,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // This flag can be used by delegates (that allow dynamic tensors) to ensure
 | 
				
			||||||
 | 
					  // applicable tensor shapes are automatically propagated in the case of tensor
 | 
				
			||||||
 | 
					  // resizing.
 | 
				
			||||||
 | 
					  // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
 | 
				
			||||||
 | 
					  // of a delegate kernel will have correct shapes before its Prepare() method
 | 
				
			||||||
 | 
					  // is called. The runtime leverages TFLite builtin ops in the original
 | 
				
			||||||
 | 
					  // execution plan to propagate shapes.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // A few points to note:
 | 
				
			||||||
 | 
					  // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
 | 
				
			||||||
 | 
					  // false, this one is redundant since the delegate kernels are re-initialized
 | 
				
			||||||
 | 
					  // every time tensors are resized.
 | 
				
			||||||
 | 
					  // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
 | 
				
			||||||
 | 
					  // work is required to prepare the original execution plan.
 | 
				
			||||||
 | 
					  // 3. This flag requires that the original execution plan only have ops with
 | 
				
			||||||
 | 
					  // valid registrations (and not 'dummy' custom ops like with Flex).
 | 
				
			||||||
 | 
					  // WARNING: This feature is experimental and subject to change.
 | 
				
			||||||
 | 
					  kTfLiteDelegateFlagsRequirePropagatedShapes = 2
 | 
				
			||||||
 | 
					} TfLiteDelegateFlags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// WARNING: This is an experimental interface that is subject to change.
 | 
				
			||||||
 | 
					typedef struct TfLiteDelegate {
 | 
				
			||||||
 | 
					  // Data that delegate needs to identify itself. This data is owned by the
 | 
				
			||||||
 | 
					  // delegate. The delegate is owned in the user code, so the delegate is
 | 
				
			||||||
 | 
					  // responsible for doing this when it is destroyed.
 | 
				
			||||||
 | 
					  void* data_;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
 | 
				
			||||||
 | 
					  // delegate a view of the current graph through TfLiteContext*. It typically
 | 
				
			||||||
 | 
					  // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
 | 
				
			||||||
 | 
					  // to ask the TensorFlow lite runtime to create macro-nodes to represent
 | 
				
			||||||
 | 
					  // delegated subgraphs of the original graph.
 | 
				
			||||||
 | 
					  TfLiteStatus (*Prepare)(TfLiteContext* context,
 | 
				
			||||||
 | 
					                          struct TfLiteDelegate* delegate);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Copy the data from delegate buffer handle into raw memory of the given
 | 
				
			||||||
 | 
					  // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
 | 
				
			||||||
 | 
					  // long as it follows the rules for kTfLiteDynamic tensors, in which case this
 | 
				
			||||||
 | 
					  // cannot be null.
 | 
				
			||||||
 | 
					  TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
 | 
				
			||||||
 | 
					                                       struct TfLiteDelegate* delegate,
 | 
				
			||||||
 | 
					                                       TfLiteBufferHandle buffer_handle,
 | 
				
			||||||
 | 
					                                       TfLiteTensor* tensor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Copy the data from raw memory of the given 'tensor' to delegate buffer
 | 
				
			||||||
 | 
					  // handle. This can be null if the delegate doesn't use its own buffer.
 | 
				
			||||||
 | 
					  TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
 | 
				
			||||||
 | 
					                                     struct TfLiteDelegate* delegate,
 | 
				
			||||||
 | 
					                                     TfLiteBufferHandle buffer_handle,
 | 
				
			||||||
 | 
					                                     TfLiteTensor* tensor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Free the Delegate Buffer Handle. Note: This only frees the handle, but
 | 
				
			||||||
 | 
					  // this doesn't release the underlying resource (e.g. textures). The
 | 
				
			||||||
 | 
					  // resources are either owned by application layer or the delegate.
 | 
				
			||||||
 | 
					  // This can be null if the delegate doesn't use its own buffer.
 | 
				
			||||||
 | 
					  void (*FreeBufferHandle)(TfLiteContext* context,
 | 
				
			||||||
 | 
					                           struct TfLiteDelegate* delegate,
 | 
				
			||||||
 | 
					                           TfLiteBufferHandle* handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Bitmask flags. See the comments in `TfLiteDelegateFlags`.
 | 
				
			||||||
 | 
					  int64_t flags;
 | 
				
			||||||
 | 
					} TfLiteDelegate;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Build a 'null' delegate, with all the fields properly set to their default
 | 
				
			||||||
 | 
					// values.
 | 
				
			||||||
 | 
					// Declared with `(void)` so that C callers get a real prototype; in C an
					// empty parameter list leaves the arguments unspecified and unchecked.
					TfLiteDelegate TfLiteDelegateCreate(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __cplusplus
 | 
				
			||||||
 | 
					}  // extern "C"
 | 
				
			||||||
 | 
					#endif  // __cplusplus
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_C_COMMON_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,38 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#include "tensorflow/lite/core/api/error_reporter.h"
 | 
				
			||||||
 | 
					#include <cstdarg>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int ErrorReporter::Report(const char* format, ...) {
 | 
				
			||||||
 | 
					  va_list args;
 | 
				
			||||||
 | 
					  va_start(args, format);
 | 
				
			||||||
 | 
					  int code = Report(format, args);
 | 
				
			||||||
 | 
					  va_end(args);
 | 
				
			||||||
 | 
					  return code;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(aselle): Make the name of ReportError on context the same, so
 | 
				
			||||||
 | 
					// we can use the ensure functions w/o a context and w/ a reporter.
 | 
				
			||||||
 | 
					int ErrorReporter::ReportError(void*, const char* format, ...) {
 | 
				
			||||||
 | 
					  va_list args;
 | 
				
			||||||
 | 
					  va_start(args, format);
 | 
				
			||||||
 | 
					  int code = Report(format, args);
 | 
				
			||||||
 | 
					  va_end(args);
 | 
				
			||||||
 | 
					  return code;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,59 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cstdarg>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Abstract, printf-style sink for error messages.
					///
					/// The class cannot be instantiated directly because the va_list overload
					/// of Report() is pure virtual. Subclass ErrorReporter to provide a
					/// reporting destination; for example, a GUI program might redirect
					/// messages to a buffer that drives an error log box.
					///
					/// Usage, through a pointer or reference to a concrete subclass:
					///  ErrorReporter* foo = &my_reporter;
					///  foo->Report("test %d", 5);
					/// or
					///  va_list args;
					///  foo->Report("test %d", args); // where args is va_list
					class ErrorReporter {
					 public:
					  virtual ~ErrorReporter() = default;

					  /// Reporting primitive that every subclass must implement. Receives a
					  /// printf-style `format` and its arguments as a va_list.
					  virtual int Report(const char* format, va_list args) = 0;

					  /// Variadic convenience overload taking printf-style arguments.
					  int Report(const char* format, ...);

					  /// Variadic overload with an additional, unnamed leading pointer
					  /// parameter.
					  int ReportError(void*, const char* format, ...);
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Report errors through TF_LITE_REPORT_ERROR rather than by calling the
					// error reporter directly: going through the macro lets the message strings
					// be stripped when binary size has to be optimized. Compiling with
					// TF_LITE_STRIP_ERROR_STRINGS defined turns every use of the macro into an
					// empty expansion; otherwise the macro casts `reporter` to
					// tflite::ErrorReporter* and forwards the remaining arguments to Report().
					#ifdef TF_LITE_STRIP_ERROR_STRINGS
					#define TF_LITE_REPORT_ERROR(reporter, ...)
					#else  // !TF_LITE_STRIP_ERROR_STRINGS
					#define TF_LITE_REPORT_ERROR(reporter, ...)                             \
					  do {                                                                  \
					    static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
					  } while (false)
					#endif  // TF_LITE_STRIP_ERROR_STRINGS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -0,0 +1,301 @@
 | 
				
			||||||
 | 
					/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// These functions transform codes and data structures that are defined in the
 | 
				
			||||||
 | 
					// flatbuffer serialization format into in-memory values that are used by the
 | 
				
			||||||
 | 
					// runtime API and interpreter.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cstddef>
 | 
				
			||||||
 | 
					#include <new>
 | 
				
			||||||
 | 
					#include <type_traits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/core/api/error_reporter.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/schema/schema_generated.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Interface class for builtin data allocations.
					//
					// Implementations supply raw storage through Allocate()/Deallocate();
					// AllocatePOD() layers typed, value-initialized allocation on top of
					// Allocate().
					class BuiltinDataAllocator {
					 public:
					  // Returns storage for `size` bytes, or (for implementations that can run
					  // out of memory) a null pointer. `alignment_hint` is the alignment the
					  // caller would like; AllocatePOD() passes alignof(T).
					  virtual void* Allocate(size_t size, size_t alignment_hint) = 0;

					  // Counterpart of Allocate(); presumably releases storage that Allocate()
					  // returned earlier -- confirm against the concrete allocator in use.
					  virtual void Deallocate(void* data) = 0;

					  // Allocate a structure, but make sure it is a POD structure that doesn't
					  // require constructors to run. The reason we do this, is that Interpreter's C
					  // extension part will take ownership so destructors will not be run during
					  // deallocation.
					  //
					  // Returns a pointer to a value-initialized T placed in storage obtained
					  // from Allocate(sizeof(T), alignof(T)), or nullptr when Allocate() returns
					  // a null pointer.
					  template <typename T>
					  T* AllocatePOD() {
					    // TODO(b/154346074): Change this to is_trivially_destructible when all
					    // platform targets support that properly.
					    static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
					    void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
					    if (allocated_memory == nullptr) {
					      // Placement new on a null pointer is undefined behavior, so an
					      // allocation failure is reported to the caller as nullptr instead of
					      // constructing a T at address zero.
					      return nullptr;
					    }
					    return new (allocated_memory) T();
					  }

					  virtual ~BuiltinDataAllocator() {}
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Parse the appropriate data out of the op.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// This handles builtin data explicitly as there are flatbuffer schemas.
 | 
				
			||||||
 | 
					// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
 | 
				
			||||||
 | 
					// calling function has to pass in an allocator object, and this allocator
 | 
				
			||||||
 | 
					// will be called to reserve space for the output data. If the calling
 | 
				
			||||||
 | 
					// function's allocator reserves memory on the heap, then it's the calling
 | 
				
			||||||
 | 
					// function's responsibility to free it.
 | 
				
			||||||
 | 
					// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
 | 
				
			||||||
 | 
					TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
 | 
				
			||||||
 | 
					                         ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Converts the tensor data type used in the flat buffer to the representation
 | 
				
			||||||
 | 
					// used by the runtime.
 | 
				
			||||||
 | 
					TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
 | 
				
			||||||
 | 
					                               ErrorReporter* error_reporter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseCast(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseConcatenation(const Operator* op,
 | 
				
			||||||
 | 
					                                ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
 | 
				
			||||||
 | 
					                                  ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                  BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                  void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                             BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                             void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseDiv(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseExp(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseFill(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseFloorDiv(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                           BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                           void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseFloorMod(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                           BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                           void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseFullyConnected(const Operator* op,
 | 
				
			||||||
 | 
					                                 ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                 BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                 void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                          BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseGreaterEqual(const Operator* op,
 | 
				
			||||||
 | 
					                               ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                               BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                               void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                            BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                            void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseL2Normalization(const Operator* op,
 | 
				
			||||||
 | 
					                                  ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                  BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                  void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                            BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                            void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                             BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                             void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                             BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                             void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                            BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                            void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                           BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                           void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                          BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                          BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                           BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                           void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParsePow(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                           BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                           void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                          BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                          BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseResizeBilinear(const Operator* op,
 | 
				
			||||||
 | 
					                                 ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                 BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                 void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
 | 
				
			||||||
 | 
					                                        ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                        BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                        void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                          BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSpaceToDepth(const Operator* op,
 | 
				
			||||||
 | 
					                               ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                               BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                               void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                        BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseStridedSlice(const Operator* op,
 | 
				
			||||||
 | 
					                               ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                               BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                               void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                      BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                       BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseTransposeConv(const Operator* op,
 | 
				
			||||||
 | 
					                                ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                                void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                         BuiltinDataAllocator* allocator, void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                            BuiltinDataAllocator* allocator,
 | 
				
			||||||
 | 
					                            void** builtin_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,67 @@
 | 
				
			||||||
 | 
					/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/core/api/op_resolver.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/core/api/error_reporter.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/schema/schema_utils.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Looks up the TfLiteRegistration that `op_resolver` holds for `opcode` and
					// stores it in `*registration`.
					//
					// Builtin operators are resolved by builtin code and version, custom
					// operators by their custom_code string and version. Returns kTfLiteOk when
					// a registration was found; otherwise returns kTfLiteError and leaves
					// `*registration` null. Every failure is reported through `error_reporter`
					// except an unresolved custom op, which is deliberately left silent.
					TfLiteStatus GetRegistrationFromOpCode(
					    const OperatorCode* opcode, const OpResolver& op_resolver,
					    ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
					  // The out-parameter is null on every failure path below.
					  *registration = nullptr;

					  const auto builtin_code = GetBuiltinCode(opcode);
					  const int version = opcode->version();

					  const bool code_in_range = builtin_code >= BuiltinOperator_MIN &&
					                             builtin_code <= BuiltinOperator_MAX;
					  if (!code_in_range) {
					    TF_LITE_REPORT_ERROR(
					        error_reporter,
					        "Op builtin_code out of range: %d. Are you using old TFLite binary "
					        "with newer model?",
					        builtin_code);
					    return kTfLiteError;
					  }

					  if (builtin_code != BuiltinOperator_CUSTOM) {
					    // Builtin op: resolved by builtin code and version.
					    *registration = op_resolver.FindOp(builtin_code, version);
					    if (*registration != nullptr) {
					      return kTfLiteOk;
					    }
					    TF_LITE_REPORT_ERROR(
					        error_reporter,
					        "Didn't find op for builtin opcode '%s' version '%d'\n",
					        EnumNameBuiltinOperator(builtin_code), version);
					    return kTfLiteError;
					  }

					  if (!opcode->custom_code()) {
					    TF_LITE_REPORT_ERROR(
					        error_reporter,
					        "Operator with CUSTOM builtin_code has no custom_code.\n");
					    return kTfLiteError;
					  }

					  // Custom op: resolved by name and version.
					  const char* custom_name = opcode->custom_code()->c_str();
					  *registration = op_resolver.FindOp(custom_name, version);

					  // Do not report error for unresolved custom op, we do the final check
					  // while preparing ops.
					  return (*registration != nullptr) ? kTfLiteOk : kTfLiteError;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,60 @@
 | 
				
			||||||
 | 
					/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <memory>
					#include <vector>

					#include "tensorflow/lite/c/common.h"
					#include "tensorflow/lite/core/api/error_reporter.h"
					#include "tensorflow/lite/schema/schema_generated.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Abstract interface that returns TfLiteRegistrations given op codes or custom
 | 
				
			||||||
 | 
					/// op names. This is the mechanism that ops being referenced in the flatbuffer
 | 
				
			||||||
 | 
					/// model are mapped to executable function pointers (TfLiteRegistrations).
 | 
				
			||||||
 | 
					class OpResolver {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  /// Finds the op registration for a builtin operator by enum code.
 | 
				
			||||||
 | 
					  virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
 | 
				
			||||||
 | 
					                                           int version) const = 0;
 | 
				
			||||||
 | 
					  /// Finds the op registration of a custom operator by op name.
 | 
				
			||||||
 | 
					  virtual const TfLiteRegistration* FindOp(const char* op,
 | 
				
			||||||
 | 
					                                           int version) const = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Returns optional delegates for resolving and handling ops in the flatbuffer
 | 
				
			||||||
 | 
					  // model. This may be used in addition to the standard TfLiteRegistration
 | 
				
			||||||
 | 
					  // lookup for graph resolution.
 | 
				
			||||||
 | 
					  using TfLiteDelegatePtrVector =
 | 
				
			||||||
 | 
					      std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
 | 
				
			||||||
 | 
					  virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
 | 
				
			||||||
 | 
					    return TfLiteDelegatePtrVector();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual ~OpResolver() {}
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Handles the logic for converting between an OperatorCode structure extracted
 | 
				
			||||||
 | 
					// from a flatbuffer and information about a registered operator
 | 
				
			||||||
 | 
					// implementation.
 | 
				
			||||||
 | 
					TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
 | 
				
			||||||
 | 
					                                       const OpResolver& op_resolver,
 | 
				
			||||||
 | 
					                                       ErrorReporter* error_reporter,
 | 
				
			||||||
 | 
					                                       const TfLiteRegistration** registration);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,194 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_CORE_API_PROFILER_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_CORE_API_PROFILER_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cstdint>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {

					// A simple utility for enabling profiled event tracing in TensorFlow Lite.
					//
					// Subclasses must implement the four-argument BeginEvent() and the
					// one-argument EndEvent(); the remaining virtual hooks default to no-ops.
					class Profiler {
					 public:
					  // As certain Profiler instance might be only interested in certain event
					  // types, we define each event type value to allow a Profiler to use
					  // bitmasking bitwise operations to determine whether an event should be
					  // recorded or not.
					  enum class EventType {
					    // Default event type, the metadata field has no special significance.
					    DEFAULT = 1,

					    // The event is an operator invocation and the event_metadata field is the
					    // index of operator node.
					    OPERATOR_INVOKE_EVENT = 2,

					    // The event is an invocation for an internal operator of a TFLite delegate.
					    // The event_metadata field is the index of operator node that's specific to
					    // the delegate.
					    DELEGATE_OPERATOR_INVOKE_EVENT = 4,

					    // The event is a recording of runtime instrumentation such as the overall
					    // TFLite runtime status, the TFLite delegate status (if a delegate
					    // is applied), and the overall model inference latency etc.
					    // Note, the delegate status and overall status are stored as separate
					    // event_metadata fields. In particular, the delegate status is encoded
					    // as DelegateStatus::full_status().
					    GENERAL_RUNTIME_INSTRUMENTATION_EVENT = 8,
					  };

					  virtual ~Profiler() {}

					  // Signals the beginning of an event and returns a handle to the profile
					  // event. The `event_metadata1` and `event_metadata2` have different
					  // interpretations based on the actual Profiler instance and the `event_type`.
					  // For example, as for the 'SubgraphAwareProfiler' defined in
					  // lite/core/subgraph.h, when the event_type is OPERATOR_INVOKE_EVENT,
					  // `event_metadata1` represents the index of a TFLite node, and
					  // `event_metadata2` represents the index of the subgraph that this event
					  // comes from.
					  virtual uint32_t BeginEvent(const char* tag, EventType event_type,
					                              int64_t event_metadata1,
					                              int64_t event_metadata2) = 0;
					  // Similar to the above, but `event_metadata2` defaults to 0.
					  uint32_t BeginEvent(const char* tag, EventType event_type,
					                      int64_t event_metadata) {
					    return BeginEvent(tag, event_type, event_metadata, /*event_metadata2*/ 0);
					  }

					  // Signals an end to the specified profile event with 'event_metadata's. This
					  // is useful when 'event_metadata's are not available when the event begins
					  // or when one wants to overwrite the 'event_metadata's set at the beginning.
					  // The default implementation does nothing.
					  virtual void EndEvent(uint32_t event_handle, int64_t event_metadata1,
					                        int64_t event_metadata2) {}
					  // Signals an end to the specified profile event.
					  virtual void EndEvent(uint32_t event_handle) = 0;

					  // Appends an event of type 'event_type' with 'tag' and 'event_metadata'
					  // which started at 'start' and ended at 'end'.
					  // Note:
					  // In cases where ProfileSummarizer and tensorflow::StatsCalculator are used
					  // they assume the value is in "usec", if in any case subclasses
					  // didn't put usec, then the values are not meaningful.
					  // TODO karimnosseir: Revisit and make the function more clear.
					  void AddEvent(const char* tag, EventType event_type, uint64_t start,
					                uint64_t end, int64_t event_metadata) {
					    AddEvent(tag, event_type, start, end, event_metadata,
					             /*event_metadata2*/ 0);
					  }

					  // Same as above with both metadata values supplied. The default
					  // implementation does nothing.
					  virtual void AddEvent(const char* tag, EventType event_type, uint64_t start,
					                        uint64_t end, int64_t event_metadata1,
					                        int64_t event_metadata2) {}

					 protected:
					  friend class ScopedProfile;
					};

					// Adds a profile event to `profiler` that begins with the construction
					// of the object and ends when the object goes out of scope.
					// The lifetime of tag should be at least the lifetime of `profiler`.
					// `profiler` may be null, in which case nothing is profiled.
					class ScopedProfile {
					 public:
					  ScopedProfile(Profiler* profiler, const char* tag,
					                Profiler::EventType event_type = Profiler::EventType::DEFAULT,
					                int64_t event_metadata = 0)
					      : profiler_(profiler), event_handle_(0) {
					    if (profiler) {
					      event_handle_ = profiler_->BeginEvent(tag, event_type, event_metadata);
					    }
					  }

					  ~ScopedProfile() {
					    if (profiler_) {
					      profiler_->EndEvent(event_handle_);
					    }
					  }

					 protected:
					  Profiler* profiler_;     // Not owned; may be null.
					  uint32_t event_handle_;  // Handle returned by BeginEvent(), 0 if none.
					};

					// ScopedProfile that records an OPERATOR_INVOKE_EVENT whose metadata is
					// `node_index`.
					class ScopedOperatorProfile : public ScopedProfile {
					 public:
					  ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index)
					      : ScopedProfile(profiler, tag, Profiler::EventType::OPERATOR_INVOKE_EVENT,
					                      static_cast<uint32_t>(node_index)) {}
					};

					// ScopedProfile that records a DELEGATE_OPERATOR_INVOKE_EVENT whose metadata
					// is `node_index`.
					class ScopedDelegateOperatorProfile : public ScopedProfile {
					 public:
					  ScopedDelegateOperatorProfile(Profiler* profiler, const char* tag,
					                                int node_index)
					      : ScopedProfile(profiler, tag,
					                      Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT,
					                      static_cast<uint32_t>(node_index)) {}
					};

					// ScopedProfile that records a GENERAL_RUNTIME_INSTRUMENTATION_EVENT and, on
					// destruction, reports the statuses stored via set_runtime_status().
					class ScopedRuntimeInstrumentationProfile : public ScopedProfile {
					 public:
					  ScopedRuntimeInstrumentationProfile(Profiler* profiler, const char* tag)
					      : ScopedProfile(
					            profiler, tag,
					            Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, -1) {}

					  // Stores the statuses to report when this object is destroyed. Ignored
					  // when no profiler is attached.
					  void set_runtime_status(int64_t delegate_status, int64_t interpreter_status) {
					    if (profiler_) {
					      delegate_status_ = delegate_status;
					      interpreter_status_ = interpreter_status;
					    }
					  }

					  // Ends the event with the stored statuses. The ScopedProfile destructor
					  // runs afterwards and additionally calls EndEvent(event_handle_).
					  ~ScopedRuntimeInstrumentationProfile() {
					    if (profiler_) {
					      profiler_->EndEvent(event_handle_, delegate_status_, interpreter_status_);
					    }
					  }

					 private:
					  // Zero-initialised so the destructor never reads indeterminate values when
					  // set_runtime_status() was not called.
					  int64_t delegate_status_ = 0;
					  int64_t interpreter_status_ = 0;
					};

					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Two-level paste so that `ctr` (for example __COUNTER__) is expanded before
					// it is glued onto `name`.
					#define TFLITE_VARNAME_UNIQ_IMPL(name, ctr) name##ctr
					#define TFLITE_VARNAME_UNIQ(name, ctr) TFLITE_VARNAME_UNIQ_IMPL(name, ctr)

					// Declares a uniquely named tflite::ScopedProfile in the current scope.
					#define TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, tag)          \
					  tflite::ScopedProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
					      (profiler), (tag))

					// Declares a uniquely named tflite::ScopedOperatorProfile in the current
					// scope.
					#define TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index)     \
					  tflite::ScopedOperatorProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
					      (profiler), (tag), (node_index))

					// Declares a uniquely named tflite::ScopedDelegateOperatorProfile in the
					// current scope.
					#define TFLITE_SCOPED_DELEGATE_OPERATOR_PROFILE(profiler, tag, node_index) \
					  tflite::ScopedDelegateOperatorProfile TFLITE_VARNAME_UNIQ(               \
					      _profile_, __COUNTER__)((profiler), (tag), (node_index))

					// Records a GENERAL_RUNTIME_INSTRUMENTATION_EVENT that begins and ends
					// immediately; does nothing when `profiler` tests false.
					//
					// Every argument is parenthesised and `profiler` is evaluated exactly once,
					// so expressions with side effects or low-precedence operators are safe.
					// Profiler is spelled with its namespace so the macro also works outside
					// namespace tflite.
					// NOTE: the trailing semicolon after `while (false)` is retained so that any
					// call site written without its own semicolon keeps compiling.
					#define TFLITE_ADD_RUNTIME_INSTRUMENTATION_EVENT(                          \
					    profiler, tag, event_metadata1, event_metadata2)                       \
					  do {                                                                     \
					    auto&& tflite_event_profiler_ = (profiler);                            \
					    if (tflite_event_profiler_) {                                          \
					      const auto tflite_event_handle_ =                                    \
					          tflite_event_profiler_->BeginEvent(                              \
					              (tag),                                                       \
					              tflite::Profiler::EventType::                                \
					                  GENERAL_RUNTIME_INSTRUMENTATION_EVENT,                   \
					              (event_metadata1), (event_metadata2));                       \
					      tflite_event_profiler_->EndEvent(tflite_event_handle_);              \
					    }                                                                      \
					  } while (false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_CORE_API_PROFILER_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,50 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/core/api/tensor_utils.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
 | 
				
			||||||
 | 
					  if (!tensor->is_variable) {
 | 
				
			||||||
 | 
					    return kTfLiteOk;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
 | 
				
			||||||
 | 
					  // to the value of the buffer.
 | 
				
			||||||
 | 
					  int value = 0;
 | 
				
			||||||
 | 
					  if (tensor->type == kTfLiteInt8) {
 | 
				
			||||||
 | 
					    value = tensor->params.zero_point;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // TODO(b/139446230): Provide a platform header to better handle these
 | 
				
			||||||
 | 
					  // specific scenarios.
 | 
				
			||||||
 | 
					#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
 | 
				
			||||||
 | 
					    defined(__i386) || defined(__x86__) || defined(__X86__) || \
 | 
				
			||||||
 | 
					    defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
 | 
				
			||||||
 | 
					  memset(tensor->data.raw, value, tensor->bytes);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					  char* raw_ptr = tensor->data.raw;
 | 
				
			||||||
 | 
					  for (size_t i = 0; i < tensor->bytes; ++i) {
 | 
				
			||||||
 | 
					    *raw_ptr = value;
 | 
				
			||||||
 | 
					    raw_ptr++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  return kTfLiteOk;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,28 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Resets a variable tensor to the default value.
 | 
				
			||||||
 | 
					TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -0,0 +1,112 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cstdint>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/op_macros.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Debug checks. Each macro evaluates its comparison and expands to
					// TFLITE_ASSERT_FALSE when the comparison does not hold. Every definition is
					// guarded so a build may supply its own version beforehand.

					// Holds when `condition` is true.
					#ifndef TFLITE_DCHECK
					#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif

					// Holds when x == y.
					#ifndef TFLITE_DCHECK_EQ
					#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif

					// Holds when x != y.
					#ifndef TFLITE_DCHECK_NE
					#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif

					// Holds when x >= y.
					#ifndef TFLITE_DCHECK_GE
					#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif

					// Holds when x > y.
					#ifndef TFLITE_DCHECK_GT
					#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif

					// Holds when x <= y.
					#ifndef TFLITE_DCHECK_LE
					#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif

					// Holds when x < y.
					#ifndef TFLITE_DCHECK_LT
					#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Checks that expand to TFLITE_ABORT when the comparison does not hold.
					// Every definition is guarded so a build may supply its own version
					// beforehand.
					// TODO(ahentz): Clean up: We should stick to the DCHECK versions.

					// Holds when `condition` is true.
					#ifndef TFLITE_CHECK
					#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
					#endif

					// Holds when x == y.
					#ifndef TFLITE_CHECK_EQ
					#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
					#endif

					// Holds when x != y.
					#ifndef TFLITE_CHECK_NE
					#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
					#endif

					// Holds when x >= y.
					#ifndef TFLITE_CHECK_GE
					#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
					#endif

					// Holds when x > y.
					#ifndef TFLITE_CHECK_GT
					#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
					#endif

					// Holds when x <= y.
					#ifndef TFLITE_CHECK_LE
					#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
					#endif

					// Holds when x < y.
					#ifndef TFLITE_CHECK_LT
					#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TF_LITE_STATIC_MEMORY
					// Short global names for the fixed-width integer types from <cstdint>. They
					// are not declared when TF_LITE_STATIC_MEMORY is defined.
					// TODO(b/162019032): Consider removing these type-aliases.
					using int8 = std::int8_t;      // signed, 8 bits
					using uint8 = std::uint8_t;    // unsigned, 8 bits
					using int16 = std::int16_t;    // signed, 16 bits
					using uint16 = std::uint16_t;  // unsigned, 16 bits
					using int32 = std::int32_t;    // signed, 32 bits
					using uint32 = std::uint32_t;  // unsigned, 32 bits
					#endif  // !defined(TF_LITE_STATIC_MEMORY)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TFLITE_DEPRECATED(message)
					//
					// Duplicated from absl/base/macros.h to avoid pulling in that library.
					// Attach it to the declaration of a class, struct, enum, function, method or
					// variable to mark that entity as deprecated; `message` becomes the custom
					// diagnostic text (e.g. a suggestion of a better alternative).
					//
					// Example:
					//
					//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
					//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
					//
					// Every usage of a deprecated entity will trigger a warning when compiled with
					// clang's `-Wdeprecated-declarations` option. This option is turned off by
					// default, but the warnings will be reported by clang-tidy.
					#if defined(__clang__) && __cplusplus >= 201103L
					#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
					#endif

					// Anywhere the macro is still undefined it expands to nothing.
					#ifndef TFLITE_DEPRECATED
					#define TFLITE_DEPRECATED(message)
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,40 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {

					// TF_LITE_GLOBAL_STD_PREFIX selects the namespace the wrapped math functions
					// are looked up in: `std` by default, or the global namespace (empty prefix)
					// when TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS, ARDUINO or __ZEPHYR__ is defined,
					// or on __ANDROID__ builds where __NDK_MAJOR__ is not defined.
					#if !(defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) ||                       \
					      (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) ||                 \
					      defined(ARDUINO) || defined(__ZEPHYR__))
					#define TF_LITE_GLOBAL_STD_PREFIX std
					#else
					#define TF_LITE_GLOBAL_STD_PREFIX
					#endif

					// Defines a one-argument function template `tf_name` that forwards its
					// argument to `std_name` in the namespace chosen above and returns the result
					// converted to the argument type.
					#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name)  \
					  template <typename T>                                \
					  inline T tf_name(const T x) {                        \
					    return TF_LITE_GLOBAL_STD_PREFIX::std_name(x);     \
					  }

					// TfLiteRound(x): forwards to round(x).
					DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);

					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,35 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {

					#if !defined(TF_LITE_USE_GLOBAL_MAX) && !defined(__ZEPHYR__)
					// Default variant: returns std::fmax(x, y) converted back to T.
					template <typename T>
					inline T TfLiteMax(const T& x, const T& y) {
					  return std::fmax(x, y);
					}
					#else
					// Variant selected by TF_LITE_USE_GLOBAL_MAX or __ZEPHYR__: float-only and
					// implemented with std::max.
					inline float TfLiteMax(const float& x, const float& y) {
					  return std::max(x, y);
					}
					#endif

					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,35 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {

					#if !defined(TF_LITE_USE_GLOBAL_MIN) && !defined(__ZEPHYR__)
					// Default variant: returns std::fmin(x, y) converted back to T.
					template <typename T>
					inline T TfLiteMin(const T& x, const T& y) {
					  return std::fmin(x, y);
					}
					#else
					// Variant selected by TF_LITE_USE_GLOBAL_MIN or __ZEPHYR__: float-only and
					// implemented with std::min.
					inline float TfLiteMin(const float& x, const float& y) {
					  return std::min(x, y);
					}
					#endif

					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,40 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Native NEON: either spelling of the ARM NEON feature macro enables USE_NEON
					// and pulls in the intrinsics header.
					#if defined(__ARM_NEON) || defined(__ARM_NEON__)
					#define USE_NEON
					#include <arm_neon.h>
					#endif

					// GNU-compatible compilers targeting SSE4.1 also define USE_NEON and include
					// NEON_2_SSE.h instead, unless TF_LITE_DISABLE_X86_NEON is defined.
					#if defined(__GNUC__) && defined(__SSE4_1__) && \
					    !defined(TF_LITE_DISABLE_X86_NEON)
					#define USE_NEON
					#include "NEON_2_SSE.h"
					#endif

					// NEON_OR_PORTABLE(SomeFunc, args) expands to NeonSomeFunc(args) when USE_NEON
					// is defined and to PortableSomeFunc(args) otherwise.
					#if defined(USE_NEON)
					// NEON path.
					#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)

					#else
					// Portable fallback path.
					#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)

					#endif  // defined(USE_NEON)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,122 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <vector>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Builds a RuntimeShape from a list of dimension sizes: the vector's length
					// is passed as the dimension count and its contents as the dimension data.
					inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
					  const auto dims_count = data.size();
					  int32_t* const dims = data.data();
					  return RuntimeShape(dims_count, dims);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// A list of tensors in a format that can be used by kernels like split and
 | 
				
			||||||
 | 
					// concatenation.
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					class VectorOfTensors {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  // Build with the tensors in 'tensor_list'.
 | 
				
			||||||
 | 
					  VectorOfTensors(const TfLiteContext& context,
 | 
				
			||||||
 | 
					                  const TfLiteIntArray& tensor_list) {
 | 
				
			||||||
 | 
					    int num_tensors = tensor_list.size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    all_data_.reserve(num_tensors);
 | 
				
			||||||
 | 
					    all_shape_.reserve(num_tensors);
 | 
				
			||||||
 | 
					    all_shape_ptr_.reserve(num_tensors);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < num_tensors; ++i) {
 | 
				
			||||||
 | 
					      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
 | 
				
			||||||
 | 
					      all_data_.push_back(GetTensorData<T>(t));
 | 
				
			||||||
 | 
					      all_shape_.push_back(GetTensorShape(t));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Taking the pointer from inside a std::vector is only OK if the vector is
 | 
				
			||||||
 | 
					    // never modified, so we populate all_shape in the previous loop and then we
 | 
				
			||||||
 | 
					    // are free to grab iterators here.
 | 
				
			||||||
 | 
					    for (int i = 0; i < num_tensors; ++i) {
 | 
				
			||||||
 | 
					      all_shape_ptr_.push_back(&all_shape_[i]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // Return a pointer to the data pointers of all tensors in the list. For
 | 
				
			||||||
 | 
					  // example:
 | 
				
			||||||
 | 
					  //   float* const* f = v.data();
 | 
				
			||||||
 | 
					  //   f[0][1] is the second element of the first tensor.
 | 
				
			||||||
 | 
					  T* const* data() const { return all_data_.data(); }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Return a pointer the shape pointers of all tensors in the list. For
 | 
				
			||||||
 | 
					  // example:
 | 
				
			||||||
 | 
					  //   const RuntimeShape* const* d = v.dims();
 | 
				
			||||||
 | 
					  //   dims[1] are the dimensions of the second tensor in the list.
 | 
				
			||||||
 | 
					  const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 private:
 | 
				
			||||||
 | 
					  std::vector<T*> all_data_;
 | 
				
			||||||
 | 
					  std::vector<RuntimeShape> all_shape_;
 | 
				
			||||||
 | 
					  std::vector<RuntimeShape*> all_shape_ptr_;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// A list of quantized tensors in a format that can be used by kernels like
 | 
				
			||||||
 | 
					// split and concatenation.
 | 
				
			||||||
 | 
					class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  // Build with the tensors in 'tensor_list'.
 | 
				
			||||||
 | 
					  VectorOfQuantizedTensors(const TfLiteContext& context,
 | 
				
			||||||
 | 
					                           const TfLiteIntArray& tensor_list)
 | 
				
			||||||
 | 
					      : VectorOfTensors<uint8_t>(context, tensor_list) {
 | 
				
			||||||
 | 
					    for (int i = 0; i < tensor_list.size; ++i) {
 | 
				
			||||||
 | 
					      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
 | 
				
			||||||
 | 
					      zero_point_.push_back(t->params.zero_point);
 | 
				
			||||||
 | 
					      scale_.push_back(t->params.scale);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const float* scale() const { return scale_.data(); }
 | 
				
			||||||
 | 
					  const int32_t* zero_point() const { return zero_point_.data(); }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 private:
 | 
				
			||||||
 | 
					  std::vector<int32_t> zero_point_;
 | 
				
			||||||
 | 
					  std::vector<float> scale_;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Writes randomly accessed values from `input` sequentially into `output`.
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					class SequentialTensorWriter {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
 | 
				
			||||||
 | 
					    input_data_ = GetTensorData<T>(input);
 | 
				
			||||||
 | 
					    output_ptr_ = GetTensorData<T>(output);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  SequentialTensorWriter(const T* input_data, T* output_data)
 | 
				
			||||||
 | 
					      : input_data_(input_data), output_ptr_(output_data) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void Write(int position) { *output_ptr_++ = input_data_[position]; }
 | 
				
			||||||
 | 
					  void WriteN(int position, int len) {
 | 
				
			||||||
 | 
					    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
 | 
				
			||||||
 | 
					    output_ptr_ += len;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 private:
 | 
				
			||||||
 | 
					  const T* input_data_;
 | 
				
			||||||
 | 
					  T* output_ptr_;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,395 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/quantization_util.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace {
					// Masks and shifts for picking apart the 64-bit pattern of a binary64
					// double, whose layout is:
					//   Bit |  63  |  62-52   |   51-0   |
					//       | Sign | Exponent | Fraction |
					// The helpers in this file keep only the top 30 fraction bits (51-22); the
					// low 22 bits (21-0) are dropped and only consulted for rounding.

					// Sign: bit 63.
					constexpr uint64_t kSignMask = 0x8000000000000000ULL;

					// Exponent: bits 62-52, stored with a bias of 1023; an all-ones exponent
					// field (0x7ff) marks NaN or infinity.
					constexpr uint64_t kExponentMask = 0x7ff0000000000000ULL;
					constexpr int32_t kExponentShift = 52;
					constexpr int32_t kExponentBias = 1023;
					constexpr uint32_t kExponentIsBadNum = 0x7ff;

					// Retained fraction: bits 51-22, moved down by 22 to form a 30-bit value.
					constexpr uint64_t kFractionMask = 0x000fffffffc00000ULL;
					constexpr uint32_t kFractionShift = 22;

					// Discarded fraction: bits 21-0; the threshold is half of that range.
					constexpr uint32_t kFractionRoundingMask = 0x003fffff;
					constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
					}  // namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                        int* shift) {
 | 
				
			||||||
 | 
					  if (double_multiplier == 0.) {
 | 
				
			||||||
 | 
					    *quantized_multiplier = 0;
 | 
				
			||||||
 | 
					    *shift = 0;
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					#ifdef TFLITE_EMULATE_FLOAT
 | 
				
			||||||
 | 
					  // If we're trying to avoid the use of floating-point instructions (for
 | 
				
			||||||
 | 
					  // example on microcontrollers) then use an alternative implementation
 | 
				
			||||||
 | 
					  // that only requires integer and bitwise operations. To enable this, you
 | 
				
			||||||
 | 
					  // need to set the define during the build process for your platform.
 | 
				
			||||||
 | 
					  int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
 | 
				
			||||||
 | 
					#else   // TFLITE_EMULATE_FLOAT
 | 
				
			||||||
 | 
					  const double q = std::frexp(double_multiplier, shift);
 | 
				
			||||||
 | 
					  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1ll << 31)));
 | 
				
			||||||
 | 
					#endif  // TFLITE_EMULATE_FLOAT
 | 
				
			||||||
 | 
					  TFLITE_CHECK(q_fixed <= (1ll << 31));
 | 
				
			||||||
 | 
					  if (q_fixed == (1ll << 31)) {
 | 
				
			||||||
 | 
					    q_fixed /= 2;
 | 
				
			||||||
 | 
					    ++*shift;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
 | 
				
			||||||
 | 
					  // A shift amount smaller than -31 would cause all bits to be shifted out
 | 
				
			||||||
 | 
					  // and thus all results would be zero. We implement that instead with
 | 
				
			||||||
 | 
					  // q_fixed==0, so as to avoid hitting issues with right-shift
 | 
				
			||||||
 | 
					  // operations with shift amounts greater than 31. Note that this happens
 | 
				
			||||||
 | 
					  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
 | 
				
			||||||
 | 
					  // that we're effectively flushing tiny double_multiplier's to zero.
 | 
				
			||||||
 | 
					  // We could conceivably handle values in the range (roughly) [32, 63]
 | 
				
			||||||
 | 
					  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
 | 
				
			||||||
 | 
					  // the present handling is just doing 'flush denormals to zero'. We could
 | 
				
			||||||
 | 
					  // reconsider and actually generate nonzero denormals if a need arises.
 | 
				
			||||||
 | 
					  if (*shift < -31) {
 | 
				
			||||||
 | 
					    *shift = 0;
 | 
				
			||||||
 | 
					    q_fixed = 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  *quantized_multiplier = static_cast<int32_t>(q_fixed);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void QuantizeMultiplierGreaterThanOne(double double_multiplier,
 | 
				
			||||||
 | 
					                                      int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                                      int* left_shift) {
 | 
				
			||||||
 | 
					  TFLITE_CHECK_GT(double_multiplier, 1.);
 | 
				
			||||||
 | 
					  QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_GE(*left_shift, 0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
 | 
				
			||||||
 | 
					                                         int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                                         int* left_shift) {
 | 
				
			||||||
 | 
					  TFLITE_CHECK_LT(double_multiplier, 1.);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_GT(double_multiplier, 0.);
 | 
				
			||||||
 | 
					  int shift;
 | 
				
			||||||
 | 
					  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_LE(shift, 0);
 | 
				
			||||||
 | 
					  *left_shift = shift;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int64_t IntegerFrExp(double input, int* shift) {
 | 
				
			||||||
 | 
					  // Make sure our assumptions about the double layout hold.
 | 
				
			||||||
 | 
					  TFLITE_CHECK_EQ(8, sizeof(double));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // We want to access the bits of the input double value directly, which is
 | 
				
			||||||
 | 
					  // tricky to do safely, so use a union to handle the casting.
 | 
				
			||||||
 | 
					  union {
 | 
				
			||||||
 | 
					    double double_value;
 | 
				
			||||||
 | 
					    uint64_t double_as_uint;
 | 
				
			||||||
 | 
					  } cast_union;
 | 
				
			||||||
 | 
					  cast_union.double_value = input;
 | 
				
			||||||
 | 
					  const uint64_t u = cast_union.double_as_uint;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // If the bitfield is all zeros apart from the sign bit, this is a normalized
 | 
				
			||||||
 | 
					  // zero value, so return standard values for this special case.
 | 
				
			||||||
 | 
					  if ((u & ~kSignMask) == 0) {
 | 
				
			||||||
 | 
					    *shift = 0;
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Deal with NaNs and Infs, which are always indicated with a fixed pattern in
 | 
				
			||||||
 | 
					  // the exponent, and distinguished by whether the fractions are zero or
 | 
				
			||||||
 | 
					  // non-zero.
 | 
				
			||||||
 | 
					  const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
 | 
				
			||||||
 | 
					  if (exponent_part == kExponentIsBadNum) {
 | 
				
			||||||
 | 
					    *shift = std::numeric_limits<int>::max();
 | 
				
			||||||
 | 
					    if (u & kFractionMask) {
 | 
				
			||||||
 | 
					      // NaN, so just return zero (with the exponent set to INT_MAX).
 | 
				
			||||||
 | 
					      return 0;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      // Infinity, so return +/- INT_MAX.
 | 
				
			||||||
 | 
					      if (u & kSignMask) {
 | 
				
			||||||
 | 
					        return std::numeric_limits<int64_t>::min();
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        return std::numeric_limits<int64_t>::max();
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The shift is fairly easy to extract from the high bits of the double value,
 | 
				
			||||||
 | 
					  // just by masking it out and applying a bias. The std::frexp() implementation
 | 
				
			||||||
 | 
					  // always returns values between 0.5 and 1.0 though, whereas the exponent
 | 
				
			||||||
 | 
					  // assumes 1.0 to 2.0 is the standard range, so I add on one to match that
 | 
				
			||||||
 | 
					  // interface.
 | 
				
			||||||
 | 
					  *shift = (exponent_part - kExponentBias) + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // There's an implicit high bit in the double format definition, so make sure
 | 
				
			||||||
 | 
					  // we include that at the top, and then reconstruct the rest of the fractional
 | 
				
			||||||
 | 
					  // value from the remaining fragments.
 | 
				
			||||||
 | 
					  int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // We're cutting off some bits at the bottom, so to exactly match the standard
 | 
				
			||||||
 | 
					  // frexp implementation here we'll apply rounding by adding one to the least
 | 
				
			||||||
 | 
					  // significant bit of the result if the discarded portion is over half of the
 | 
				
			||||||
 | 
					  // maximum.
 | 
				
			||||||
 | 
					  if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
 | 
				
			||||||
 | 
					    fraction += 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // Negate the fraction if the sign bit was set.
 | 
				
			||||||
 | 
					  if (u & kSignMask) {
 | 
				
			||||||
 | 
					    fraction *= -1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return fraction;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double DoubleFromFractionAndShift(int64_t fraction, int shift) {
 | 
				
			||||||
 | 
					  union {
 | 
				
			||||||
 | 
					    double double_value;
 | 
				
			||||||
 | 
					    uint64_t double_as_uint;
 | 
				
			||||||
 | 
					  } result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Detect NaNs and infinities.
 | 
				
			||||||
 | 
					  if (shift == std::numeric_limits<int>::max()) {
 | 
				
			||||||
 | 
					    if (fraction == 0) {
 | 
				
			||||||
 | 
					      return std::numeric_limits<double>::quiet_NaN();
 | 
				
			||||||
 | 
					    } else if (fraction > 0) {
 | 
				
			||||||
 | 
					      return std::numeric_limits<double>::infinity();
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      return -std::numeric_limits<double>::infinity();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Return a normalized zero for a zero fraction.
 | 
				
			||||||
 | 
					  if (fraction == 0) {
 | 
				
			||||||
 | 
					    result.double_as_uint = 0;
 | 
				
			||||||
 | 
					    return result.double_value;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bool is_negative = (fraction < 0);
 | 
				
			||||||
 | 
					  int64_t encoded_fraction = is_negative ? -fraction : fraction;
 | 
				
			||||||
 | 
					  int64_t encoded_shift = (shift - 1);
 | 
				
			||||||
 | 
					  while (encoded_fraction < 0x40000000) {
 | 
				
			||||||
 | 
					    encoded_fraction *= 2;
 | 
				
			||||||
 | 
					    encoded_shift -= 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  while (encoded_fraction > 0x80000000) {
 | 
				
			||||||
 | 
					    encoded_fraction /= 2;
 | 
				
			||||||
 | 
					    encoded_shift += 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  encoded_fraction -= 0x40000000;
 | 
				
			||||||
 | 
					  if (encoded_shift < -1022) {
 | 
				
			||||||
 | 
					    encoded_shift = -1023;
 | 
				
			||||||
 | 
					  } else if (encoded_shift > 1022) {
 | 
				
			||||||
 | 
					    encoded_shift = 1023;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  encoded_shift += kExponentBias;
 | 
				
			||||||
 | 
					  uint64_t encoded_sign = is_negative ? kSignMask : 0;
 | 
				
			||||||
 | 
					  result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
 | 
				
			||||||
 | 
					                          (encoded_fraction << kFractionShift);
 | 
				
			||||||
 | 
					  return result.double_value;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double IntegerDoubleMultiply(double a, double b) {
 | 
				
			||||||
 | 
					  int a_shift;
 | 
				
			||||||
 | 
					  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
 | 
				
			||||||
 | 
					  int b_shift;
 | 
				
			||||||
 | 
					  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
 | 
				
			||||||
 | 
					  // Detect NaNs and infinities.
 | 
				
			||||||
 | 
					  if (a_shift == std::numeric_limits<int>::max() ||
 | 
				
			||||||
 | 
					      (b_shift == std::numeric_limits<int>::max())) {
 | 
				
			||||||
 | 
					    return std::numeric_limits<double>::quiet_NaN();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int result_shift = a_shift + b_shift + 1;
 | 
				
			||||||
 | 
					  const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
 | 
				
			||||||
 | 
					  return DoubleFromFractionAndShift(result_fraction, result_shift);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int IntegerDoubleCompare(double a, double b) {
 | 
				
			||||||
 | 
					  int a_shift;
 | 
				
			||||||
 | 
					  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
 | 
				
			||||||
 | 
					  int b_shift;
 | 
				
			||||||
 | 
					  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Detect NaNs and infinities.
 | 
				
			||||||
 | 
					  if (a_shift == std::numeric_limits<int>::max() ||
 | 
				
			||||||
 | 
					      (b_shift == std::numeric_limits<int>::max())) {
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((a_fraction == 0) && (b_fraction < 0)) {
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					  } else if ((a_fraction < 0) && (b_fraction == 0)) {
 | 
				
			||||||
 | 
					    return -1;
 | 
				
			||||||
 | 
					  } else if (a_shift < b_shift) {
 | 
				
			||||||
 | 
					    return -1;
 | 
				
			||||||
 | 
					  } else if (a_shift > b_shift) {
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					  } else if (a_fraction < b_fraction) {
 | 
				
			||||||
 | 
					    return -1;
 | 
				
			||||||
 | 
					  } else if (a_fraction > b_fraction) {
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void PreprocessSoftmaxScaling(double beta, double input_scale,
 | 
				
			||||||
 | 
					                              int input_integer_bits,
 | 
				
			||||||
 | 
					                              int32_t* quantized_multiplier, int* left_shift) {
 | 
				
			||||||
 | 
					  // If the overall multiplier (input and beta) is large, then exp() of an
 | 
				
			||||||
 | 
					  // input difference of 1 scaled by this will be large.  In other words, we
 | 
				
			||||||
 | 
					  // can cap the multiplier and know that, when it is used, the output will be
 | 
				
			||||||
 | 
					  // (round to) zero wherever the input is not at the maximum value.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // If the overall scale is less than one, and input_integer_bits=0, then the
 | 
				
			||||||
 | 
					  // result is double equivalent of Q0.31 (actually with more precision). Thus
 | 
				
			||||||
 | 
					  // this generates a Q(input_integer_bits).(31-input_integer_bits)
 | 
				
			||||||
 | 
					  // representation.
 | 
				
			||||||
 | 
					#ifdef TFLITE_EMULATE_FLOAT
 | 
				
			||||||
 | 
					  const double input_beta = IntegerDoubleMultiply(beta, input_scale);
 | 
				
			||||||
 | 
					  int shift;
 | 
				
			||||||
 | 
					  int64_t fraction = IntegerFrExp(input_beta, &shift);
 | 
				
			||||||
 | 
					  shift += (31 - input_integer_bits);
 | 
				
			||||||
 | 
					  double input_beta_real_multiplier =
 | 
				
			||||||
 | 
					      DoubleFromFractionAndShift(fraction, shift);
 | 
				
			||||||
 | 
					  if (IntegerDoubleCompare(input_beta_real_multiplier, (1ll << 31) - 1.0) > 0) {
 | 
				
			||||||
 | 
					    input_beta_real_multiplier = (1ll << 31) - 1.0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					#else   // TFLITE_EMULATE_FLOAT
 | 
				
			||||||
 | 
					  const double input_beta_real_multiplier = std::min<double>(
 | 
				
			||||||
 | 
					      beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0);
 | 
				
			||||||
 | 
					#endif  // TFLITE_EMULATE_FLOAT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
 | 
				
			||||||
 | 
					                                   quantized_multiplier, left_shift);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Same as PreprocessSoftmaxScaling, and additionally derives the reverse
					// scaling factor 2^(31 - *left_shift) / *quantized_multiplier, which is
					// decomposed into *reverse_scaling_divisor and *reverse_scaling_left_shift.
					void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
					                                    int input_integer_bits,
					                                    int32_t* quantized_multiplier,
					                                    int* left_shift,
					                                    int32_t* reverse_scaling_divisor,
					                                    int* reverse_scaling_left_shift) {
					  // Forward scaling first; it fills in the multiplier and shift used below.
					  PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
					                           quantized_multiplier, left_shift);

					  // Inverse of the forward scaling factor applied to the input.
					  const double forward_multiplier =
					      static_cast<double>(*quantized_multiplier);
					  const double inverse_scale =
					      (1 << (31 - *left_shift)) / forward_multiplier;

					  tflite::QuantizeMultiplierSmallerThanOneExp(
					      inverse_scale, reverse_scaling_divisor, reverse_scaling_left_shift);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Returns floor((2^input_integer_bits - 1) *
					//               2^(total_signed_bits - input_integer_bits) /
					//               2^input_left_shift).
					// With TFLITE_EMULATE_FLOAT the value is computed with integer shifts only.
					int CalculateInputRadius(int input_integer_bits, int input_left_shift,
					                         int total_signed_bits) {
					#ifdef TFLITE_EMULATE_FLOAT
					  const int64_t max_integer_part = (1 << input_integer_bits) - 1;
					  const int64_t scaled_up =
					      max_integer_part << (total_signed_bits - input_integer_bits);
					  return scaled_up >> input_left_shift;
					#else   // TFLITE_EMULATE_FLOAT
					  const double max_integer_part = 1.0 * ((1 << input_integer_bits) - 1);
					  const double scaled_up =
					      max_integer_part * (1ll << (total_signed_bits - input_integer_bits));
					  const double max_input_rescaled = scaled_up / (1ll << input_left_shift);
					  // Round down so the bound is never above the exact value: an input at
					  // the exact bound would already sit at the maximum after scaling.
					  return static_cast<int>(std::floor(max_input_rescaled));
					#endif  // TFLITE_EMULATE_FLOAT
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void NudgeQuantizationRange(const float min, const float max,
 | 
				
			||||||
 | 
					                            const int quant_min, const int quant_max,
 | 
				
			||||||
 | 
					                            float* nudged_min, float* nudged_max,
 | 
				
			||||||
 | 
					                            float* nudged_scale) {
 | 
				
			||||||
 | 
					  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
 | 
				
			||||||
 | 
					  const float quant_min_float = static_cast<float>(quant_min);
 | 
				
			||||||
 | 
					  const float quant_max_float = static_cast<float>(quant_max);
 | 
				
			||||||
 | 
					  *nudged_scale = (max - min) / (quant_max_float - quant_min_float);
 | 
				
			||||||
 | 
					  const float zero_point_from_min = quant_min_float - min / *nudged_scale;
 | 
				
			||||||
 | 
					  uint16_t nudged_zero_point;
 | 
				
			||||||
 | 
					  if (zero_point_from_min < quant_min_float) {
 | 
				
			||||||
 | 
					    nudged_zero_point = static_cast<uint16_t>(quant_min);
 | 
				
			||||||
 | 
					  } else if (zero_point_from_min > quant_max_float) {
 | 
				
			||||||
 | 
					    nudged_zero_point = static_cast<uint16_t>(quant_max);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
 | 
				
			||||||
 | 
					  *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
 | 
				
			||||||
 | 
					                       const float nudged_max, const float* input_data,
 | 
				
			||||||
 | 
					                       float* output_data, const float size) {
 | 
				
			||||||
 | 
					  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
 | 
				
			||||||
 | 
					  const float inv_nudged_scale = 1.0f / nudged_scale;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < size; i++) {
 | 
				
			||||||
 | 
					    const float src_val = input_data[i];
 | 
				
			||||||
 | 
					    const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
 | 
				
			||||||
 | 
					    const float clamped_shifted = clamped - nudged_min;
 | 
				
			||||||
 | 
					    const float dst_val =
 | 
				
			||||||
 | 
					        TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
 | 
				
			||||||
 | 
					        nudged_min;
 | 
				
			||||||
 | 
					    output_data[i] = dst_val;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool CheckedLog2(const float x, int* log2_result) {
 | 
				
			||||||
 | 
					  // Using TfLiteRound instead of std::round and std::log instead of
 | 
				
			||||||
 | 
					  // std::log2 to work around these functions being missing in a toolchain
 | 
				
			||||||
 | 
					  // used in some TensorFlow tests as of May 2018.
 | 
				
			||||||
 | 
					  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
 | 
				
			||||||
 | 
					  const float x_log2_rounded = TfLiteRound(x_log2);
 | 
				
			||||||
 | 
					  const float x_log2_fracpart = x_log2 - x_log2_rounded;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  *log2_result = static_cast<int>(x_log2_rounded);
 | 
				
			||||||
 | 
					  return std::abs(x_log2_fracpart) < 1e-3f;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void QuantizeMultiplierArray(const double* effective_scales, size_t size,
 | 
				
			||||||
 | 
					                             int32_t* effective_scale_significand,
 | 
				
			||||||
 | 
					                             int* effective_shift) {
 | 
				
			||||||
 | 
					  for (size_t i = 0; i < size; ++i) {
 | 
				
			||||||
 | 
					    QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
 | 
				
			||||||
 | 
					                       &effective_shift[i]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,292 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					#include <cstdint>
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Given the min and max values of a float array, return
 | 
				
			||||||
 | 
					// reasonable quantization parameters to use for this array.
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
 | 
				
			||||||
 | 
					                                            bool narrow_range) {
 | 
				
			||||||
 | 
					  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
 | 
				
			||||||
 | 
					  const T qmax = std::numeric_limits<T>::max();
 | 
				
			||||||
 | 
					  const double qmin_double = qmin;
 | 
				
			||||||
 | 
					  const double qmax_double = qmax;
 | 
				
			||||||
 | 
					  // 0 should always be a representable value. Let's assume that the initial
 | 
				
			||||||
 | 
					  // min,max range contains 0.
 | 
				
			||||||
 | 
					  TFLITE_CHECK_LE(rmin, 0.);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_GE(rmax, 0.);
 | 
				
			||||||
 | 
					  if (rmin == rmax) {
 | 
				
			||||||
 | 
					    // Special case where the min,max range is a point. Should be {0}.
 | 
				
			||||||
 | 
					    TFLITE_CHECK_EQ(rmin, 0.);
 | 
				
			||||||
 | 
					    TFLITE_CHECK_EQ(rmax, 0.);
 | 
				
			||||||
 | 
					    QuantizationParams quantization_params;
 | 
				
			||||||
 | 
					    quantization_params.zero_point = 0;
 | 
				
			||||||
 | 
					    quantization_params.scale = 0.;
 | 
				
			||||||
 | 
					    return quantization_params;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // General case.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // First determine the scale.
 | 
				
			||||||
 | 
					  const double scale = (rmax - rmin) / (qmax_double - qmin_double);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Zero-point computation.
 | 
				
			||||||
 | 
					  // First the initial floating-point computation. The zero-point can be
 | 
				
			||||||
 | 
					  // determined from solving an affine equation for any known pair
 | 
				
			||||||
 | 
					  // (real value, corresponding quantized value).
 | 
				
			||||||
 | 
					  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
 | 
				
			||||||
 | 
					  // The arithmetic error on the zero point computed from either pair
 | 
				
			||||||
 | 
					  // will be roughly machine_epsilon * (sum of absolute values of terms)
 | 
				
			||||||
 | 
					  // so we want to use the variant that adds the smaller terms.
 | 
				
			||||||
 | 
					  const double zero_point_from_min = qmin_double - rmin / scale;
 | 
				
			||||||
 | 
					  const double zero_point_from_max = qmax_double - rmax / scale;
 | 
				
			||||||
 | 
					  const double zero_point_from_min_error =
 | 
				
			||||||
 | 
					      std::abs(qmin_double) + std::abs(rmin / scale);
 | 
				
			||||||
 | 
					  const double zero_point_from_max_error =
 | 
				
			||||||
 | 
					      std::abs(qmax_double) + std::abs(rmax / scale);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const double zero_point_double =
 | 
				
			||||||
 | 
					      zero_point_from_min_error < zero_point_from_max_error
 | 
				
			||||||
 | 
					          ? zero_point_from_min
 | 
				
			||||||
 | 
					          : zero_point_from_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Now we need to nudge the zero point to be an integer
 | 
				
			||||||
 | 
					  // (our zero points are integer, and this is motivated by the requirement
 | 
				
			||||||
 | 
					  // to be able to represent the real value "0" exactly as a quantized value,
 | 
				
			||||||
 | 
					  // which is required in multiple places, for example in Im2col with SAME
 | 
				
			||||||
 | 
					  // padding).
 | 
				
			||||||
 | 
					  T nudged_zero_point = 0;
 | 
				
			||||||
 | 
					  if (zero_point_double < qmin_double) {
 | 
				
			||||||
 | 
					    nudged_zero_point = qmin;
 | 
				
			||||||
 | 
					  } else if (zero_point_double > qmax_double) {
 | 
				
			||||||
 | 
					    nudged_zero_point = qmax;
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    nudged_zero_point = static_cast<T>(round(zero_point_double));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // The zero point should always be in the range of quantized value,
 | 
				
			||||||
 | 
					  // [qmin, qmax].
 | 
				
			||||||
 | 
					  TFLITE_CHECK_GE(nudged_zero_point, qmin);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_LE(nudged_zero_point, qmax);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Finally, store the result nudged quantization params.
 | 
				
			||||||
 | 
					  QuantizationParams quantization_params;
 | 
				
			||||||
 | 
					  quantization_params.zero_point = nudged_zero_point;
 | 
				
			||||||
 | 
					  quantization_params.scale = scale;
 | 
				
			||||||
 | 
					  return quantization_params;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
 | 
				
			||||||
 | 
					  return ChooseQuantizationParams<T>(rmin, rmax, false);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Saturating float-to-integer conversion. Whenever static_cast<IntOut>(x) is
					// well defined, the result equals that cast (x truncated toward zero).
					// Inputs for which the plain cast would be undefined behavior are mapped as
					// follows:
					//
					//  1. NaN yields zero.
					//
					//  2. Values whose truncated form is above the range of IntOut (including
					//     +infinity) yield std::numeric_limits<IntOut>::max().
					//
					//  3. Values whose truncated form is below the range of IntOut (including
					//     -infinity) yield std::numeric_limits<IntOut>::min().
					//
					// The range of FloatIn must include the range of IntOut, otherwise
					// the results are undefined.
					// TODO(sfeuz): Replace by absl::SafeCast once available.
					template <class IntOut, class FloatIn>
					IntOut SafeCast(FloatIn x) {
					  static_assert(!std::numeric_limits<FloatIn>::is_integer,
					                "FloatIn is integer");
					  static_assert(std::numeric_limits<IntOut>::is_integer,
					                "IntOut is not integer");
					  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");

					  using OutLimits = std::numeric_limits<IntOut>;
					  const bool is_negative = x < 0;

					  // NaN maps to zero, and so does any negative value for unsigned outputs.
					  if (std::isnan(x) || (!OutLimits::is_signed && is_negative)) {
					    return 0;
					  }

					  // Result used whenever the magnitude is too large for IntOut.
					  const IntOut saturated = is_negative ? OutLimits::min() : OutLimits::max();
					  if (std::isinf(x)) {
					    return saturated;
					  }

					  // frexp yields x == f * 2^binary_exponent with |f| in [0.5, 1.0) (and an
					  // exponent of 0 for x == 0), so |x| is strictly below 2^binary_exponent.
					  int binary_exponent = 0;
					  std::frexp(x, &binary_exponent);

					  // With N non-sign bits in IntOut, any |x| >= 2^N saturates. The one
					  // representable value with that magnitude, -2^N for signed types, is
					  // exactly what saturation returns for negative inputs.
					  if (binary_exponent > OutLimits::digits) {
					    return saturated;
					  }

					  // |x| < 2^N: the truncated value is representable.
					  return static_cast<IntOut>(x);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Decompose a double multiplier into a Q0.31 int32 representation of its
 | 
				
			||||||
 | 
					// significand, and shift representation of NEGATIVE its exponent ---
 | 
				
			||||||
 | 
					// this is intended as a RIGHT-shift.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Restricted to the case where the multiplier < 1 (and non-negative).
 | 
				
			||||||
 | 
					void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
 | 
				
			||||||
 | 
					                                         int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                                         int* left_shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Decompose a double multiplier into a Q0.31 int32 representation of its
 | 
				
			||||||
 | 
					// significand, and shift representation of its exponent.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Restricted to the case where the multiplier > 1.
 | 
				
			||||||
 | 
					void QuantizeMultiplierGreaterThanOne(double double_multiplier,
 | 
				
			||||||
 | 
					                                      int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                                      int* left_shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Decompose a double multiplier into a Q0.31 int32 representation of its
 | 
				
			||||||
 | 
					// significand, and shift representation of its exponent.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Handles an arbitrary positive multiplier. The 'shift' output-value is
 | 
				
			||||||
 | 
					// basically the 'floating-point exponent' of the multiplier:
 | 
				
			||||||
 | 
					// Negative for a right-shift (when the multiplier is <1), positive for a
 | 
				
			||||||
 | 
					// left-shift (when the multiplier is >1)
 | 
				
			||||||
 | 
					void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                        int* shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Splits a double input value into a returned fraction, and a shift value from
 | 
				
			||||||
 | 
					// the exponent, using only bitwise and integer operations to support
 | 
				
			||||||
 | 
					// microcontrollers and other environments without floating-point support.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// This is designed to be a replacement for how std::frexp() is used within the
 | 
				
			||||||
 | 
					// QuantizeMultiplier() function, and so has a different signature than the
 | 
				
			||||||
 | 
					// standard version, returning a 64-bit integer rather than a double. This
 | 
				
			||||||
 | 
					// result has a maximum value of 1<<31, with the fraction expressed as a
 | 
				
			||||||
 | 
					// proportion of that maximum.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// std::frexp() returns NaNs and infinities unmodified, but since we're
 | 
				
			||||||
 | 
					// returning integers that can't represent those values, instead we return
 | 
				
			||||||
 | 
					// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
 | 
				
			||||||
 | 
					// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
 | 
				
			||||||
 | 
					// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
 | 
				
			||||||
 | 
					// result in return values that end up truncating some bits at the end,
 | 
				
			||||||
 | 
					// reflecting the loss of precision inherent in denormalization.
 | 
				
			||||||
 | 
					int64_t IntegerFrExp(double input, int* shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Converts an integer fraction in the format produced by IntegerFrExp (where
 | 
				
			||||||
 | 
					// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
 | 
				
			||||||
 | 
					// IEEE binary64 double format result. The implementation uses only integer and
 | 
				
			||||||
 | 
					// bitwise operators, so no floating point hardware support or emulation is
 | 
				
			||||||
 | 
					// needed. This is here so quantized operations can run non-time-critical
 | 
				
			||||||
 | 
					// preparation calculations on microcontrollers and other platforms without
 | 
				
			||||||
 | 
					// float support.
 | 
				
			||||||
 | 
					double DoubleFromFractionAndShift(int64_t fraction, int shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Performs a multiplication of two numbers in double format, using only integer
 | 
				
			||||||
 | 
					// and bitwise instructions. This is aimed at supporting housekeeping functions
 | 
				
			||||||
 | 
					// for quantized operations on microcontrollers without floating-point hardware.
 | 
				
			||||||
 | 
					double IntegerDoubleMultiply(double a, double b);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
 | 
				
			||||||
 | 
					// greater than b. It is implemented using only integer and logical instructions
 | 
				
			||||||
 | 
					// so that it can be easily run on microcontrollers for quantized operations.
 | 
				
			||||||
 | 
					int IntegerDoubleCompare(double a, double b);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This first creates a multiplier in a double equivalent of
 | 
				
			||||||
 | 
					// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
 | 
				
			||||||
 | 
					// precision in the double's fractional bits.  It then splits the result into
 | 
				
			||||||
 | 
					// significand and exponent.
 | 
				
			||||||
 | 
					void PreprocessSoftmaxScaling(double beta, double input_scale,
 | 
				
			||||||
 | 
					                              int input_integer_bits,
 | 
				
			||||||
 | 
					                              int32_t* quantized_multiplier, int* left_shift);
 | 
				
			||||||
 | 
					// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
 | 
				
			||||||
 | 
					void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
 | 
				
			||||||
 | 
					                                    int input_integer_bits,
 | 
				
			||||||
 | 
					                                    int32_t* quantized_multiplier,
 | 
				
			||||||
 | 
					                                    int* left_shift,
 | 
				
			||||||
 | 
					                                    int32_t* reverse_scaling_divisor,
 | 
				
			||||||
 | 
					                                    int* reverse_scaling_left_shift);
 | 
				
			||||||
 | 
					// Calculate the largest input that will result in a within-bounds intermediate
 | 
				
			||||||
 | 
					// result within MultiplyByQuantizedMultiplierGreaterThanOne.  In other words,
 | 
				
			||||||
 | 
					// it must not overflow before we reduce the value by multiplication by the
 | 
				
			||||||
 | 
					// input multiplier.  The negative radius is used as the minimum difference in
 | 
				
			||||||
 | 
					// Softmax.
 | 
				
			||||||
 | 
					int CalculateInputRadius(int input_integer_bits, int input_left_shift,
 | 
				
			||||||
 | 
					                         int total_signed_bits = 31);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Nudges a min/max quantization range to ensure zero is zero.
 | 
				
			||||||
 | 
					// Gymnastics with nudged zero point is to ensure that real zero maps to
 | 
				
			||||||
 | 
					// an integer, which is required for e.g. zero-padding in convolutional layers.
 | 
				
			||||||
 | 
					// Outputs nudged_min, nudged_max, nudged_scale.
 | 
				
			||||||
 | 
					void NudgeQuantizationRange(const float min, const float max,
 | 
				
			||||||
 | 
					                            const int quant_min, const int quant_max,
 | 
				
			||||||
 | 
					                            float* nudged_min, float* nudged_max,
 | 
				
			||||||
 | 
					                            float* nudged_scale);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fake quantizes (quantizes and dequantizes) input_data using the scale,
 | 
				
			||||||
 | 
					// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
 | 
				
			||||||
 | 
					// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
 | 
				
			||||||
 | 
					void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
 | 
				
			||||||
 | 
					                       const float nudged_max, const float* input_data,
 | 
				
			||||||
 | 
					                       float* output_data, const float size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// If x is approximately a power of two (with any positive or negative
 | 
				
			||||||
 | 
					// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise
 | 
				
			||||||
 | 
					// returns false.
 | 
				
			||||||
 | 
					bool CheckedLog2(const float x, int* log2_result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Splits each double multiplier of an array into a Q0.31 int32 representation
					// of its significand plus a shift representation of its exponent.
					//
					// Arbitrary multipliers are handled. The 'shift' output is basically the
					// 'floating-point exponent' of the multiplier:
					//   - negative means a right-shift (multiplier < 1),
					//   - positive means a left-shift (multiplier > 1).
					void QuantizeMultiplierArray(
					    const double* effective_scales,
					    size_t size,
					    int32_t* effective_scale_significand,
					    int* effective_shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,446 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "fixedpoint/fixedpoint.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void Add(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const T* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const T* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					        input1_data[i] + input2_data[i], params.quantized_activation_min,
 | 
				
			||||||
 | 
					        params.quantized_activation_max);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Add(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const float* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const float* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, float* output_data) {
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    auto x = input1_data[i] + input2_data[i];
 | 
				
			||||||
 | 
					    output_data[i] = ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					        x, params.float_activation_min, params.float_activation_max);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Element-wise add that can often be used for inner loop of broadcast add as
 | 
				
			||||||
 | 
					// well as the non-broadcast add.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This function is used for 8-bit as well as for 16-bit, but the accumulator
 | 
				
			||||||
 | 
					// is 32-bit for both cases. The overflow does not happen due to the
 | 
				
			||||||
 | 
					// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void AddElementwise(int size, const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                           const T* input1_data, const T* input2_data,
 | 
				
			||||||
 | 
					                           T* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input1_val = params.input1_offset + input1_data[i];
 | 
				
			||||||
 | 
					    const int32_t input2_val = params.input2_offset + input2_data[i];
 | 
				
			||||||
 | 
					    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					    const int32_t scaled_input1_val =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            shifted_input1_val, params.input1_multiplier, params.input1_shift);
 | 
				
			||||||
 | 
					    const int32_t scaled_input2_val =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            shifted_input2_val, params.input2_multiplier, params.input2_shift);
 | 
				
			||||||
 | 
					    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
 | 
				
			||||||
 | 
					    const int32_t raw_output =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            raw_sum, params.output_multiplier, params.output_shift) +
 | 
				
			||||||
 | 
					        params.output_offset;
 | 
				
			||||||
 | 
					    const int32_t clamped_output =
 | 
				
			||||||
 | 
					        std::min(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                 std::max(params.quantized_activation_min, raw_output));
 | 
				
			||||||
 | 
					    output_data[i] = static_cast<T>(clamped_output);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Scalar-broadcast add that can be used for inner loop of more general
 | 
				
			||||||
 | 
					// broadcast add, so that, for example, scalar-broadcast with batch will still
 | 
				
			||||||
 | 
					// be fast.
 | 
				
			||||||
 | 
					inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                               uint8_t input1_data, const uint8_t* input2_data,
 | 
				
			||||||
 | 
					                               uint8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input1_offset, -256);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input2_offset, -256);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input1_offset, 256);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input2_offset, 256);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int32_t input1_val = params.input1_offset + input1_data;
 | 
				
			||||||
 | 
					  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					  const int32_t scaled_input1_val =
 | 
				
			||||||
 | 
					      MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					          shifted_input1_val, params.input1_multiplier, params.input1_shift);
 | 
				
			||||||
 | 
					  for (int i = 0; i < size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input2_val = params.input2_offset + input2_data[i];
 | 
				
			||||||
 | 
					    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					    const int32_t scaled_input2_val =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            shifted_input2_val, params.input2_multiplier, params.input2_shift);
 | 
				
			||||||
 | 
					    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
 | 
				
			||||||
 | 
					    const int32_t raw_output =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            raw_sum, params.output_multiplier, params.output_shift) +
 | 
				
			||||||
 | 
					        params.output_offset;
 | 
				
			||||||
 | 
					    const int32_t clamped_output =
 | 
				
			||||||
 | 
					        std::min(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                 std::max(params.quantized_activation_min, raw_output));
 | 
				
			||||||
 | 
					    output_data[i] = static_cast<uint8_t>(clamped_output);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Add(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const uint8_t* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const uint8_t* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, uint8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input1_offset, -256);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input2_offset, -256);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input1_offset, 256);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input2_offset, 256);
 | 
				
			||||||
 | 
					  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void AddGeneralParamScale(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                                 const int16_t* input1_data,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                                 const int16_t* input2_data,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                                 int16_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int max_value = std::numeric_limits<int16_t>::max();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input1_offset, max_value);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(params.input2_offset, max_value);
 | 
				
			||||||
 | 
					  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Add(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const int16_t* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const int16_t* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, int16_t* output_data,
 | 
				
			||||||
 | 
					                bool pot_scale = true) {
 | 
				
			||||||
 | 
					  if (!pot_scale) {
 | 
				
			||||||
 | 
					    AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
 | 
				
			||||||
 | 
					                         input2_data, output_shape, output_data);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int input1_shift = params.input1_shift;
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  const int16_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int16_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(input1_shift, 0);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.input2_shift, 0);
 | 
				
			||||||
 | 
					  const int16_t* not_shift_input =
 | 
				
			||||||
 | 
					      input1_shift == 0 ? input1_data : input2_data;
 | 
				
			||||||
 | 
					  const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
 | 
				
			||||||
 | 
					  const int input_right_shift =
 | 
				
			||||||
 | 
					      input1_shift == 0 ? -params.input2_shift : -input1_shift;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    // F0 uses 0 integer bits, range [-1, 1].
 | 
				
			||||||
 | 
					    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
 | 
				
			||||||
 | 
					    F0 scaled_input = F0::FromRaw(
 | 
				
			||||||
 | 
					        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
 | 
				
			||||||
 | 
					    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
 | 
				
			||||||
 | 
					    const int16_t raw_output = result.raw();
 | 
				
			||||||
 | 
					    const int16_t clamped_output = std::min(
 | 
				
			||||||
 | 
					        output_activation_max, std::max(output_activation_min, raw_output));
 | 
				
			||||||
 | 
					    output_data[i] = clamped_output;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                               const float* input1_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                               const float* input2_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                               float* output_data) {
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
 | 
				
			||||||
 | 
					                                      &desc2);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // In Tensorflow, the dimensions are canonically named (batch_number, row,
 | 
				
			||||||
 | 
					  // col, channel), with extents (batches, height, width, depth), with the
 | 
				
			||||||
 | 
					  // trailing dimension changing most rapidly (channels has the smallest stride,
 | 
				
			||||||
 | 
					  // typically 1 element).
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // In generated C code, we store arrays with the dimensions reversed. The
 | 
				
			||||||
 | 
					  // first dimension has smallest stride.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // We name our variables by their Tensorflow convention, but generate C code
 | 
				
			||||||
 | 
					  // nesting loops such that the innermost loop has the smallest stride for the
 | 
				
			||||||
 | 
					  // best cache behavior.
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          output_data[Offset(extended_output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
 | 
				
			||||||
 | 
					                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
 | 
				
			||||||
 | 
					                  params.float_activation_min, params.float_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                               const int32_t* input1_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                               const int32_t* input2_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                               int32_t* output_data) {
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
 | 
				
			||||||
 | 
					                                      &desc2);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // In Tensorflow, the dimensions are canonically named (batch_number, row,
 | 
				
			||||||
 | 
					  // col, channel), with extents (batches, height, width, depth), with the
 | 
				
			||||||
 | 
					  // trailing dimension changing most rapidly (channels has the smallest stride,
 | 
				
			||||||
 | 
					  // typically 1 element).
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // In generated C code, we store arrays with the dimensions reversed. The
 | 
				
			||||||
 | 
					  // first dimension has smallest stride.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // We name our variables by their Tensorflow convention, but generate C code
 | 
				
			||||||
 | 
					  // nesting loops such that the innermost loop has the smallest stride for the
 | 
				
			||||||
 | 
					  // best cache behavior.
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          output_data[Offset(extended_output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
 | 
				
			||||||
 | 
					                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
 | 
				
			||||||
 | 
					                  params.quantized_activation_min,
 | 
				
			||||||
 | 
					                  params.quantized_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This function is used for 8-bit as well as for 16-bit, but the accumulator
 | 
				
			||||||
 | 
					// is 32-bit for both cases. The overflow does not happen due to the
 | 
				
			||||||
 | 
					// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void BroadcastAdd4DSlow(
 | 
				
			||||||
 | 
					    const ArithmeticParams& params, const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					    const T* input1_data, const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
 | 
				
			||||||
 | 
					                                      &desc2);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // In Tensorflow, the dimensions are canonically named (batch_number, row,
 | 
				
			||||||
 | 
					  // col, channel), with extents (batches, height, width, depth), with the
 | 
				
			||||||
 | 
					  // trailing dimension changing most rapidly (channels has the smallest stride,
 | 
				
			||||||
 | 
					  // typically 1 element).
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // In generated C code, we store arrays with the dimensions reversed. The
 | 
				
			||||||
 | 
					  // first dimension has smallest stride.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // We name our variables by their Tensorflow convention, but generate C code
 | 
				
			||||||
 | 
					  // nesting loops such that the innermost loop has the smallest stride for the
 | 
				
			||||||
 | 
					  // best cache behavior.
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          const int32_t input1_val =
 | 
				
			||||||
 | 
					              params.input1_offset +
 | 
				
			||||||
 | 
					              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t input2_val =
 | 
				
			||||||
 | 
					              params.input2_offset +
 | 
				
			||||||
 | 
					              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t shifted_input1_val =
 | 
				
			||||||
 | 
					              input1_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					          const int32_t shifted_input2_val =
 | 
				
			||||||
 | 
					              input2_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					          const int32_t scaled_input1_val =
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					                  shifted_input1_val, params.input1_multiplier,
 | 
				
			||||||
 | 
					                  params.input1_shift);
 | 
				
			||||||
 | 
					          const int32_t scaled_input2_val =
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					                  shifted_input2_val, params.input2_multiplier,
 | 
				
			||||||
 | 
					                  params.input2_shift);
 | 
				
			||||||
 | 
					          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
 | 
				
			||||||
 | 
					          const int32_t raw_output =
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					                  raw_sum, params.output_multiplier, params.output_shift) +
 | 
				
			||||||
 | 
					              params.output_offset;
 | 
				
			||||||
 | 
					          const int32_t clamped_output =
 | 
				
			||||||
 | 
					              std::min(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                       std::max(params.quantized_activation_min, raw_output));
 | 
				
			||||||
 | 
					          output_data[Offset(extended_output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              static_cast<T>(clamped_output);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& unswitched_input1_shape,
 | 
				
			||||||
 | 
					                                 const uint8_t* unswitched_input1_data,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& unswitched_input2_shape,
 | 
				
			||||||
 | 
					                                 const uint8_t* unswitched_input2_data,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                                 uint8_t* output_data) {
 | 
				
			||||||
 | 
					  ArithmeticParams switched_params = unswitched_params;
 | 
				
			||||||
 | 
					  switched_params.input1_offset = unswitched_params.input2_offset;
 | 
				
			||||||
 | 
					  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
 | 
				
			||||||
 | 
					  switched_params.input1_shift = unswitched_params.input2_shift;
 | 
				
			||||||
 | 
					  switched_params.input2_offset = unswitched_params.input1_offset;
 | 
				
			||||||
 | 
					  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
 | 
				
			||||||
 | 
					  switched_params.input2_shift = unswitched_params.input1_shift;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const bool use_unswitched =
 | 
				
			||||||
 | 
					      unswitched_params.broadcast_category ==
 | 
				
			||||||
 | 
					      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const ArithmeticParams& params =
 | 
				
			||||||
 | 
					      use_unswitched ? unswitched_params : switched_params;
 | 
				
			||||||
 | 
					  const uint8_t* input1_data =
 | 
				
			||||||
 | 
					      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
 | 
				
			||||||
 | 
					  const uint8_t* input2_data =
 | 
				
			||||||
 | 
					      use_unswitched ? unswitched_input2_data : unswitched_input1_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Fivefold nested loops. The second input resets its position for each
 | 
				
			||||||
 | 
					  // iteration of the second loop. The first input resets its position at the
 | 
				
			||||||
 | 
					  // beginning of the fourth loop. The innermost loop is an elementwise add of
 | 
				
			||||||
 | 
					  // sections of the arrays.
 | 
				
			||||||
 | 
					  uint8_t* output_data_ptr = output_data;
 | 
				
			||||||
 | 
					  const uint8_t* input1_data_ptr = input1_data;
 | 
				
			||||||
 | 
					  const uint8_t* input2_data_reset = input2_data;
 | 
				
			||||||
 | 
					  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
 | 
				
			||||||
 | 
					  // between input shapes. y3 for input 1 is always broadcast, and so the
 | 
				
			||||||
 | 
					  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
 | 
				
			||||||
 | 
					  // Put another way,
 | 
				
			||||||
 | 
					  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
 | 
				
			||||||
 | 
					  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
 | 
				
			||||||
 | 
					  int y0 = params.broadcast_shape[0];
 | 
				
			||||||
 | 
					  int y1 = params.broadcast_shape[1];
 | 
				
			||||||
 | 
					  int y2 = params.broadcast_shape[2];
 | 
				
			||||||
 | 
					  int y3 = params.broadcast_shape[3];
 | 
				
			||||||
 | 
					  int y4 = params.broadcast_shape[4];
 | 
				
			||||||
 | 
					  if (y4 > 1) {
 | 
				
			||||||
 | 
					    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
 | 
				
			||||||
 | 
					    // dimension.
 | 
				
			||||||
 | 
					    for (int i0 = 0; i0 < y0; ++i0) {
 | 
				
			||||||
 | 
					      const uint8_t* input2_data_ptr;
 | 
				
			||||||
 | 
					      for (int i1 = 0; i1 < y1; ++i1) {
 | 
				
			||||||
 | 
					        input2_data_ptr = input2_data_reset;
 | 
				
			||||||
 | 
					        for (int i2 = 0; i2 < y2; ++i2) {
 | 
				
			||||||
 | 
					          for (int i3 = 0; i3 < y3; ++i3) {
 | 
				
			||||||
 | 
					            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
 | 
				
			||||||
 | 
					                           output_data_ptr);
 | 
				
			||||||
 | 
					            input2_data_ptr += y4;
 | 
				
			||||||
 | 
					            output_data_ptr += y4;
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          // We have broadcast y4 of input1 data y3 times, and now move on.
 | 
				
			||||||
 | 
					          input1_data_ptr += y4;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
 | 
				
			||||||
 | 
					      input2_data_reset = input2_data_ptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    // Special case of y4 == 1, in which the innermost loop is a single element
 | 
				
			||||||
 | 
					    // and can be combined with the next (y3) as an inner broadcast.
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // Note that this handles the case of pure scalar broadcast when
 | 
				
			||||||
 | 
					    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
 | 
				
			||||||
 | 
					    // broadcast with batch (as y2 > 1).
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // NOTE The process is the same as the above general case except simplified
 | 
				
			||||||
 | 
					    // for y4 == 1 and the loop over y3 is contained within the
 | 
				
			||||||
 | 
					    // AddScalarBroadcast function.
 | 
				
			||||||
 | 
					    for (int i0 = 0; i0 < y0; ++i0) {
 | 
				
			||||||
 | 
					      const uint8_t* input2_data_ptr;
 | 
				
			||||||
 | 
					      for (int i1 = 0; i1 < y1; ++i1) {
 | 
				
			||||||
 | 
					        input2_data_ptr = input2_data_reset;
 | 
				
			||||||
 | 
					        for (int i2 = 0; i2 < y2; ++i2) {
 | 
				
			||||||
 | 
					          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
 | 
				
			||||||
 | 
					                             output_data_ptr);
 | 
				
			||||||
 | 
					          input2_data_ptr += y3;
 | 
				
			||||||
 | 
					          output_data_ptr += y3;
 | 
				
			||||||
 | 
					          input1_data_ptr += 1;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      input2_data_reset = input2_data_ptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,68 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T1, typename T2, typename T3, typename Cmp>
 | 
				
			||||||
 | 
					void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
 | 
				
			||||||
 | 
					               const T3* input2_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					               T2* output_data, const Cmp& cmp) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
 | 
				
			||||||
 | 
					                   output_shape.DimensionsCount());
 | 
				
			||||||
 | 
					  int axis = input2_data[0];
 | 
				
			||||||
 | 
					  if (axis < 0) {
 | 
				
			||||||
 | 
					    axis += input1_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int axis_size = input1_shape.Dims(axis);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int outer_size = 1;
 | 
				
			||||||
 | 
					  for (int i = 0; i < axis; ++i) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
 | 
				
			||||||
 | 
					    outer_size *= input1_shape.Dims(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int inner_size = 1;
 | 
				
			||||||
 | 
					  const int dims_count = input1_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  for (int i = axis + 1; i < dims_count; ++i) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
 | 
				
			||||||
 | 
					    inner_size *= input1_shape.Dims(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  for (int outer = 0; outer < outer_size; ++outer) {
 | 
				
			||||||
 | 
					    for (int inner = 0; inner < inner_size; ++inner) {
 | 
				
			||||||
 | 
					      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
 | 
				
			||||||
 | 
					      T2 min_max_index = 0;
 | 
				
			||||||
 | 
					      for (int i = 1; i < axis_size; ++i) {
 | 
				
			||||||
 | 
					        const auto& curr_value =
 | 
				
			||||||
 | 
					            input1_data[(outer * axis_size + i) * inner_size + inner];
 | 
				
			||||||
 | 
					        if (cmp(curr_value, min_max_value)) {
 | 
				
			||||||
 | 
					          min_max_value = curr_value;
 | 
				
			||||||
 | 
					          min_max_index = static_cast<T2>(i);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      output_data[outer * inner_size + inner] = min_max_index;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,80 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Also appears to duplicate MinimumMaximum.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// R: Result type. T1: Input 1 type. T2: Input 2 type.
 | 
				
			||||||
 | 
					template <typename R, typename T1, typename T2>
 | 
				
			||||||
 | 
					inline void BroadcastBinaryFunction4DSlow(
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_output_shape, R* output_data,
 | 
				
			||||||
 | 
					    R (*func)(T1, T2)) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const RuntimeShape output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, unextended_output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
 | 
				
			||||||
 | 
					                                      unextended_input2_shape, &desc1, &desc2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          auto out_idx = Offset(output_shape, b, y, x, c);
 | 
				
			||||||
 | 
					          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
 | 
				
			||||||
 | 
					          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
 | 
				
			||||||
 | 
					          auto in1_val = input1_data[in1_idx];
 | 
				
			||||||
 | 
					          auto in2_val = input2_data[in2_idx];
 | 
				
			||||||
 | 
					          output_data[out_idx] = func(in1_val, in2_val);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// R: Result type. T1: Input 1 type. T2: Input 2 type.
 | 
				
			||||||
 | 
					template <typename R, typename T1, typename T2>
 | 
				
			||||||
 | 
					inline void BinaryFunction(const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                           const T1* input1_data,
 | 
				
			||||||
 | 
					                           const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                           const T2* input2_data,
 | 
				
			||||||
 | 
					                           const RuntimeShape& output_shape, R* output_data,
 | 
				
			||||||
 | 
					                           R (*func)(T1, T2)) {
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingFlatSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = func(input1_data[i], input2_data[i]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,37 @@
 | 
				
			||||||
 | 
					/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
 | 
				
			||||||
 | 
					                 const RuntimeShape& output_shape, float* output_data) {
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = std::ceil(input_data[i]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,280 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Returns the result of `lhs == rhs`.
					template <typename T>
					inline bool EqualFn(T lhs, T rhs) {
					  const bool is_equal = (lhs == rhs);
					  return is_equal;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Returns the result of `lhs != rhs`.
					template <typename T>
					inline bool NotEqualFn(T lhs, T rhs) {
					  const bool differs = (lhs != rhs);
					  return differs;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Returns the result of `lhs > rhs`.
					template <typename T>
					inline bool GreaterFn(T lhs, T rhs) {
					  const bool is_greater = (lhs > rhs);
					  return is_greater;
					}
 | 
				
			||||||
 | 
					// Returns the result of `lhs >= rhs`.
					template <typename T>
					inline bool GreaterEqualFn(T lhs, T rhs) {
					  const bool is_greater_or_equal = (lhs >= rhs);
					  return is_greater_or_equal;
					}
 | 
				
			||||||
 | 
					// Returns the result of `lhs < rhs`.
					template <typename T>
					inline bool LessFn(T lhs, T rhs) {
					  const bool is_less = (lhs < rhs);
					  return is_less;
					}
 | 
				
			||||||
 | 
					// Returns the result of `lhs <= rhs`.
					template <typename T>
					inline bool LessEqualFn(T lhs, T rhs) {
					  const bool is_less_or_equal = (lhs <= rhs);
					  return is_less_or_equal;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Pointer to a binary predicate over two values of type T. Used below as
					// a non-type template parameter to select the comparison at compile time.
					template <typename T>
					using ComparisonFn = bool (*)(T lhs, T rhs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, ComparisonFn<T> F>
 | 
				
			||||||
 | 
					inline void ComparisonImpl(
 | 
				
			||||||
 | 
					    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					    const T* input1_data, const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
 | 
				
			||||||
 | 
					  const int64_t flatsize =
 | 
				
			||||||
 | 
					      MatchingFlatSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int64_t i = 0; i < flatsize; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = F(input1_data[i], input2_data[i]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <ComparisonFn<float> F>
 | 
				
			||||||
 | 
					inline void Comparison(const ComparisonParams& op_params,
 | 
				
			||||||
 | 
					                       const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                       const float* input1_data,
 | 
				
			||||||
 | 
					                       const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                       const float* input2_data,
 | 
				
			||||||
 | 
					                       const RuntimeShape& output_shape, bool* output_data) {
 | 
				
			||||||
 | 
					  ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
 | 
				
			||||||
 | 
					                           input2_data, output_shape, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, ComparisonFn<int32_t> F>
 | 
				
			||||||
 | 
					inline void ComparisonWithScaling(
 | 
				
			||||||
 | 
					    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					    const T* input1_data, const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
 | 
				
			||||||
 | 
					  int left_shift = op_params.left_shift;
 | 
				
			||||||
 | 
					  int32_t input1_offset = op_params.input1_offset;
 | 
				
			||||||
 | 
					  int32_t input1_multiplier = op_params.input1_multiplier;
 | 
				
			||||||
 | 
					  int input1_shift = op_params.input1_shift;
 | 
				
			||||||
 | 
					  int32_t input2_offset = op_params.input2_offset;
 | 
				
			||||||
 | 
					  int32_t input2_multiplier = op_params.input2_multiplier;
 | 
				
			||||||
 | 
					  int input2_shift = op_params.input2_shift;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int64_t flatsize =
 | 
				
			||||||
 | 
					      MatchingFlatSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int64_t i = 0; i < flatsize; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input1_val = input1_offset + input1_data[i];
 | 
				
			||||||
 | 
					    const int32_t input2_val = input2_offset + input2_data[i];
 | 
				
			||||||
 | 
					    const int32_t shifted_input1_val = input1_val * (1 << left_shift);
 | 
				
			||||||
 | 
					    const int32_t shifted_input2_val = input2_val * (1 << left_shift);
 | 
				
			||||||
 | 
					    const int32_t scaled_input1_val =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            shifted_input1_val, input1_multiplier, input1_shift);
 | 
				
			||||||
 | 
					    const int32_t scaled_input2_val =
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					            shifted_input2_val, input2_multiplier, input2_shift);
 | 
				
			||||||
 | 
					    output_data[i] = F(scaled_input1_val, scaled_input2_val);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct BroadcastComparison4DSlowCommon {
 | 
				
			||||||
 | 
					  const RuntimeShape output_shape;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Builds the BroadcastComparison4DSlowCommon for a comparison: DCHECKs that
					// each shape has at most four dimensions, fills both descriptors through
					// NdArrayDescsForElementwiseBroadcast, and extends the output shape to four
					// dimensions.
					inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
					    const RuntimeShape& unextended_input1_shape,
					    const RuntimeShape& unextended_input2_shape,
					    const RuntimeShape& unextended_output_shape) {
					  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
					  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
					  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
					  NdArrayDesc<4> lhs_desc;
					  NdArrayDesc<4> rhs_desc;
					  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
					                                      unextended_input2_shape, &lhs_desc,
					                                      &rhs_desc);
					  // Brace-initialize in member order: output_shape, desc1, desc2.
					  return {RuntimeShape::ExtendedShape(4, unextended_output_shape), lhs_desc,
					          rhs_desc};
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, ComparisonFn<T> F>
 | 
				
			||||||
 | 
					inline void BroadcastComparison4DSlowImpl(
 | 
				
			||||||
 | 
					    const ComparisonParams& op_params,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_input1_shape, const T* input1_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_input2_shape, const T* input2_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_output_shape, bool* output_data) {
 | 
				
			||||||
 | 
					  const BroadcastComparison4DSlowCommon dims =
 | 
				
			||||||
 | 
					      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
 | 
				
			||||||
 | 
					                                          unextended_input2_shape,
 | 
				
			||||||
 | 
					                                          unextended_output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          output_data[Offset(dims.output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
 | 
				
			||||||
 | 
					                input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <ComparisonFn<float> F>
 | 
				
			||||||
 | 
					inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
 | 
				
			||||||
 | 
					                                      const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                                      const float* input1_data,
 | 
				
			||||||
 | 
					                                      const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                                      const float* input2_data,
 | 
				
			||||||
 | 
					                                      const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                                      bool* output_data) {
 | 
				
			||||||
 | 
					  BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
 | 
				
			||||||
 | 
					                                          input2_shape, input2_data,
 | 
				
			||||||
 | 
					                                          output_shape, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, ComparisonFn<int32_t> F>
 | 
				
			||||||
 | 
					inline void BroadcastComparison4DSlowWithScaling(
 | 
				
			||||||
 | 
					    const ComparisonParams& op_params,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_input1_shape, const T* input1_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_input2_shape, const T* input2_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& unextended_output_shape, bool* output_data) {
 | 
				
			||||||
 | 
					  const BroadcastComparison4DSlowCommon dims =
 | 
				
			||||||
 | 
					      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
 | 
				
			||||||
 | 
					                                          unextended_input2_shape,
 | 
				
			||||||
 | 
					                                          unextended_output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int left_shift = op_params.left_shift;
 | 
				
			||||||
 | 
					  int32_t input1_offset = op_params.input1_offset;
 | 
				
			||||||
 | 
					  int32_t input1_multiplier = op_params.input1_multiplier;
 | 
				
			||||||
 | 
					  int input1_shift = op_params.input1_shift;
 | 
				
			||||||
 | 
					  int32_t input2_offset = op_params.input2_offset;
 | 
				
			||||||
 | 
					  int32_t input2_multiplier = op_params.input2_multiplier;
 | 
				
			||||||
 | 
					  int input2_shift = op_params.input2_shift;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          const int32_t input1_val =
 | 
				
			||||||
 | 
					              input1_offset +
 | 
				
			||||||
 | 
					              input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t input2_val =
 | 
				
			||||||
 | 
					              input2_offset +
 | 
				
			||||||
 | 
					              input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t shifted_input1_val = input1_val * (1 << left_shift);
 | 
				
			||||||
 | 
					          const int32_t shifted_input2_val = input2_val * (1 << left_shift);
 | 
				
			||||||
 | 
					          const int32_t scaled_input1_val =
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					                  shifted_input1_val, input1_multiplier, input1_shift);
 | 
				
			||||||
 | 
					          const int32_t scaled_input2_val =
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					                  shifted_input2_val, input2_multiplier, input2_shift);
 | 
				
			||||||
 | 
					          output_data[Offset(dims.output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              F(scaled_input1_val, scaled_input2_val);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define TFLITE_COMPARISON_OP(name)                                             \
 | 
				
			||||||
 | 
					  inline void name(const ComparisonParams& op_params,                          \
 | 
				
			||||||
 | 
					                   const RuntimeShape& input1_shape, const float* input1_data, \
 | 
				
			||||||
 | 
					                   const RuntimeShape& input2_shape, const float* input2_data, \
 | 
				
			||||||
 | 
					                   const RuntimeShape& output_shape, bool* output_data) {      \
 | 
				
			||||||
 | 
					    Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape,   \
 | 
				
			||||||
 | 
					                         input2_data, output_shape, output_data);              \
 | 
				
			||||||
 | 
					  }                                                                            \
 | 
				
			||||||
 | 
					  template <typename T>                                                        \
 | 
				
			||||||
 | 
					  inline void name##NoScaling(                                                 \
 | 
				
			||||||
 | 
					      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
 | 
				
			||||||
 | 
					      const T* input1_data, const RuntimeShape& input2_shape,                  \
 | 
				
			||||||
 | 
					      const T* input2_data, const RuntimeShape& output_shape,                  \
 | 
				
			||||||
 | 
					      bool* output_data) {                                                     \
 | 
				
			||||||
 | 
					    ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data,          \
 | 
				
			||||||
 | 
					                                input2_shape, input2_data, output_shape,       \
 | 
				
			||||||
 | 
					                                output_data);                                  \
 | 
				
			||||||
 | 
					  }                                                                            \
 | 
				
			||||||
 | 
					  template <typename T>                                                        \
 | 
				
			||||||
 | 
					  inline void name##WithScaling(                                               \
 | 
				
			||||||
 | 
					      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
 | 
				
			||||||
 | 
					      const T* input1_data, const RuntimeShape& input2_shape,                  \
 | 
				
			||||||
 | 
					      const T* input2_data, const RuntimeShape& output_shape,                  \
 | 
				
			||||||
 | 
					      bool* output_data) {                                                     \
 | 
				
			||||||
 | 
					    ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data,   \
 | 
				
			||||||
 | 
					                                       input2_shape, input2_data,              \
 | 
				
			||||||
 | 
					                                       output_shape, output_data);             \
 | 
				
			||||||
 | 
					  }                                                                            \
 | 
				
			||||||
 | 
					  template <typename T>                                                        \
 | 
				
			||||||
 | 
					  inline void Broadcast4DSlow##name##NoScaling(                                \
 | 
				
			||||||
 | 
					      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
 | 
				
			||||||
 | 
					      const T* input1_data, const RuntimeShape& input2_shape,                  \
 | 
				
			||||||
 | 
					      const T* input2_data, const RuntimeShape& output_shape,                  \
 | 
				
			||||||
 | 
					      bool* output_data) {                                                     \
 | 
				
			||||||
 | 
					    BroadcastComparison4DSlowImpl<T, name##Fn>(                                \
 | 
				
			||||||
 | 
					        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
 | 
				
			||||||
 | 
					        output_shape, output_data);                                            \
 | 
				
			||||||
 | 
					  }                                                                            \
 | 
				
			||||||
 | 
					  inline void Broadcast4DSlow##name(                                           \
 | 
				
			||||||
 | 
					      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
 | 
				
			||||||
 | 
					      const float* input1_data, const RuntimeShape& input2_shape,              \
 | 
				
			||||||
 | 
					      const float* input2_data, const RuntimeShape& output_shape,              \
 | 
				
			||||||
 | 
					      bool* output_data) {                                                     \
 | 
				
			||||||
 | 
					    BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data,  \
 | 
				
			||||||
 | 
					                                        input2_shape, input2_data,             \
 | 
				
			||||||
 | 
					                                        output_shape, output_data);            \
 | 
				
			||||||
 | 
					  }                                                                            \
 | 
				
			||||||
 | 
					  template <typename T>                                                        \
 | 
				
			||||||
 | 
					  inline void Broadcast4DSlow##name##WithScaling(                              \
 | 
				
			||||||
 | 
					      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
 | 
				
			||||||
 | 
					      const T* input1_data, const RuntimeShape& input2_shape,                  \
 | 
				
			||||||
 | 
					      const T* input2_data, const RuntimeShape& output_shape,                  \
 | 
				
			||||||
 | 
					      bool* output_data) {                                                     \
 | 
				
			||||||
 | 
					    BroadcastComparison4DSlowWithScaling<T, name##Fn>(                         \
 | 
				
			||||||
 | 
					        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
 | 
				
			||||||
 | 
					        output_shape, output_data);                                            \
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					TFLITE_COMPARISON_OP(Equal);
 | 
				
			||||||
 | 
					TFLITE_COMPARISON_OP(NotEqual);
 | 
				
			||||||
 | 
					TFLITE_COMPARISON_OP(Greater);
 | 
				
			||||||
 | 
					TFLITE_COMPARISON_OP(GreaterEqual);
 | 
				
			||||||
 | 
					TFLITE_COMPARISON_OP(Less);
 | 
				
			||||||
 | 
					TFLITE_COMPARISON_OP(LessEqual);
 | 
				
			||||||
 | 
					#undef TFLITE_COMPARISON_OP
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,139 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Scalar>
 | 
				
			||||||
 | 
					inline void Concatenation(const ConcatenationParams& params,
 | 
				
			||||||
 | 
					                          const RuntimeShape* const* input_shapes,
 | 
				
			||||||
 | 
					                          const Scalar* const* input_data,
 | 
				
			||||||
 | 
					                          const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                          Scalar* output_data) {
 | 
				
			||||||
 | 
					  int axis = params.axis;
 | 
				
			||||||
 | 
					  int inputs_count = params.inputs_count;
 | 
				
			||||||
 | 
					  const int concat_dimensions = output_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(axis, concat_dimensions);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int64_t concat_size = 0;
 | 
				
			||||||
 | 
					  for (int i = 0; i < inputs_count; i++) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
 | 
				
			||||||
 | 
					    for (int j = 0; j < concat_dimensions; j++) {
 | 
				
			||||||
 | 
					      if (j != axis) {
 | 
				
			||||||
 | 
					        MatchingDim(*input_shapes[i], j, output_shape, j);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    concat_size += input_shapes[i]->Dims(axis);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
 | 
				
			||||||
 | 
					  int64_t outer_size = 1;
 | 
				
			||||||
 | 
					  for (int i = 0; i < axis; ++i) {
 | 
				
			||||||
 | 
					    outer_size *= output_shape.Dims(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // For all input arrays,
 | 
				
			||||||
 | 
					  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
 | 
				
			||||||
 | 
					  int64_t base_inner_size = 1;
 | 
				
			||||||
 | 
					  for (int i = axis + 1; i < concat_dimensions; ++i) {
 | 
				
			||||||
 | 
					    base_inner_size *= output_shape.Dims(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Scalar* output_ptr = output_data;
 | 
				
			||||||
 | 
					  for (int k = 0; k < outer_size; k++) {
 | 
				
			||||||
 | 
					    for (int i = 0; i < inputs_count; ++i) {
 | 
				
			||||||
 | 
					      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
 | 
				
			||||||
 | 
					      const Scalar* input_ptr = input_data[i] + k * copy_size;
 | 
				
			||||||
 | 
					      memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
 | 
				
			||||||
 | 
					      output_ptr += copy_size;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(b/174275780): The quantized implementation of concatenation isn't fully
 | 
				
			||||||
 | 
					// quantized as it takes scale as a floating point value. This should be fixed
 | 
				
			||||||
 | 
					// when optimizing this routine further.
 | 
				
			||||||
 | 
					inline void ConcatenationWithScaling(const ConcatenationParams& params,
 | 
				
			||||||
 | 
					                                     const RuntimeShape* const* input_shapes,
 | 
				
			||||||
 | 
					                                     const uint8_t* const* input_data,
 | 
				
			||||||
 | 
					                                     const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                                     uint8_t* output_data) {
 | 
				
			||||||
 | 
					  int axis = params.axis;
 | 
				
			||||||
 | 
					  const int32_t* input_zeropoint = params.input_zeropoint;
 | 
				
			||||||
 | 
					  const float* input_scale = params.input_scale;
 | 
				
			||||||
 | 
					  int inputs_count = params.inputs_count;
 | 
				
			||||||
 | 
					  const int32_t output_zeropoint = params.output_zeropoint;
 | 
				
			||||||
 | 
					  const float output_scale = params.output_scale;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int concat_dimensions = output_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LT(axis, concat_dimensions);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int64_t concat_size = 0;
 | 
				
			||||||
 | 
					  for (int i = 0; i < inputs_count; i++) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
 | 
				
			||||||
 | 
					    for (int j = 0; j < concat_dimensions; j++) {
 | 
				
			||||||
 | 
					      if (j != axis) {
 | 
				
			||||||
 | 
					        MatchingDim(*input_shapes[i], j, output_shape, j);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    concat_size += input_shapes[i]->Dims(axis);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
 | 
				
			||||||
 | 
					  int64_t outer_size = 1;
 | 
				
			||||||
 | 
					  for (int i = 0; i < axis; ++i) {
 | 
				
			||||||
 | 
					    outer_size *= output_shape.Dims(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // For all input arrays,
 | 
				
			||||||
 | 
					  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
 | 
				
			||||||
 | 
					  int64_t base_inner_size = 1;
 | 
				
			||||||
 | 
					  for (int i = axis + 1; i < concat_dimensions; ++i) {
 | 
				
			||||||
 | 
					    base_inner_size *= output_shape.Dims(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const float inverse_output_scale = 1.f / output_scale;
 | 
				
			||||||
 | 
					  uint8_t* output_ptr = output_data;
 | 
				
			||||||
 | 
					  for (int k = 0; k < outer_size; k++) {
 | 
				
			||||||
 | 
					    for (int i = 0; i < inputs_count; ++i) {
 | 
				
			||||||
 | 
					      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
 | 
				
			||||||
 | 
					      const uint8_t* input_ptr = input_data[i] + k * copy_size;
 | 
				
			||||||
 | 
					      if (input_zeropoint[i] == output_zeropoint &&
 | 
				
			||||||
 | 
					          input_scale[i] == output_scale) {
 | 
				
			||||||
 | 
					        memcpy(output_ptr, input_ptr, copy_size);
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        const float scale = input_scale[i] * inverse_output_scale;
 | 
				
			||||||
 | 
					        const float bias = -input_zeropoint[i] * scale;
 | 
				
			||||||
 | 
					        for (int j = 0; j < copy_size; ++j) {
 | 
				
			||||||
 | 
					          const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
 | 
				
			||||||
 | 
					                                    input_ptr[j] * scale + bias)) +
 | 
				
			||||||
 | 
					                                output_zeropoint;
 | 
				
			||||||
 | 
					          output_ptr[j] = static_cast<uint8_t>(
 | 
				
			||||||
 | 
					              std::max<int32_t>(std::min<int32_t>(255, value), 0));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      output_ptr += copy_size;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,264 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                 const float* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					                 const float* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					                 const float* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                 float* output_data, const RuntimeShape& im2col_shape,
 | 
				
			||||||
 | 
					                 float* im2col_data) {
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const float output_activation_min = params.float_activation_min;
 | 
				
			||||||
 | 
					  const float output_activation_max = params.float_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  (void)im2col_data;   // only used in optimized code.
 | 
				
			||||||
 | 
					  (void)im2col_shape;  // only used in optimized code.
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          float total = 0.f;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            const int in_y = in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					              const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                  (in_y < input_height);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              if (!is_point_inside_image) {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					                float input_value = input_data[Offset(input_shape, batch, in_y,
 | 
				
			||||||
 | 
					                                                      in_x, in_channel)];
 | 
				
			||||||
 | 
					                float filter_value = filter_data[Offset(
 | 
				
			||||||
 | 
					                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
 | 
				
			||||||
 | 
					                total += (input_value * filter_value);
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          float bias_value = 0.0f;
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            bias_value = bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(total + bias_value,
 | 
				
			||||||
 | 
					                                           output_activation_min,
 | 
				
			||||||
 | 
					                                           output_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                 const uint8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					                 const uint8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					                 const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                 uint8_t* output_data, const RuntimeShape& im2col_shape,
 | 
				
			||||||
 | 
					                 uint8_t* im2col_data, void* cpu_backend_context) {
 | 
				
			||||||
 | 
					  (void)cpu_backend_context;  // only used in optimized code.
 | 
				
			||||||
 | 
					  (void)im2col_data;          // only used in optimized code.
 | 
				
			||||||
 | 
					  (void)im2col_shape;         // only used in optimized code.
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					  const int32_t filter_offset = params.weights_offset;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					  const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          int32_t acc = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            const int in_y = in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					              const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                  (in_y < input_height);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              if (!is_point_inside_image) {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					                int32_t input_val = input_data[Offset(input_shape, batch, in_y,
 | 
				
			||||||
 | 
					                                                      in_x, in_channel)];
 | 
				
			||||||
 | 
					                int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
 | 
				
			||||||
 | 
					                acc +=
 | 
				
			||||||
 | 
					                    (filter_val + filter_offset) * (input_val + input_offset);
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            acc += bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
 | 
				
			||||||
 | 
					                                              output_shift);
 | 
				
			||||||
 | 
					          acc += output_offset;
 | 
				
			||||||
 | 
					          acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              static_cast<uint8_t>(acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void HybridConvPerChannel(
 | 
				
			||||||
 | 
					    const ConvParams& params, float* scaling_factors_ptr,
 | 
				
			||||||
 | 
					    const RuntimeShape& input_shape, const int8_t* input_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& filter_shape, const int8_t* filter_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& bias_shape, const float* bias_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& output_shape, float* output_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& im2col_shape, int8_t* im2col_data,
 | 
				
			||||||
 | 
					    const float* per_channel_scale, int32_t* input_offset) {
 | 
				
			||||||
 | 
					  (void)im2col_data;   // only used in optimized code.
 | 
				
			||||||
 | 
					  (void)im2col_shape;  // only used in optimized code.
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const float output_activation_min = params.float_activation_min;
 | 
				
			||||||
 | 
					  const float output_activation_max = params.float_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					          const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					          int32_t acc = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					                const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                const int in_y =
 | 
				
			||||||
 | 
					                    in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                // If the location is outside the bounds of the input image,
 | 
				
			||||||
 | 
					                // use zero as a default value.
 | 
				
			||||||
 | 
					                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                    (in_y < input_height)) {
 | 
				
			||||||
 | 
					                  int32_t input_val = input_data[Offset(
 | 
				
			||||||
 | 
					                      input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                  int32_t filter_val =
 | 
				
			||||||
 | 
					                      filter_data[Offset(filter_shape, out_channel, filter_y,
 | 
				
			||||||
 | 
					                                         filter_x, in_channel)];
 | 
				
			||||||
 | 
					                  acc += filter_val * (input_val - input_offset[batch]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          float acc_float =
 | 
				
			||||||
 | 
					              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            acc_float += bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(acc_float, output_activation_min,
 | 
				
			||||||
 | 
					                                           output_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,100 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void DepthwiseConv(
 | 
				
			||||||
 | 
					    const DepthwiseParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const float* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const float* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const float* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    float* output_data) {
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const int depth_multiplier = params.depth_multiplier;
 | 
				
			||||||
 | 
					  const float output_activation_min = params.float_activation_min;
 | 
				
			||||||
 | 
					  const float output_activation_max = params.float_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int input_depth = input_shape.Dims(3);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int ic = 0; ic < input_depth; ++ic) {
 | 
				
			||||||
 | 
					          for (int m = 0; m < depth_multiplier; m++) {
 | 
				
			||||||
 | 
					            const int oc = m + ic * depth_multiplier;
 | 
				
			||||||
 | 
					            const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					            const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					            float total = 0.f;
 | 
				
			||||||
 | 
					            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					                const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                const int in_y =
 | 
				
			||||||
 | 
					                    in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                // If the location is outside the bounds of the input image,
 | 
				
			||||||
 | 
					                // use zero as a default value.
 | 
				
			||||||
 | 
					                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                    (in_y < input_height)) {
 | 
				
			||||||
 | 
					                  float input_value =
 | 
				
			||||||
 | 
					                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
 | 
				
			||||||
 | 
					                  float filter_value = filter_data[Offset(
 | 
				
			||||||
 | 
					                      filter_shape, 0, filter_y, filter_x, oc)];
 | 
				
			||||||
 | 
					                  total += (input_value * filter_value);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            float bias_value = 0.0f;
 | 
				
			||||||
 | 
					            if (bias_data) {
 | 
				
			||||||
 | 
					              bias_value = bias_data[oc];
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
 | 
				
			||||||
 | 
					                ActivationFunctionWithMinMax(total + bias_value,
 | 
				
			||||||
 | 
					                                             output_activation_min,
 | 
				
			||||||
 | 
					                                             output_activation_max);
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // end namespace reference_ops
 | 
				
			||||||
 | 
					}  // end namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,297 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "fixedpoint/fixedpoint.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Selects which depthwise convolution implementation is invoked. Used in tests
					// and as a template parameter, primarily for reference code and for
					// specializations that tests force explicitly.
					//
					// Tests should always be run against the standard entry as well, even when
					// another kernel is also under test, so that the main DepthwiseConv() function
					// in optimized_ops.h is known to dispatch to a correctly-executing kernel.
					// NOTE(review): the original comment names that option "kUseStandardEntry";
					// no such enumerator exists here, presumably it refers to kNone -- confirm.
					enum class DepthwiseConvImplementation {
					  // The "default" option: use the normal DepthwiseConv kernel (entry)
					  // function.
					  kNone = 0,
					  // Forced use of generic kernel.
					  kUseGenericKernel,
					  // 3x3 kernel that uses NEON when available.
					  kUseNeon3x3,
					  // 3x3 kernel that uses dot-product enabled NEON when available.
					  kUseNeon3x3DotProduct,
					  // 3x3 kernel, reference C model that is intended to match overall design
					  // NEON code.
					  kUseCModel3x3DotProduct,
					  // 3x3 kernel, reference C model with unwound loops and some arrays.
					  kUseUnwound3x3DotProduct,
					  // 3x3 kernel using NEON intrinsics.
					  kUseIntrinsics3x3DotProduct,
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Rounding category applied to depthwise convolution outputs.
					enum class DepthwiseConvOutputRounding {
					  // Invalid: a specific method must be specified.
					  kNone = 0,
					  // Original method: exact halves are rounded away from zero.
					  kAwayFromZero,
					  // Halves go towards +infinity: adds 0.5 before truncating.
					  kUpward,
					  // This is where a future kNearestEven would be placed.
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Depth-multiplication category of a depthwise convolution.
					enum class DepthwiseConvDepthMultiplication {
					  // Depth multiplier = 1.
					  kNoMultiplication = 0,
					  // Input depth = 1, output depth = depth multiplier.
					  kUnitInputDepth,
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					namespace depthwise_conv {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <DepthwiseConvOutputRounding output_rounding>
 | 
				
			||||||
 | 
					inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
 | 
				
			||||||
 | 
					                                  int shift) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
 | 
				
			||||||
 | 
					  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <>
 | 
				
			||||||
 | 
					inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
 | 
				
			||||||
 | 
					    int32_t x, int32_t quantized_multiplier, int shift) {
 | 
				
			||||||
 | 
					  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <>
 | 
				
			||||||
 | 
					inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
 | 
				
			||||||
 | 
					    int32_t x, int32_t quantized_multiplier, int shift) {
 | 
				
			||||||
 | 
					  using gemmlowp::SaturatingRoundingDoublingHighMul;
 | 
				
			||||||
 | 
					  const int left_shift = shift > 0 ? shift : 0;
 | 
				
			||||||
 | 
					  const int right_shift = shift > 0 ? 0 : -shift;
 | 
				
			||||||
 | 
					  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
 | 
				
			||||||
 | 
					  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
 | 
				
			||||||
 | 
					                                            quantized_multiplier) +
 | 
				
			||||||
 | 
					          rounding_offset) >>
 | 
				
			||||||
 | 
					         right_shift;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <DepthwiseConvOutputRounding output_rounding>
 | 
				
			||||||
 | 
					struct DepthwiseConvBasicKernel {
 | 
				
			||||||
 | 
					  static inline void Run(
 | 
				
			||||||
 | 
					      const DepthwiseParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					      const uint8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					      const uint8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					      const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					      uint8_t* output_data) {
 | 
				
			||||||
 | 
					    const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					    const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					    const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					    const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					    const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					    const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					    const int depth_multiplier = params.depth_multiplier;
 | 
				
			||||||
 | 
					    const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					    const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					    const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					    const int32_t filter_offset = params.weights_offset;
 | 
				
			||||||
 | 
					    const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					    const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					    const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					    const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					    const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					    const int input_depth = input_shape.Dims(3);
 | 
				
			||||||
 | 
					    const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					    const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					    const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					    const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					      for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					        for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					          for (int ic = 0; ic < input_depth; ++ic) {
 | 
				
			||||||
 | 
					            for (int m = 0; m < depth_multiplier; m++) {
 | 
				
			||||||
 | 
					              const int oc = m + ic * depth_multiplier;
 | 
				
			||||||
 | 
					              const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					              const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					              int32_t acc = 0;
 | 
				
			||||||
 | 
					              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					                  const int in_x =
 | 
				
			||||||
 | 
					                      in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                  const int in_y =
 | 
				
			||||||
 | 
					                      in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                  // If the location is outside the bounds of the input image,
 | 
				
			||||||
 | 
					                  // use zero as a default value.
 | 
				
			||||||
 | 
					                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                      (in_y < input_height)) {
 | 
				
			||||||
 | 
					                    int32_t input_val =
 | 
				
			||||||
 | 
					                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
 | 
				
			||||||
 | 
					                    int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                        filter_shape, 0, filter_y, filter_x, oc)];
 | 
				
			||||||
 | 
					                    acc += (filter_val + filter_offset) *
 | 
				
			||||||
 | 
					                           (input_val + input_offset);
 | 
				
			||||||
 | 
					                  }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					              if (bias_data) {
 | 
				
			||||||
 | 
					                acc += bias_data[oc];
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
 | 
				
			||||||
 | 
					                                                        output_shift);
 | 
				
			||||||
 | 
					              acc += output_offset;
 | 
				
			||||||
 | 
					              acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					              acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
 | 
				
			||||||
 | 
					                  static_cast<uint8_t>(acc);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // TODO(b/148596273): Reconcile reference versions, perhaps with common
 | 
				
			||||||
 | 
					  // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
 | 
				
			||||||
 | 
					  static inline void RunPerChannel(
 | 
				
			||||||
 | 
					      const DepthwiseParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					      const int8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					      const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					      const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					      int8_t* output_data) {
 | 
				
			||||||
 | 
					    // Get parameters.
 | 
				
			||||||
 | 
					    // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
 | 
				
			||||||
 | 
					    const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					    const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					    const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					    const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					    const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					    const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					    const int depth_multiplier = params.depth_multiplier;
 | 
				
			||||||
 | 
					    const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					    const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					    const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					    const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					    const int32_t* output_multiplier = params.output_multiplier_per_channel;
 | 
				
			||||||
 | 
					    const int32_t* output_shift = params.output_shift_per_channel;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Check dimensions of the tensors.
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					    const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					    const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					    const int input_depth = input_shape.Dims(3);
 | 
				
			||||||
 | 
					    const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					    const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					    const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					    const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					      for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					        for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					            for (int m = 0; m < depth_multiplier; ++m) {
 | 
				
			||||||
 | 
					              const int output_channel = m + in_channel * depth_multiplier;
 | 
				
			||||||
 | 
					              const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					              const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					              int32_t acc = 0;
 | 
				
			||||||
 | 
					              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					                  const int in_x =
 | 
				
			||||||
 | 
					                      in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                  const int in_y =
 | 
				
			||||||
 | 
					                      in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                  // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					                  const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                      (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                      (in_y < input_height);
 | 
				
			||||||
 | 
					                  if (is_point_inside_image) {
 | 
				
			||||||
 | 
					                    int32_t input_val = input_data[Offset(
 | 
				
			||||||
 | 
					                        input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                    int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                        filter_shape, 0, filter_y, filter_x, output_channel)];
 | 
				
			||||||
 | 
					                    // Accumulate with 32 bits accumulator.
 | 
				
			||||||
 | 
					                    // In the nudging process during model quantization, we
 | 
				
			||||||
 | 
					                    // force real value of 0.0 be represented by a quantized
 | 
				
			||||||
 | 
					                    // value. This guarantees that the input_offset is a int8_t,
 | 
				
			||||||
 | 
					                    // even though it is represented using int32_t. int32_t +=
 | 
				
			||||||
 | 
					                    // int8_t
 | 
				
			||||||
 | 
					                    // * (int8_t - int8_t) so the highest value we can get from
 | 
				
			||||||
 | 
					                    // each accumulation is [-127, 127] * ([-128, 127] -
 | 
				
			||||||
 | 
					                    // [-128, 127]), which is [-32512, 32512]. log2(32512)
 | 
				
			||||||
 | 
					                    // = 14.98, which means we can accumulate at least 2^16
 | 
				
			||||||
 | 
					                    // multiplications without overflow. The accumulator is
 | 
				
			||||||
 | 
					                    // applied to a filter so the accumulation logic will hold
 | 
				
			||||||
 | 
					                    // as long as the filter size (filter_y * filter_x *
 | 
				
			||||||
 | 
					                    // in_channel) does not exceed 2^16, which is the case in
 | 
				
			||||||
 | 
					                    // all the models we have seen so far.
 | 
				
			||||||
 | 
					                    acc += filter_val * (input_val + input_offset);
 | 
				
			||||||
 | 
					                  }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					              if (bias_data) {
 | 
				
			||||||
 | 
					                acc += bias_data[output_channel];
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					              acc = DepthwiseConvRound<output_rounding>(
 | 
				
			||||||
 | 
					                  acc, output_multiplier[output_channel],
 | 
				
			||||||
 | 
					                  output_shift[output_channel]);
 | 
				
			||||||
 | 
					              acc += output_offset;
 | 
				
			||||||
 | 
					              acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					              acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					              output_data[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                                 output_channel)] = static_cast<int8_t>(acc);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace depthwise_conv
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void DepthwiseConv(
 | 
				
			||||||
 | 
					    const DepthwiseParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const uint8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const uint8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    uint8_t* output_data) {
 | 
				
			||||||
 | 
					  return depthwise_conv::DepthwiseConvBasicKernel<
 | 
				
			||||||
 | 
					      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
 | 
				
			||||||
 | 
					                                                       input_data, filter_shape,
 | 
				
			||||||
 | 
					                                                       filter_data, bias_shape,
 | 
				
			||||||
 | 
					                                                       bias_data, output_shape,
 | 
				
			||||||
 | 
					                                                       output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // end namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,78 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <limits.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <vector>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Dequantizes into a float without rounding.
 | 
				
			||||||
 | 
					template <typename InputT, typename OutputT>
 | 
				
			||||||
 | 
					inline void Dequantize(const tflite::DequantizationParams& op_params,
 | 
				
			||||||
 | 
					                       const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                       const InputT* input_data,
 | 
				
			||||||
 | 
					                       const RuntimeShape& output_shape, OutputT* output_data) {
 | 
				
			||||||
 | 
					  int32_t zero_point = op_params.zero_point;
 | 
				
			||||||
 | 
					  const double scale = op_params.scale;
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    const int32_t val = input_data[i];
 | 
				
			||||||
 | 
					    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
 | 
				
			||||||
 | 
					    output_data[i] = result;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Dequantizes per-channel quantized tensor to float.
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void PerChannelDequantize(
 | 
				
			||||||
 | 
					    const tflite::PerChannelDequantizationParams& op_params,
 | 
				
			||||||
 | 
					    const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& output_shape, float* output_data) {
 | 
				
			||||||
 | 
					  // Ensure flat size is same.
 | 
				
			||||||
 | 
					  MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int32_t* zero_point = op_params.zero_point;
 | 
				
			||||||
 | 
					  const float* scale = op_params.scale;
 | 
				
			||||||
 | 
					  const int32_t quantized_dimension = op_params.quantized_dimension;
 | 
				
			||||||
 | 
					  const int32_t num_dims = input_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int32_t* dims_data = input_shape.DimsData();
 | 
				
			||||||
 | 
					  std::vector<int> current_dim(num_dims, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  do {
 | 
				
			||||||
 | 
					    size_t offset =
 | 
				
			||||||
 | 
					        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
 | 
				
			||||||
 | 
					                            current_dim.data(), 0, nullptr);
 | 
				
			||||||
 | 
					    const int channel = current_dim[quantized_dimension];
 | 
				
			||||||
 | 
					    const int32_t val = input_data[offset];
 | 
				
			||||||
 | 
					    const float result =
 | 
				
			||||||
 | 
					        static_cast<float>(scale[channel] * (val - zero_point[channel]));
 | 
				
			||||||
 | 
					    output_data[offset] = result;
 | 
				
			||||||
 | 
					  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
 | 
				
			||||||
 | 
					                     current_dim.data()));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,39 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Floor(const RuntimeShape& input_shape, const float* input_data,
 | 
				
			||||||
 | 
					                  const RuntimeShape& output_shape, float* output_data) {
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    int offset = i;
 | 
				
			||||||
 | 
					    output_data[offset] = std::floor(input_data[offset]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,320 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/quantization_util.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void FullyConnected(
 | 
				
			||||||
 | 
					    const FullyConnectedParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const float* input_data, const RuntimeShape& weights_shape,
 | 
				
			||||||
 | 
					    const float* weights_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const float* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    float* output_data) {
 | 
				
			||||||
 | 
					  const float output_activation_min = params.float_activation_min;
 | 
				
			||||||
 | 
					  const float output_activation_max = params.float_activation_max;
 | 
				
			||||||
 | 
					  // TODO(b/62193649): This really should be:
 | 
				
			||||||
 | 
					  //     const int batches = ArraySize(output_dims, 1);
 | 
				
			||||||
 | 
					  // but the current --variable_batch hack consists in overwriting the 3rd
 | 
				
			||||||
 | 
					  // dimension with the runtime batch size, as we don't keep track for each
 | 
				
			||||||
 | 
					  // array of which dimension is the batch dimension in it.
 | 
				
			||||||
 | 
					  const int output_dims_count = output_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int weights_dims_count = weights_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
 | 
				
			||||||
 | 
					                                       output_shape, output_dims_count - 1);
 | 
				
			||||||
 | 
					  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
 | 
				
			||||||
 | 
					  for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					    for (int out_c = 0; out_c < output_depth; ++out_c) {
 | 
				
			||||||
 | 
					      float total = 0.f;
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; ++d) {
 | 
				
			||||||
 | 
					        total += input_data[b * accum_depth + d] *
 | 
				
			||||||
 | 
					                 weights_data[out_c * accum_depth + d];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      float bias_value = 0.0f;
 | 
				
			||||||
 | 
					      if (bias_data) {
 | 
				
			||||||
 | 
					        bias_value = bias_data[out_c];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					          total + bias_value, output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void FullyConnected(
 | 
				
			||||||
 | 
					    const FullyConnectedParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const uint8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const uint8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    uint8_t* output_data) {
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					  const int32_t filter_offset = params.weights_offset;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					  const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  // TODO(b/62193649): This really should be:
 | 
				
			||||||
 | 
					  //     const int batches = ArraySize(output_dims, 1);
 | 
				
			||||||
 | 
					  // but the current --variable_batch hack consists in overwriting the 3rd
 | 
				
			||||||
 | 
					  // dimension with the runtime batch size, as we don't keep track for each
 | 
				
			||||||
 | 
					  // array of which dimension is the batch dimension in it.
 | 
				
			||||||
 | 
					  const int output_dim_count = output_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int filter_dim_count = filter_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
 | 
				
			||||||
 | 
					                                       output_shape, output_dim_count - 1);
 | 
				
			||||||
 | 
					  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
 | 
				
			||||||
 | 
					  for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					    for (int out_c = 0; out_c < output_depth; ++out_c) {
 | 
				
			||||||
 | 
					      int32_t acc = 0;
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; ++d) {
 | 
				
			||||||
 | 
					        int32_t input_val = input_data[b * accum_depth + d];
 | 
				
			||||||
 | 
					        int32_t filter_val = filter_data[out_c * accum_depth + d];
 | 
				
			||||||
 | 
					        acc += (filter_val + filter_offset) * (input_val + input_offset);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      if (bias_data) {
 | 
				
			||||||
 | 
					        acc += bias_data[out_c];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
 | 
				
			||||||
 | 
					      acc += output_offset;
 | 
				
			||||||
 | 
					      acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					      acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void FullyConnected(
 | 
				
			||||||
 | 
					    const FullyConnectedParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const uint8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const uint8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int16_t* output_data) {
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					  const int32_t filter_offset = params.weights_offset;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					  const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_offset, 0);
 | 
				
			||||||
 | 
					  // TODO(b/62193649): This really should be:
 | 
				
			||||||
 | 
					  //     const int batches = ArraySize(output_dims, 1);
 | 
				
			||||||
 | 
					  // but the current --variable_batch hack consists in overwriting the 3rd
 | 
				
			||||||
 | 
					  // dimension with the runtime batch size, as we don't keep track for each
 | 
				
			||||||
 | 
					  // array of which dimension is the batch dimension in it.
 | 
				
			||||||
 | 
					  const int output_dim_count = output_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int filter_dim_count = filter_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
 | 
				
			||||||
 | 
					                                       output_shape, output_dim_count - 1);
 | 
				
			||||||
 | 
					  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
 | 
				
			||||||
 | 
					  for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					    for (int out_c = 0; out_c < output_depth; ++out_c) {
 | 
				
			||||||
 | 
					      // Internal accumulation.
 | 
				
			||||||
 | 
					      // Initialize accumulator with the bias-value.
 | 
				
			||||||
 | 
					      int32_t accum = bias_data[out_c];
 | 
				
			||||||
 | 
					      // Accumulation loop.
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; ++d) {
 | 
				
			||||||
 | 
					        int16_t input_val = input_data[b * accum_depth + d] + input_offset;
 | 
				
			||||||
 | 
					        int16_t filter_val =
 | 
				
			||||||
 | 
					            filter_data[out_c * accum_depth + d] + filter_offset;
 | 
				
			||||||
 | 
					        accum += filter_val * input_val;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      // Down-scale the final int32_t accumulator to the scale used by our
 | 
				
			||||||
 | 
					      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
 | 
				
			||||||
 | 
					      // multiplier and shift here have been pre-computed offline
 | 
				
			||||||
 | 
					      // (e.g. by toco).
 | 
				
			||||||
 | 
					      accum =
 | 
				
			||||||
 | 
					          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
 | 
				
			||||||
 | 
					      // Saturate, cast to int16_t, and store to output array.
 | 
				
			||||||
 | 
					      accum = std::max(accum, output_activation_min - output_offset);
 | 
				
			||||||
 | 
					      accum = std::min(accum, output_activation_max - output_offset);
 | 
				
			||||||
 | 
					      accum += output_offset;
 | 
				
			||||||
 | 
					      output_data[out_c + output_depth * b] = accum;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void ShuffledFullyConnected(
 | 
				
			||||||
 | 
					    const FullyConnectedParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const uint8_t* input_data, const RuntimeShape& weights_shape,
 | 
				
			||||||
 | 
					    const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
 | 
				
			||||||
 | 
					  const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					  const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
 | 
				
			||||||
 | 
					  // TODO(b/62193649): This really should be:
 | 
				
			||||||
 | 
					  //     const int batches = ArraySize(output_dims, 1);
 | 
				
			||||||
 | 
					  // but the current --variable_batch hack consists in overwriting the 3rd
 | 
				
			||||||
 | 
					  // dimension with the runtime batch size, as we don't keep track for each
 | 
				
			||||||
 | 
					  // array of which dimension is the batch dimension in it.
 | 
				
			||||||
 | 
					  const int output_dim_count = output_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int weights_dim_count = weights_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
 | 
				
			||||||
 | 
					                                       output_shape, output_dim_count - 1);
 | 
				
			||||||
 | 
					  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK((accum_depth % 16) == 0);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK((output_depth % 4) == 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Shuffling and xoring of input activations into the workspace buffer
 | 
				
			||||||
 | 
					  uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
 | 
				
			||||||
 | 
					  if (batches == 1) {
 | 
				
			||||||
 | 
					    for (int i = 0; i < accum_depth; i++) {
 | 
				
			||||||
 | 
					      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else if (batches == 4) {
 | 
				
			||||||
 | 
					    for (int c = 0; c < accum_depth; c += 16) {
 | 
				
			||||||
 | 
					      for (int b = 0; b < 4; b++) {
 | 
				
			||||||
 | 
					        const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
 | 
				
			||||||
 | 
					        for (int j = 0; j < 16; j++) {
 | 
				
			||||||
 | 
					          uint8_t src_val = *src_data_ptr++;
 | 
				
			||||||
 | 
					          // Flip the sign bit, so that the kernel will only need to
 | 
				
			||||||
 | 
					          // reinterpret these uint8_t values as int8_t, getting for free the
 | 
				
			||||||
 | 
					          // subtraction of the zero_point value 128.
 | 
				
			||||||
 | 
					          uint8_t dst_val = src_val ^ 0x80;
 | 
				
			||||||
 | 
					          *shuffled_input_workspace_ptr++ = dst_val;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK(false);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Actual computation
 | 
				
			||||||
 | 
					  if (batches == 1) {
 | 
				
			||||||
 | 
					    int16_t* output_ptr = output_data;
 | 
				
			||||||
 | 
					    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
 | 
				
			||||||
 | 
					    // so that just reinterpreting them as int8_t values is equivalent to
 | 
				
			||||||
 | 
					    // subtracting 128 from them, thus implementing for free the subtraction of
 | 
				
			||||||
 | 
					    // the zero_point value 128.
 | 
				
			||||||
 | 
					    const int8_t* shuffled_weights_ptr =
 | 
				
			||||||
 | 
					        reinterpret_cast<const int8_t*>(shuffled_weights_data);
 | 
				
			||||||
 | 
					    // Likewise, we preshuffled and pre-xored the input data above.
 | 
				
			||||||
 | 
					    const int8_t* shuffled_input_data =
 | 
				
			||||||
 | 
					        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
 | 
				
			||||||
 | 
					    for (int c = 0; c < output_depth; c += 4) {
 | 
				
			||||||
 | 
					      // Internal accumulation.
 | 
				
			||||||
 | 
					      // Initialize accumulator with the bias-value.
 | 
				
			||||||
 | 
					      int32_t accum[4] = {0};
 | 
				
			||||||
 | 
					      // Accumulation loop.
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; d += 16) {
 | 
				
			||||||
 | 
					        for (int i = 0; i < 4; i++) {
 | 
				
			||||||
 | 
					          for (int j = 0; j < 16; j++) {
 | 
				
			||||||
 | 
					            int8_t input_val = shuffled_input_data[d + j];
 | 
				
			||||||
 | 
					            int8_t weights_val = *shuffled_weights_ptr++;
 | 
				
			||||||
 | 
					            accum[i] += weights_val * input_val;
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      for (int i = 0; i < 4; i++) {
 | 
				
			||||||
 | 
					        // Add bias value
 | 
				
			||||||
 | 
					        int32_t acc = accum[i] + bias_data[c + i];
 | 
				
			||||||
 | 
					        // Down-scale the final int32_t accumulator to the scale used by our
 | 
				
			||||||
 | 
					        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
 | 
				
			||||||
 | 
					        // multiplier and shift here have been pre-computed offline
 | 
				
			||||||
 | 
					        // (e.g. by toco).
 | 
				
			||||||
 | 
					        acc =
 | 
				
			||||||
 | 
					            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
 | 
				
			||||||
 | 
					        // Saturate, cast to int16_t, and store to output array.
 | 
				
			||||||
 | 
					        acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					        acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					        output_ptr[c + i] = acc;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else if (batches == 4) {
 | 
				
			||||||
 | 
					    int16_t* output_ptr = output_data;
 | 
				
			||||||
 | 
					    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
 | 
				
			||||||
 | 
					    // so that just reinterpreting them as int8_t values is equivalent to
 | 
				
			||||||
 | 
					    // subtracting 128 from them, thus implementing for free the subtraction of
 | 
				
			||||||
 | 
					    // the zero_point value 128.
 | 
				
			||||||
 | 
					    const int8_t* shuffled_weights_ptr =
 | 
				
			||||||
 | 
					        reinterpret_cast<const int8_t*>(shuffled_weights_data);
 | 
				
			||||||
 | 
					    // Likewise, we preshuffled and pre-xored the input data above.
 | 
				
			||||||
 | 
					    const int8_t* shuffled_input_data =
 | 
				
			||||||
 | 
					        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
 | 
				
			||||||
 | 
					    for (int c = 0; c < output_depth; c += 4) {
 | 
				
			||||||
 | 
					      const int8_t* shuffled_input_ptr = shuffled_input_data;
 | 
				
			||||||
 | 
					      // Accumulation loop.
 | 
				
			||||||
 | 
					      // Internal accumulation.
 | 
				
			||||||
 | 
					      // Initialize accumulator with the bias-value.
 | 
				
			||||||
 | 
					      int32_t accum[4][4];
 | 
				
			||||||
 | 
					      for (int i = 0; i < 4; i++) {
 | 
				
			||||||
 | 
					        for (int b = 0; b < 4; b++) {
 | 
				
			||||||
 | 
					          accum[i][b] = 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; d += 16) {
 | 
				
			||||||
 | 
					        for (int i = 0; i < 4; i++) {
 | 
				
			||||||
 | 
					          for (int b = 0; b < 4; b++) {
 | 
				
			||||||
 | 
					            for (int j = 0; j < 16; j++) {
 | 
				
			||||||
 | 
					              int8_t input_val = shuffled_input_ptr[16 * b + j];
 | 
				
			||||||
 | 
					              int8_t weights_val = shuffled_weights_ptr[16 * i + j];
 | 
				
			||||||
 | 
					              accum[i][b] += weights_val * input_val;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        shuffled_input_ptr += 64;
 | 
				
			||||||
 | 
					        shuffled_weights_ptr += 64;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      for (int i = 0; i < 4; i++) {
 | 
				
			||||||
 | 
					        for (int b = 0; b < 4; b++) {
 | 
				
			||||||
 | 
					          // Add bias value
 | 
				
			||||||
 | 
					          int32_t acc = accum[i][b] + bias_data[c + i];
 | 
				
			||||||
 | 
					          // Down-scale the final int32_t accumulator to the scale used by our
 | 
				
			||||||
 | 
					          // (16-bit, typically 3 integer bits) fixed-point format. The
 | 
				
			||||||
 | 
					          // quantized multiplier and shift here have been pre-computed offline
 | 
				
			||||||
 | 
					          // (e.g. by toco).
 | 
				
			||||||
 | 
					          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
 | 
				
			||||||
 | 
					                                              output_shift);
 | 
				
			||||||
 | 
					          // Saturate, cast to int16_t, and store to output array.
 | 
				
			||||||
 | 
					          acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					          output_ptr[b * output_depth + c + i] = acc;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK(false);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,166 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "ruy/profiler/instrumentation.h"  // from @ruy
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Shifts `value` left by `amount` bits and clamps the result to the int16_t
					// range instead of letting it wrap.
					//
					// `amount` is expected to be non-negative. Shifts of 16 bits or more are
					// resolved before multiplying: at that point every non-zero input already
					// lies outside the int16_t range, and evaluating `value * (1 << amount)`
					// directly could overflow int32_t, which is undefined behaviour.
					inline int16_t SaturatingLeftShift(int16_t value, int amount) {
					  constexpr int32_t kMax = std::numeric_limits<int16_t>::max();
					  constexpr int32_t kMin = std::numeric_limits<int16_t>::min();
					  if (amount >= 16) {
					    if (value == 0) {
					      return 0;
					    }
					    return static_cast<int16_t>(value > 0 ? kMax : kMin);
					  }
					  // amount <= 15 here, so |value * 2^amount| <= 2^30 and fits in int32_t.
					  const int32_t shifted = static_cast<int32_t>(value) * (1 << amount);
					  return static_cast<int16_t>(std::min(kMax, std::max(kMin, shifted)));
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Returns the high 16 bits of the doubled product 2*a*b, saturating.
					// Comparable to the ARM SQDMULH instruction, and to
					// gemmlowp::SaturatingRoundingDoublingHighMul (SQRDMULH) except that the
					// result is rounded toward zero rather than to nearest.
					inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
					  constexpr std::int16_t kLowest = std::numeric_limits<std::int16_t>::min();
					  // min * min is the only operand pair whose doubled product does not fit,
					  // so it is pinned to the largest representable value.
					  if (a == kLowest && b == kLowest) {
					    return std::numeric_limits<std::int16_t>::max();
					  }
					  const std::int32_t product =
					      static_cast<std::int32_t>(a) * static_cast<std::int32_t>(b);
					  // Integer division truncates toward zero.
					  return static_cast<std::int16_t>(product / (1 << 15));
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                      const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
 | 
				
			||||||
 | 
					  auto matching_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					  const T* in_end = input_data + matching_size;
 | 
				
			||||||
 | 
					  for (; input_data < in_end; input_data++, output_data++) {
 | 
				
			||||||
 | 
					    const float in = *input_data;
 | 
				
			||||||
 | 
					    *output_data =
 | 
				
			||||||
 | 
					        in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
 | 
				
			||||||
 | 
					        6;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void HardSwish(const HardSwishParams& params,
 | 
				
			||||||
 | 
					                      const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                      const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    const int16_t input_value = input_data[i] - params.input_zero_point;
 | 
				
			||||||
 | 
					    // Left-shift as much as we can without overflow/saturation to put
 | 
				
			||||||
 | 
					    // significant bits in the high bits of our 16-bit fixedpoint values, so
 | 
				
			||||||
 | 
					    // that fixed-point approximate computations below are as accurate as
 | 
				
			||||||
 | 
					    // possible.
 | 
				
			||||||
 | 
					    const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
 | 
				
			||||||
 | 
					    // Compute the input value on essentially the output scale, just not
 | 
				
			||||||
 | 
					    // right-shifted yet. This is the value that we'll use in the (x >= +3)
 | 
				
			||||||
 | 
					    // case, and that in the general case we'll multiply against the "relu-ish"
 | 
				
			||||||
 | 
					    // fixed-point multiplier in [0, 1].
 | 
				
			||||||
 | 
					    const int16_t input_value_on_preshift_output_scale =
 | 
				
			||||||
 | 
					        gemmlowp::SaturatingRoundingDoublingHighMul(
 | 
				
			||||||
 | 
					            input_value_on_hires_input_scale,
 | 
				
			||||||
 | 
					            params.output_multiplier_fixedpoint_int16);
 | 
				
			||||||
 | 
					    // Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
 | 
				
			||||||
 | 
					    // is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
 | 
				
			||||||
 | 
					    // case, it is just that plus saturation at the boundaries of [-3, 3].
 | 
				
			||||||
 | 
					    // First, we rescale from [-3, 3] to [-1, 1], saturating.
 | 
				
			||||||
 | 
					    // That is done by rescaling the input value with a fixed-point multiplier
 | 
				
			||||||
 | 
					    // (reluish_multiplier_fixedpoint) and bit-shift such that we represent
 | 
				
			||||||
 | 
					    // that input value on the scale where the real value 3.0f is represented
 | 
				
			||||||
 | 
					    // by the quantized value 32768.  (+32768 is actually not representable as
 | 
				
			||||||
 | 
					    // int16_t, so this saturates at +32767, and that is seen empirically to be
 | 
				
			||||||
 | 
					    // a negligible contribution to numerical error/bias).
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // This code is careful to correctly implement any magnitude of multiplier,
 | 
				
			||||||
 | 
					    // involving either a right shift or a left shift, with correct saturation
 | 
				
			||||||
 | 
					    // behavior in the left-shift case. This forces this code to be more
 | 
				
			||||||
 | 
					    // complicated, but is necessary for real applications: a partially
 | 
				
			||||||
 | 
					    // trained quantized MobileNet v3-small model that motivated this code
 | 
				
			||||||
 | 
					    // exhibits some large [min, max] range boundaries, of the order of
 | 
				
			||||||
 | 
					    // magnitude of 10 or 100 depending on layers.
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // The next few lines are basically just an ordinary
 | 
				
			||||||
 | 
					    // MultiplyByQuantizedMultiplier, except that we are more careful here
 | 
				
			||||||
 | 
					    // about the fine details of saturation when left-shifting, because here
 | 
				
			||||||
 | 
					    // overflow in left-shift is a common case, not an anomaly as
 | 
				
			||||||
 | 
					    // MultiplyByQuantizedMultiplier assumes.
 | 
				
			||||||
 | 
					    int16_t reluish_value = input_value_on_hires_input_scale;
 | 
				
			||||||
 | 
					    // Shift left, saturating, as much as we can while ensuring that this
 | 
				
			||||||
 | 
					    // saturation will not contribute to the result. That is, left shift amount
 | 
				
			||||||
 | 
					    // reduced by 1.
 | 
				
			||||||
 | 
					    if (params.reluish_multiplier_exponent > 0) {
 | 
				
			||||||
 | 
					      reluish_value = SaturatingLeftShift(
 | 
				
			||||||
 | 
					          reluish_value, params.reluish_multiplier_exponent - 1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Apply the fixed-point multiplier, dividing the value by a divisor
 | 
				
			||||||
 | 
					    // ranging in [1, 2].
 | 
				
			||||||
 | 
					    reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
 | 
				
			||||||
 | 
					        reluish_value, params.reluish_multiplier_fixedpoint_int16);
 | 
				
			||||||
 | 
					    // Apply the last bit of left-shift. Thus, in the left-shifting case, if
 | 
				
			||||||
 | 
					    // any saturation affects the result, it is happening here --- any
 | 
				
			||||||
 | 
					    // saturation having occurred above is overwritten here, not affecting the
 | 
				
			||||||
 | 
					    // result.
 | 
				
			||||||
 | 
					    if (params.reluish_multiplier_exponent > 0) {
 | 
				
			||||||
 | 
					      reluish_value = SaturatingLeftShift(reluish_value, 1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Shift right, in the right-shifting case.
 | 
				
			||||||
 | 
					    if (params.reluish_multiplier_exponent < 0) {
 | 
				
			||||||
 | 
					      reluish_value = gemmlowp::RoundingDivideByPOT(
 | 
				
			||||||
 | 
					          reluish_value, -params.reluish_multiplier_exponent);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // At this point we have rescaled the value into a 16bit fixedpoint
 | 
				
			||||||
 | 
					    // reluish_value in [-1, 1].
 | 
				
			||||||
 | 
					    // We now convert that to a 16bit fixedpoint value in [0, 1].
 | 
				
			||||||
 | 
					    reluish_value = (reluish_value + (1 << 15)) >> 1;
 | 
				
			||||||
 | 
					    // Use of SaturatingDoublingHighMul here is important to cancel the biases
 | 
				
			||||||
 | 
					    // from the above SaturatingRoundingDoublingHighMul.
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // On a partially trained MobileNet-v3-small,
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    //                                       | bias on    |  ImageNet
 | 
				
			||||||
 | 
					    //                                       | quantized  |  Top-1
 | 
				
			||||||
 | 
					    // Operation used here                   | values     |  accuracy (50k)
 | 
				
			||||||
 | 
					    // --------------------------------------+------------+-----------
 | 
				
			||||||
 | 
					    // SaturatingDoublingHighMul             | -0.0024    |  58.920
 | 
				
			||||||
 | 
					    // SaturatingRoundingDoublingHighMul     | -0.0067    |  58.064
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // In activations_test, this is covered by this testcase:
 | 
				
			||||||
 | 
					    //     QuantizedActivationsOpTest.HardSwishBias
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    const int16_t preshift_output_value = SaturatingDoublingHighMul(
 | 
				
			||||||
 | 
					        reluish_value, input_value_on_preshift_output_scale);
 | 
				
			||||||
 | 
					    // We were so far operating on the pre-shift output scale. Now we finally
 | 
				
			||||||
 | 
					    // apply that output shift, arriving at the final output scale.
 | 
				
			||||||
 | 
					    int16_t output_value = gemmlowp::RoundingDivideByPOT(
 | 
				
			||||||
 | 
					        preshift_output_value, -params.output_multiplier_exponent);
 | 
				
			||||||
 | 
					    output_value += params.output_zero_point;
 | 
				
			||||||
 | 
					    output_value =
 | 
				
			||||||
 | 
					        std::min<int16_t>(output_value, std::numeric_limits<T>::max());
 | 
				
			||||||
 | 
					    output_value =
 | 
				
			||||||
 | 
					        std::max<int16_t>(output_value, std::numeric_limits<T>::min());
 | 
				
			||||||
 | 
					    output_data[i] = output_value;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,144 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Debug-only validation of the quantization parameters of an int8 binary
					// op: the activation clamp range must be ordered, and the negated input
					// offsets must fit in int8_t.
					inline void CheckArithmeticParams(const ArithmeticParams& params) {
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
					                   params.quantized_activation_max);

					  // An input offset is the negated input zero point. Activation tensors are
					  // asymmetrically quantized and may use the whole int8 range, so the
					  // negated offset is checked against both ends of that range.
					  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
					  TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
					  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
					  TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void ElementWise(
 | 
				
			||||||
 | 
					    int size, const ArithmeticParams& params, const int8_t* input1_data,
 | 
				
			||||||
 | 
					    const int8_t* input2_data, int8_t* output_data,
 | 
				
			||||||
 | 
					    void (*check_arithmetic_params)(const ArithmeticParams&),
 | 
				
			||||||
 | 
					    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
 | 
				
			||||||
 | 
					  CheckArithmeticParams(params);
 | 
				
			||||||
 | 
					  for (int i = 0; i < size; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = binary_func(input1_data[i], input2_data[i], params);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void BroadcastBinaryFunction4DSlow(
 | 
				
			||||||
 | 
					    const ArithmeticParams& params, const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					    const int8_t* input1_data, const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					    const int8_t* input2_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int8_t* output_data,
 | 
				
			||||||
 | 
					    void (*check_arithmetic_params)(const ArithmeticParams&),
 | 
				
			||||||
 | 
					    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
 | 
				
			||||||
 | 
					                                      &desc2);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // In Tensorflow, the dimensions are canonically named (batch_number, row,
 | 
				
			||||||
 | 
					  // col, channel), with extents (batches, height, width, depth), with the
 | 
				
			||||||
 | 
					  // trailing dimension changing most rapidly (channels has the smallest stride,
 | 
				
			||||||
 | 
					  // typically 1 element).
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // In generated C code, we store arrays with the dimensions reversed. The
 | 
				
			||||||
 | 
					  // first dimension has smallest stride.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // We name our variables by their Tensorflow convention, but generate C code
 | 
				
			||||||
 | 
					  // nesting loops such that the innermost loop has the smallest stride for the
 | 
				
			||||||
 | 
					  // best cache behavior.
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func(
 | 
				
			||||||
 | 
					              input1_data[SubscriptToIndex(desc1, b, y, x, c)],
 | 
				
			||||||
 | 
					              input2_data[SubscriptToIndex(desc2, b, y, x, c)], params);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
 | 
				
			||||||
 | 
					  const int32_t input1_val = params.input1_offset + x;
 | 
				
			||||||
 | 
					  const int32_t input2_val = params.input2_offset + y;
 | 
				
			||||||
 | 
					  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					  const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
 | 
				
			||||||
 | 
					  const int32_t scaled_input1_val =
 | 
				
			||||||
 | 
					      MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					          shifted_input1_val, params.input1_multiplier, params.input1_shift);
 | 
				
			||||||
 | 
					  const int32_t scaled_input2_val =
 | 
				
			||||||
 | 
					      MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					          shifted_input2_val, params.input2_multiplier, params.input2_shift);
 | 
				
			||||||
 | 
					  const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
 | 
				
			||||||
 | 
					  const int32_t raw_output =
 | 
				
			||||||
 | 
					      MultiplyByQuantizedMultiplierSmallerThanOneExp(
 | 
				
			||||||
 | 
					          raw_sum, params.output_multiplier, params.output_shift) +
 | 
				
			||||||
 | 
					      params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t clamped_output =
 | 
				
			||||||
 | 
					      std::min(params.quantized_activation_max,
 | 
				
			||||||
 | 
					               std::max(params.quantized_activation_min, raw_output));
 | 
				
			||||||
 | 
					  return static_cast<int8_t>(clamped_output);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Element-wise add that can often be used for inner loop of broadcast add as
 | 
				
			||||||
 | 
					// well as the non-broadcast add.
 | 
				
			||||||
 | 
					inline void AddElementwise(int size, const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                           const int8_t* input1_data, const int8_t* input2_data,
 | 
				
			||||||
 | 
					                           int8_t* output_data) {
 | 
				
			||||||
 | 
					  ElementWise(size, params, input1_data, input2_data, output_data,
 | 
				
			||||||
 | 
					              CheckArithmeticParams, AddFunc);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Add(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const int8_t* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const int8_t* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, int8_t* output_data) {
 | 
				
			||||||
 | 
					  CheckArithmeticParams(params);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                               const int8_t* input1_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                               const int8_t* input2_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                               int8_t* output_data) {
 | 
				
			||||||
 | 
					  BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape,
 | 
				
			||||||
 | 
					                                input2_data, output_shape, output_data,
 | 
				
			||||||
 | 
					                                CheckArithmeticParams, AddFunc);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,221 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fixed-point per-channel-quantization convolution reference kernel.
 | 
				
			||||||
 | 
					inline void ConvPerChannel(
 | 
				
			||||||
 | 
					    const ConvParams& params, const int32_t* output_multiplier,
 | 
				
			||||||
 | 
					    const int32_t* output_shift, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int8_t* output_data) {
 | 
				
			||||||
 | 
					  // Get parameters.
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;  // r = s(q - Z)
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Set min and max value of the output.
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Consistency check.
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Check dimensions of the tensors.
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          int32_t acc = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            const int in_y = in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					              const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                  (in_y < input_height);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              if (!is_point_inside_image) {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					                int32_t input_val = input_data[Offset(input_shape, batch, in_y,
 | 
				
			||||||
 | 
					                                                      in_x, in_channel)];
 | 
				
			||||||
 | 
					                int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
 | 
				
			||||||
 | 
					                // Accumulate with 32 bits accumulator.
 | 
				
			||||||
 | 
					                // In the nudging process during model quantization, we force
 | 
				
			||||||
 | 
					                // real value of 0.0 be represented by a quantized value. This
 | 
				
			||||||
 | 
					                // guarantees that the input_offset is a int8_t, even though
 | 
				
			||||||
 | 
					                // it is represented using int32_t. int32_t += int8_t *
 | 
				
			||||||
 | 
					                // (int8_t - int8_t) so the highest value we can get from each
 | 
				
			||||||
 | 
					                // accumulation is [-127, 127] * ([-128, 127] -
 | 
				
			||||||
 | 
					                // [-128, 127]), which is [-32512, 32512]. log2(32512)
 | 
				
			||||||
 | 
					                // = 14.98, which means we can accumulate at least 2^16
 | 
				
			||||||
 | 
					                // multiplications without overflow. The accumulator is
 | 
				
			||||||
 | 
					                // applied to a filter so the accumulation logic will hold as
 | 
				
			||||||
 | 
					                // long as the filter size (filter_y * filter_x * in_channel)
 | 
				
			||||||
 | 
					                // does not exceed 2^16, which is the case in all the models
 | 
				
			||||||
 | 
					                // we have seen so far.
 | 
				
			||||||
 | 
					                // TODO(b/174275578): Add a check to make sure the
 | 
				
			||||||
 | 
					                // accumulator depth is smaller than 2^16.
 | 
				
			||||||
 | 
					                acc += filter_val * (input_val + input_offset);
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            acc += bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          acc = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					              acc, output_multiplier[out_channel], output_shift[out_channel]);
 | 
				
			||||||
 | 
					          acc += output_offset;
 | 
				
			||||||
 | 
					          acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              static_cast<int8_t>(acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fixed-point per-channel-quantization convolution reference kernel.
 | 
				
			||||||
 | 
					// 16-bit data and 8-bit filter
 | 
				
			||||||
 | 
					inline void ConvPerChannel(
 | 
				
			||||||
 | 
					    const ConvParams& params, const int32_t* output_multiplier,
 | 
				
			||||||
 | 
					    const int32_t* output_shift, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int16_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const std::int64_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int16_t* output_data) {
 | 
				
			||||||
 | 
					  // Get parameters.
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Set min and max value of the output.
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Consistency check.
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Check dimensions of the tensors.
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          std::int64_t acc = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            const int in_y = in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					              const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                  (in_y < input_height);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              if (!is_point_inside_image) {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					                int32_t input_val = input_data[Offset(input_shape, batch, in_y,
 | 
				
			||||||
 | 
					                                                      in_x, in_channel)];
 | 
				
			||||||
 | 
					                int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
 | 
				
			||||||
 | 
					                // Accumulate with 64 bits accumulator.
 | 
				
			||||||
 | 
					                // int64_t += int8_t * int16_t so the highest value we can
 | 
				
			||||||
 | 
					                // get from each accumulation is [-127, 127] * ([-32768,
 | 
				
			||||||
 | 
					                // 32767] -
 | 
				
			||||||
 | 
					                // [-32768, 32767]), which is [-8322945, 8322945].
 | 
				
			||||||
 | 
					                // log2(8322945) = 22.99.
 | 
				
			||||||
 | 
					                acc += filter_val * input_val;
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            acc += bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					              acc, output_multiplier[out_channel], output_shift[out_channel]);
 | 
				
			||||||
 | 
					          scaled_acc = std::max(scaled_acc, output_activation_min);
 | 
				
			||||||
 | 
					          scaled_acc = std::min(scaled_acc, output_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              static_cast<int16_t>(scaled_acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,289 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					inline void DepthwiseConvPerChannel(
 | 
				
			||||||
 | 
					    const DepthwiseParams& params, const int32_t* output_multiplier,
 | 
				
			||||||
 | 
					    const int32_t* output_shift, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int8_t* output_data) {
 | 
				
			||||||
 | 
					  // Get parameters.
 | 
				
			||||||
 | 
					  // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const int depth_multiplier = params.depth_multiplier;
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Check dimensions of the tensors.
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int input_depth = input_shape.Dims(3);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					          for (int m = 0; m < depth_multiplier; ++m) {
 | 
				
			||||||
 | 
					            const int output_channel = m + in_channel * depth_multiplier;
 | 
				
			||||||
 | 
					            const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					            const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					            int32_t acc = 0;
 | 
				
			||||||
 | 
					            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					                const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                const int in_y =
 | 
				
			||||||
 | 
					                    in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					                const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                    (in_y < input_height);
 | 
				
			||||||
 | 
					                if (is_point_inside_image) {
 | 
				
			||||||
 | 
					                  int32_t input_val = input_data[Offset(
 | 
				
			||||||
 | 
					                      input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                  int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                      filter_shape, 0, filter_y, filter_x, output_channel)];
 | 
				
			||||||
 | 
					                  // Accumulate with 32 bits accumulator.
 | 
				
			||||||
 | 
					                  // In the nudging process during model quantization, we force
 | 
				
			||||||
 | 
					                  // real value of 0.0 be represented by a quantized value. This
 | 
				
			||||||
 | 
					                  // guarantees that the input_offset is a int8_t, even though
 | 
				
			||||||
 | 
					                  // it is represented using int32_t. int32_t += int8_t *
 | 
				
			||||||
 | 
					                  // (int8_t - int8_t) so the highest value we can get from each
 | 
				
			||||||
 | 
					                  // accumulation is [-127, 127] * ([-128, 127] -
 | 
				
			||||||
 | 
					                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
 | 
				
			||||||
 | 
					                  // = 14.98, which means we can accumulate at least 2^16
 | 
				
			||||||
 | 
					                  // multiplications without overflow. The accumulator is
 | 
				
			||||||
 | 
					                  // applied to a filter so the accumulation logic will hold as
 | 
				
			||||||
 | 
					                  // long as the filter size (filter_y * filter_x * in_channel)
 | 
				
			||||||
 | 
					                  // does not exceed 2^16, which is the case in all the models
 | 
				
			||||||
 | 
					                  // we have seen so far.
 | 
				
			||||||
 | 
					                  // TODO(b/174275578): Add a check to make sure the
 | 
				
			||||||
 | 
					                  // accumulator depth is smaller than 2^16.
 | 
				
			||||||
 | 
					                  acc += filter_val * (input_val + input_offset);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            if (bias_data) {
 | 
				
			||||||
 | 
					              acc += bias_data[output_channel];
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            acc = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					                acc, output_multiplier[output_channel],
 | 
				
			||||||
 | 
					                output_shift[output_channel]);
 | 
				
			||||||
 | 
					            acc += output_offset;
 | 
				
			||||||
 | 
					            acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					            acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					            output_data[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                               output_channel)] = static_cast<int8_t>(acc);
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void DepthwiseConvPerChannel(
 | 
				
			||||||
 | 
					    const DepthwiseParams& params, const int32_t* output_multiplier,
 | 
				
			||||||
 | 
					    const int32_t* output_shift, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int16_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const std::int64_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int16_t* output_data) {
 | 
				
			||||||
 | 
					  // Get parameters.
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const int depth_multiplier = params.depth_multiplier;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Check dimensions of the tensors.
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int input_depth = input_shape.Dims(3);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					          for (int m = 0; m < depth_multiplier; ++m) {
 | 
				
			||||||
 | 
					            const int output_channel = m + in_channel * depth_multiplier;
 | 
				
			||||||
 | 
					            const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					            const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					            std::int64_t acc = 0;
 | 
				
			||||||
 | 
					            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					                const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                const int in_y =
 | 
				
			||||||
 | 
					                    in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					                const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                    (in_y < input_height);
 | 
				
			||||||
 | 
					                if (is_point_inside_image) {
 | 
				
			||||||
 | 
					                  int32_t input_val = input_data[Offset(
 | 
				
			||||||
 | 
					                      input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                  int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                      filter_shape, 0, filter_y, filter_x, output_channel)];
 | 
				
			||||||
 | 
					                  // Accumulate with 64 bits accumulator.
 | 
				
			||||||
 | 
					                  // We assume maximum of 2^16 accumulations as with the 8-bit
 | 
				
			||||||
 | 
					                  // case so actually the value in the accumulator should not
 | 
				
			||||||
 | 
					                  // exceed 40 bits
 | 
				
			||||||
 | 
					                  acc += static_cast<int64_t>(filter_val) *
 | 
				
			||||||
 | 
					                         static_cast<int64_t>(input_val);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            if (bias_data) {
 | 
				
			||||||
 | 
					              acc += bias_data[output_channel];
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            int32_t scaled_acc = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					                acc, output_multiplier[output_channel],
 | 
				
			||||||
 | 
					                output_shift[output_channel]);
 | 
				
			||||||
 | 
					            scaled_acc = std::max(scaled_acc, output_activation_min);
 | 
				
			||||||
 | 
					            scaled_acc = std::min(scaled_acc, output_activation_max);
 | 
				
			||||||
 | 
					            output_data[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                               output_channel)] =
 | 
				
			||||||
 | 
					                static_cast<int16_t>(scaled_acc);
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void DepthwiseConvHybridPerChannel(
 | 
				
			||||||
 | 
					    const DepthwiseParams& params, float* scaling_factors_ptr,
 | 
				
			||||||
 | 
					    const RuntimeShape& input_shape, const int8_t* input_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& filter_shape, const int8_t* filter_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& bias_shape, const float* bias_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& output_shape, float* output_data,
 | 
				
			||||||
 | 
					    const float* per_channel_scale, int32_t* input_offset) {
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int dilation_width_factor = params.dilation_width_factor;
 | 
				
			||||||
 | 
					  const int dilation_height_factor = params.dilation_height_factor;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  const int depth_multiplier = params.depth_multiplier;
 | 
				
			||||||
 | 
					  const float output_activation_min = params.float_activation_min;
 | 
				
			||||||
 | 
					  const float output_activation_max = params.float_activation_max;
 | 
				
			||||||
 | 
					  // Check dimensions of the tensors.
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int input_depth = input_shape.Dims(3);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int bias_depth = bias_shape.FlatSize();
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(bias_depth, output_depth);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					          for (int m = 0; m < depth_multiplier; ++m) {
 | 
				
			||||||
 | 
					            const int output_channel = m + in_channel * depth_multiplier;
 | 
				
			||||||
 | 
					            const int in_x_origin = (out_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					            const int in_y_origin = (out_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					            int32_t acc = 0;
 | 
				
			||||||
 | 
					            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					                const int in_x = in_x_origin + dilation_width_factor * filter_x;
 | 
				
			||||||
 | 
					                const int in_y =
 | 
				
			||||||
 | 
					                    in_y_origin + dilation_height_factor * filter_y;
 | 
				
			||||||
 | 
					                // Zero padding by omitting the areas outside the image.
 | 
				
			||||||
 | 
					                const bool is_point_inside_image =
 | 
				
			||||||
 | 
					                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
 | 
				
			||||||
 | 
					                    (in_y < input_height);
 | 
				
			||||||
 | 
					                if (is_point_inside_image) {
 | 
				
			||||||
 | 
					                  int32_t input_val = input_data[Offset(
 | 
				
			||||||
 | 
					                      input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                  int32_t filter_val = filter_data[Offset(
 | 
				
			||||||
 | 
					                      filter_shape, 0, filter_y, filter_x, output_channel)];
 | 
				
			||||||
 | 
					                  acc += filter_val * (input_val - input_offset[batch]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            float acc_float = static_cast<float>(acc);
 | 
				
			||||||
 | 
					            acc_float *=
 | 
				
			||||||
 | 
					                per_channel_scale[output_channel] * scaling_factors_ptr[batch];
 | 
				
			||||||
 | 
					            if (bias_data && output_channel < bias_depth) {
 | 
				
			||||||
 | 
					              acc_float += bias_data[output_channel];
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            output_data[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                               output_channel)] =
 | 
				
			||||||
 | 
					                ActivationFunctionWithMinMax(acc_float, output_activation_min,
 | 
				
			||||||
 | 
					                                             output_activation_max);
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,108 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void FullyConnected(
 | 
				
			||||||
 | 
					    const FullyConnectedParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int8_t* output_data) {
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					  const int32_t filter_offset = params.weights_offset;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					  const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  const int filter_dim_count = filter_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int batches = output_shape.Dims(0);
 | 
				
			||||||
 | 
					  const int output_depth = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
 | 
				
			||||||
 | 
					  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
 | 
				
			||||||
 | 
					  for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					    for (int out_c = 0; out_c < output_depth; ++out_c) {
 | 
				
			||||||
 | 
					      int32_t acc = 0;
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; ++d) {
 | 
				
			||||||
 | 
					        int32_t input_val = input_data[b * accum_depth + d];
 | 
				
			||||||
 | 
					        int32_t filter_val = filter_data[out_c * accum_depth + d];
 | 
				
			||||||
 | 
					        acc += (filter_val + filter_offset) * (input_val + input_offset);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      if (bias_data) {
 | 
				
			||||||
 | 
					        acc += bias_data[out_c];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
 | 
				
			||||||
 | 
					      acc += output_offset;
 | 
				
			||||||
 | 
					      acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					      acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void FullyConnected(
 | 
				
			||||||
 | 
					    const FullyConnectedParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int16_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int64_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int16_t* output_data) {
 | 
				
			||||||
 | 
					  const int32_t filter_offset = params.weights_offset;
 | 
				
			||||||
 | 
					  const int32_t output_multiplier = params.output_multiplier;
 | 
				
			||||||
 | 
					  const int output_shift = params.output_shift;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					  const int filter_dim_count = filter_shape.DimensionsCount();
 | 
				
			||||||
 | 
					  const int batches = output_shape.Dims(0);
 | 
				
			||||||
 | 
					  const int output_depth = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
 | 
				
			||||||
 | 
					  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
 | 
				
			||||||
 | 
					  for (int b = 0; b < batches; ++b) {
 | 
				
			||||||
 | 
					    for (int out_c = 0; out_c < output_depth; ++out_c) {
 | 
				
			||||||
 | 
					      int64_t acc = 0;
 | 
				
			||||||
 | 
					      for (int d = 0; d < accum_depth; ++d) {
 | 
				
			||||||
 | 
					        int32_t input_val = input_data[b * accum_depth + d];
 | 
				
			||||||
 | 
					        int32_t filter_val = filter_data[out_c * accum_depth + d];
 | 
				
			||||||
 | 
					        acc += (filter_val + filter_offset) * input_val;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      if (bias_data) {
 | 
				
			||||||
 | 
					        acc += bias_data[out_c];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      int32_t acc_scaled =
 | 
				
			||||||
 | 
					          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
 | 
				
			||||||
 | 
					      acc_scaled = std::max(acc_scaled, output_activation_min);
 | 
				
			||||||
 | 
					      acc_scaled = std::min(acc_scaled, output_activation_max);
 | 
				
			||||||
 | 
					      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,65 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
 | 
				
			||||||
 | 
					                            int32_t depth, const int8_t* input_data,
 | 
				
			||||||
 | 
					                            int8_t* output_data) {
 | 
				
			||||||
 | 
					  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
 | 
				
			||||||
 | 
					  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
 | 
				
			||||||
 | 
					  // The output scale must be in sync with Prepare().
 | 
				
			||||||
 | 
					  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
 | 
				
			||||||
 | 
					  // to [-1, 127/128].
 | 
				
			||||||
 | 
					  static constexpr int32_t kOutputScale = 7;
 | 
				
			||||||
 | 
					  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
 | 
				
			||||||
 | 
					    // int32_t = (int8_t - int8_t) ^ 2.
 | 
				
			||||||
 | 
					    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
 | 
				
			||||||
 | 
					    // safe from overflowing in at least 2^16 steps.
 | 
				
			||||||
 | 
					    int32_t acc = 0;
 | 
				
			||||||
 | 
					    for (int inner_index = 0; inner_index < depth; ++inner_index) {
 | 
				
			||||||
 | 
					      int32_t input =
 | 
				
			||||||
 | 
					          input_data[depth * outer_index + inner_index] - input_zero_point;
 | 
				
			||||||
 | 
					      acc += input * input;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    int32_t inv_l2norm_multiplier;
 | 
				
			||||||
 | 
					    int inv_l2norm_shift;
 | 
				
			||||||
 | 
					    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
 | 
				
			||||||
 | 
					                                     &inv_l2norm_shift);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int inner_index = 0; inner_index < depth; ++inner_index) {
 | 
				
			||||||
 | 
					      int32_t input =
 | 
				
			||||||
 | 
					          input_data[depth * outer_index + inner_index] - input_zero_point;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Rescale and downcast. Rescale is folded into the division.
 | 
				
			||||||
 | 
					      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
 | 
				
			||||||
 | 
					      output_in_q24 =
 | 
				
			||||||
 | 
					          std::min(static_cast<int32_t>(kMaxInt8),
 | 
				
			||||||
 | 
					                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
 | 
				
			||||||
 | 
					      output_data[depth * outer_index + inner_index] =
 | 
				
			||||||
 | 
					          static_cast<int8_t>(output_in_q24);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,106 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
 | 
				
			||||||
 | 
					                     int32_t input_multiplier, int32_t input_left_shift,
 | 
				
			||||||
 | 
					                     int32_t input_size, const int8_t* input_data,
 | 
				
			||||||
 | 
					                     int8_t* output_data) {
 | 
				
			||||||
 | 
					  // Integer bits must be in sync with Prepare() function.
 | 
				
			||||||
 | 
					  static constexpr int32_t kInputIntegerBits = 4;
 | 
				
			||||||
 | 
					  static constexpr int32_t kOutputIntegerBits = 8;
 | 
				
			||||||
 | 
					  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
 | 
				
			||||||
 | 
					  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
 | 
				
			||||||
 | 
					  static constexpr int32_t kOutputZeroPoint = -128;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < input_size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input =
 | 
				
			||||||
 | 
					        static_cast<int32_t>(input_data[i]) - input_zero_point;
 | 
				
			||||||
 | 
					    if (input <= -input_range_radius) {
 | 
				
			||||||
 | 
					      output_data[i] = kMinInt8;
 | 
				
			||||||
 | 
					    } else if (input >= input_range_radius) {
 | 
				
			||||||
 | 
					      output_data[i] = kMaxInt8;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					          input, input_multiplier, input_left_shift);
 | 
				
			||||||
 | 
					      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
 | 
				
			||||||
 | 
					      const int32_t output_in_q0 =
 | 
				
			||||||
 | 
					          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Rescale and downcast.
 | 
				
			||||||
 | 
					      using gemmlowp::RoundingDivideByPOT;
 | 
				
			||||||
 | 
					      int32_t output_in_q23 =
 | 
				
			||||||
 | 
					          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
 | 
				
			||||||
 | 
					      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
 | 
				
			||||||
 | 
					                                        static_cast<int32_t>(kMinInt8)),
 | 
				
			||||||
 | 
					                               static_cast<int32_t>(kMaxInt8));
 | 
				
			||||||
 | 
					      output_data[i] = static_cast<int8_t>(output_in_q23);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Logistic(int32_t input_multiplier, int32_t input_size,
 | 
				
			||||||
 | 
					                     const int16_t* ptr_input_data, int16_t* ptr_output_data) {
 | 
				
			||||||
 | 
					  // We use the LUT for sigmoid and take into account, that
 | 
				
			||||||
 | 
					  // tanh(x) = 2*sigmoid(2*x) - 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
 | 
				
			||||||
 | 
					    int32_t input_data = (*ptr_input_data) * input_data_mul;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7] and
 | 
				
			||||||
 | 
					    // we do interpolation on unsigned values.
 | 
				
			||||||
 | 
					    uint32_t abs_input_data = 3 * abs(input_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // We divide by 2 power of 9, because
 | 
				
			||||||
 | 
					    // we need to divide by 2 in power of 7 for
 | 
				
			||||||
 | 
					    // the input conversion + 1/4 from the scale above.
 | 
				
			||||||
 | 
					    // Define uh as uint32_t type not to make this function overflow.
 | 
				
			||||||
 | 
					    uint32_t uh = abs_input_data >> 9;
 | 
				
			||||||
 | 
					    uint32_t result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (uh >= 255) {
 | 
				
			||||||
 | 
					      // Saturate to maximum.
 | 
				
			||||||
 | 
					      result = 0x7FFF << 10;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      uint32_t ua = sigmoid_table_uint16[uh];
 | 
				
			||||||
 | 
					      uint32_t ub = sigmoid_table_uint16[uh + 1];
 | 
				
			||||||
 | 
					      uint32_t ut = abs_input_data & 0x1ff;
 | 
				
			||||||
 | 
					      // Interpolation is done using the fractional bit.
 | 
				
			||||||
 | 
					      result = (ua << 9) + ut * (ub - ua);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    result = (input_data >= 0) ? (result + (1 << 9))
 | 
				
			||||||
 | 
					                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Back to 16-bit.
 | 
				
			||||||
 | 
					    result >>= 10;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    *ptr_output_data = result;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,77 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename integer_type>
 | 
				
			||||||
 | 
					inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
 | 
				
			||||||
 | 
					                 int32_t shift, const RuntimeShape& unextended_input_shape,
 | 
				
			||||||
 | 
					                 const integer_type* input_data, int32_t input_zero_point,
 | 
				
			||||||
 | 
					                 const RuntimeShape& unextended_output_shape,
 | 
				
			||||||
 | 
					                 integer_type* output_data, int32_t output_zero_point) {
 | 
				
			||||||
 | 
					  // Current implementation only supports dimension equals 4 and simultaneous
 | 
				
			||||||
 | 
					  // reduction over width and height.
 | 
				
			||||||
 | 
					  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const RuntimeShape input_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, unextended_input_shape);
 | 
				
			||||||
 | 
					  const RuntimeShape output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, unextended_output_shape);
 | 
				
			||||||
 | 
					  const int output_batch = output_shape.Dims(0);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_depth = output_shape.Dims(3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int num_elements_in_axis = input_width * input_height;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_CHECK_EQ(op_params.axis_count, 2);
 | 
				
			||||||
 | 
					  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
 | 
				
			||||||
 | 
					               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
 | 
				
			||||||
 | 
					  TFLITE_CHECK_EQ(output_height, 1);
 | 
				
			||||||
 | 
					  TFLITE_CHECK_EQ(output_width, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
 | 
				
			||||||
 | 
					  static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int out_b = 0; out_b < output_batch; ++out_b) {
 | 
				
			||||||
 | 
					    for (int out_d = 0; out_d < output_depth; ++out_d) {
 | 
				
			||||||
 | 
					      int32_t acc = 0;
 | 
				
			||||||
 | 
					      for (int in_h = 0; in_h < input_height; ++in_h) {
 | 
				
			||||||
 | 
					        for (int in_w = 0; in_w < input_width; ++in_w) {
 | 
				
			||||||
 | 
					          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
 | 
				
			||||||
 | 
					                 input_zero_point;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
 | 
				
			||||||
 | 
					      acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
 | 
				
			||||||
 | 
					                    : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
 | 
				
			||||||
 | 
					      acc += output_zero_point;
 | 
				
			||||||
 | 
					      acc = std::min(std::max(acc, kMinInt), kMaxInt);
 | 
				
			||||||
 | 
					      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
 | 
				
			||||||
 | 
					          static_cast<integer_type>(acc);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,131 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "fixedpoint/fixedpoint.h"
 | 
				
			||||||
 | 
					#include "ruy/profiler/instrumentation.h"  // from @ruy
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void MulElementwise(int size, const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                           const T* input1_data, const T* input2_data,
 | 
				
			||||||
 | 
					                           T* output_data) {
 | 
				
			||||||
 | 
					  for (int i = 0; i < size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input1_val = params.input1_offset + input1_data[i];
 | 
				
			||||||
 | 
					    const int32_t input2_val = params.input2_offset + input2_data[i];
 | 
				
			||||||
 | 
					    const int32_t unclamped_result =
 | 
				
			||||||
 | 
					        params.output_offset +
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplier(input1_val * input2_val,
 | 
				
			||||||
 | 
					                                      params.output_multiplier,
 | 
				
			||||||
 | 
					                                      params.output_shift);
 | 
				
			||||||
 | 
					    const int32_t clamped_output =
 | 
				
			||||||
 | 
					        std::min(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                 std::max(params.quantized_activation_min, unclamped_result));
 | 
				
			||||||
 | 
					    output_data[i] = static_cast<T>(clamped_output);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void Mul(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const T* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const T* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  ruy::profiler::ScopeLabel label("Mul/8bit");
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Mul with 16 bit inputs and int8_t outputs.
 | 
				
			||||||
 | 
					inline void Mul(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const int16_t* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const int16_t* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, int8_t* output_data) {
 | 
				
			||||||
 | 
					  ruy::profiler::ScopeLabel label("Mul/Int16Int8");
 | 
				
			||||||
 | 
					  int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  int32_t output_activation_min = params.quantized_activation_min;
 | 
				
			||||||
 | 
					  int32_t output_activation_max = params.quantized_activation_max;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    // F0 uses 0 integer bits, range [-1, 1].
 | 
				
			||||||
 | 
					    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    F0 unclamped_result =
 | 
				
			||||||
 | 
					        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
 | 
				
			||||||
 | 
					    int16_t rescaled_result =
 | 
				
			||||||
 | 
					        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
 | 
				
			||||||
 | 
					    int16_t clamped_result = std::min<int16_t>(
 | 
				
			||||||
 | 
					        output_activation_max - output_offset, rescaled_result);
 | 
				
			||||||
 | 
					    clamped_result = std::max<int16_t>(output_activation_min - output_offset,
 | 
				
			||||||
 | 
					                                       clamped_result);
 | 
				
			||||||
 | 
					    output_data[i] = output_offset + clamped_result;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void BroadcastMul4DSlow(
 | 
				
			||||||
 | 
					    const ArithmeticParams& params, const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					    const T* input1_data, const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  // The input shapes are extended as part of NdArrayDesc initialization.
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
 | 
				
			||||||
 | 
					                                      &desc2);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          const int32_t input1_val =
 | 
				
			||||||
 | 
					              params.input1_offset +
 | 
				
			||||||
 | 
					              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t input2_val =
 | 
				
			||||||
 | 
					              params.input2_offset +
 | 
				
			||||||
 | 
					              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t unclamped_result =
 | 
				
			||||||
 | 
					              params.output_offset +
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplier(input1_val * input2_val,
 | 
				
			||||||
 | 
					                                            params.output_multiplier,
 | 
				
			||||||
 | 
					                                            params.output_shift);
 | 
				
			||||||
 | 
					          const int32_t clamped_output = std::min(
 | 
				
			||||||
 | 
					              params.quantized_activation_max,
 | 
				
			||||||
 | 
					              std::max(params.quantized_activation_min, unclamped_result));
 | 
				
			||||||
 | 
					          output_data[Offset(extended_output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              static_cast<T>(clamped_output);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,258 @@
 | 
				
			||||||
 | 
					/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void AveragePool(const PoolParams& params,
 | 
				
			||||||
 | 
					                        const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                        const int8_t* input_data,
 | 
				
			||||||
 | 
					                        const RuntimeShape& output_shape, int8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          int32_t acc = 0;
 | 
				
			||||||
 | 
					          int filter_count = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              acc +=
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
 | 
				
			||||||
 | 
					              filter_count++;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          // Round to the closest integer value.
 | 
				
			||||||
 | 
					          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
 | 
				
			||||||
 | 
					                        : (acc - filter_count / 2) / filter_count;
 | 
				
			||||||
 | 
					          acc = std::max(acc, params.quantized_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, params.quantized_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              static_cast<int8_t>(acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                    const int8_t* input_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                    int8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   std::numeric_limits<int8_t>::min());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                   std::numeric_limits<int8_t>::max());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          int8_t max = std::numeric_limits<int8_t>::lowest();
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              max = std::max(
 | 
				
			||||||
 | 
					                  max,
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          max = std::max<int8_t>(max, params.quantized_activation_min);
 | 
				
			||||||
 | 
					          max = std::min<int8_t>(max, params.quantized_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              static_cast<int8_t>(max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void AveragePool(const PoolParams& params,
 | 
				
			||||||
 | 
					                        const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                        const int16_t* input_data,
 | 
				
			||||||
 | 
					                        const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                        int16_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          int32_t acc = 0;
 | 
				
			||||||
 | 
					          int filter_count = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              acc +=
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
 | 
				
			||||||
 | 
					              filter_count++;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          // Round to the closest integer value.
 | 
				
			||||||
 | 
					          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
 | 
				
			||||||
 | 
					                        : (acc - filter_count / 2) / filter_count;
 | 
				
			||||||
 | 
					          acc = std::max(acc, params.quantized_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, params.quantized_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              static_cast<int16_t>(acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                    const int16_t* input_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                    int16_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   std::numeric_limits<int16_t>::min());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                   std::numeric_limits<int16_t>::max());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          int16_t max = std::numeric_limits<int16_t>::lowest();
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              max = std::max(
 | 
				
			||||||
 | 
					                  max,
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          max = std::max<int16_t>(max, params.quantized_activation_min);
 | 
				
			||||||
 | 
					          max = std::min<int16_t>(max, params.quantized_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              static_cast<int16_t>(max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,110 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "fixedpoint/fixedpoint.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
 | 
				
			||||||
 | 
					                 int32_t input_multiplier, int32_t input_shift,
 | 
				
			||||||
 | 
					                 const RuntimeShape& input_shape, const int8_t* input_data,
 | 
				
			||||||
 | 
					                 const RuntimeShape& output_shape, int8_t* output_data) {
 | 
				
			||||||
 | 
					  // Integer bits must be in sync with Prepare() function.
 | 
				
			||||||
 | 
					  static constexpr int32_t kInputIntegerBits = 4;
 | 
				
			||||||
 | 
					  static constexpr int32_t kOutputScale = 7;
 | 
				
			||||||
 | 
					  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
 | 
				
			||||||
 | 
					  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
 | 
				
			||||||
 | 
					  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input =
 | 
				
			||||||
 | 
					        static_cast<int32_t>(input_data[i]) - input_zero_point;
 | 
				
			||||||
 | 
					    if (input <= -input_range_radius) {
 | 
				
			||||||
 | 
					      output_data[i] = kMinInt8;
 | 
				
			||||||
 | 
					    } else if (input >= input_range_radius) {
 | 
				
			||||||
 | 
					      output_data[i] = kMaxInt8;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      const int32_t input_in_q4 =
 | 
				
			||||||
 | 
					          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
 | 
				
			||||||
 | 
					      const int32_t output_in_q0 =
 | 
				
			||||||
 | 
					          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Rescale and downcast.
 | 
				
			||||||
 | 
					      using gemmlowp::RoundingDivideByPOT;
 | 
				
			||||||
 | 
					      int32_t output_in_q24 =
 | 
				
			||||||
 | 
					          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
 | 
				
			||||||
 | 
					      output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
 | 
				
			||||||
 | 
					      output_data[i] = static_cast<int8_t>(output_in_q24);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
 | 
				
			||||||
 | 
					                 const RuntimeShape& input_shape, const int16_t* ptr_input_data,
 | 
				
			||||||
 | 
					                 const RuntimeShape& output_shape, int16_t* ptr_output_data) {
 | 
				
			||||||
 | 
					  // We use the LUT for sigmoid and take into account, that
 | 
				
			||||||
 | 
					  // tanh(x) = 2*sigmoid(2*x) - 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
 | 
				
			||||||
 | 
					    int32_t input_data = (*ptr_input_data) * input_data_mul;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (input_left_shift == 1) {
 | 
				
			||||||
 | 
					      input_data <<= 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
 | 
				
			||||||
 | 
					    uint32_t abs_input_data = 3 * abs(input_data);
 | 
				
			||||||
 | 
					    uint32_t uh = abs_input_data >> 8;
 | 
				
			||||||
 | 
					    int32_t result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (uh >= 255) {
 | 
				
			||||||
 | 
					      // Saturate to maximum.
 | 
				
			||||||
 | 
					      result = 0xFFFF << 8;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      uint32_t ua = sigmoid_table_uint16[uh];
 | 
				
			||||||
 | 
					      uint32_t ub = sigmoid_table_uint16[uh + 1];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      uint8_t ut = abs_input_data & 0xFF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      result = (ua << 8) + ut * (ub - ua);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    result = (input_data >= 0)
 | 
				
			||||||
 | 
					                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
 | 
				
			||||||
 | 
					                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Convert back to 16-bit.
 | 
				
			||||||
 | 
					    result >>= (9 - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    *ptr_output_data = result;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,221 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_integer_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fixed-point per-channel-quantization transpose convolution reference kernel.
 | 
				
			||||||
 | 
					inline void TransposeConv(
 | 
				
			||||||
 | 
					    const ConvParams& params, const int32_t* output_multiplier,
 | 
				
			||||||
 | 
					    const int32_t* output_shift, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int8_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const int32_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
 | 
				
			||||||
 | 
					    int32_t* scratch_buffer) {
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  (void)im2col_data;   // only used in optimized code.
 | 
				
			||||||
 | 
					  (void)im2col_shape;  // only used in optimized code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int32_t input_offset = params.input_offset;
 | 
				
			||||||
 | 
					  const int32_t output_offset = params.output_offset;
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int num_elements = output_shape.FlatSize();
 | 
				
			||||||
 | 
					  // We need to initialize scratch_buffer to all 0s, as we apply the same
 | 
				
			||||||
 | 
					  // 'scatter' based trick as in float version.
 | 
				
			||||||
 | 
					  memset(scratch_buffer, 0, num_elements * sizeof(int32_t));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Loop through input elements one at a time.
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int in_y = 0; in_y < input_height; ++in_y) {
 | 
				
			||||||
 | 
					      for (int in_x = 0; in_x < input_width; ++in_x) {
 | 
				
			||||||
 | 
					        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					          // Loop through the output elements it will influence.
 | 
				
			||||||
 | 
					          const int out_x_origin = (in_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					          const int out_y_origin = (in_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              for (int out_channel = 0; out_channel < output_depth;
 | 
				
			||||||
 | 
					                   ++out_channel) {
 | 
				
			||||||
 | 
					                // Compute output element location.
 | 
				
			||||||
 | 
					                const int out_x = out_x_origin + filter_x;
 | 
				
			||||||
 | 
					                const int out_y = out_y_origin + filter_y;
 | 
				
			||||||
 | 
					                // We cannot accumulate out of bounds.
 | 
				
			||||||
 | 
					                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
 | 
				
			||||||
 | 
					                    (out_y < output_height)) {
 | 
				
			||||||
 | 
					                  const int8_t input_value = input_data[Offset(
 | 
				
			||||||
 | 
					                      input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                  const int8_t filter_value =
 | 
				
			||||||
 | 
					                      filter_data[Offset(filter_shape, out_channel, filter_y,
 | 
				
			||||||
 | 
					                                         filter_x, in_channel)];
 | 
				
			||||||
 | 
					                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                                        out_channel)] +=
 | 
				
			||||||
 | 
					                      (input_value + input_offset) * filter_value;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                                              out_channel)];
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            acc += bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          acc = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					              acc, output_multiplier[out_channel], output_shift[out_channel]);
 | 
				
			||||||
 | 
					          acc += output_offset;
 | 
				
			||||||
 | 
					          acc = std::max(acc, output_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, output_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              static_cast<int8_t>(acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// int16_t input (zero_point=0), int8_t filter, int64 accumulator
 | 
				
			||||||
 | 
					inline void TransposeConv(
 | 
				
			||||||
 | 
					    const ConvParams& params, const int32_t* output_multiplier,
 | 
				
			||||||
 | 
					    const int32_t* output_shift, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const int16_t* input_data, const RuntimeShape& filter_shape,
 | 
				
			||||||
 | 
					    const int8_t* filter_data, const RuntimeShape& bias_shape,
 | 
				
			||||||
 | 
					    const std::int64_t* bias_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					    int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
 | 
				
			||||||
 | 
					    std::int64_t* scratch_buffer) {
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int pad_width = params.padding_values.width;
 | 
				
			||||||
 | 
					  const int pad_height = params.padding_values.height;
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  (void)im2col_data;   // only used in optimized code.
 | 
				
			||||||
 | 
					  (void)im2col_shape;  // only used in optimized code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
 | 
				
			||||||
 | 
					  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
 | 
				
			||||||
 | 
					  if (bias_data) {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int filter_height = filter_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int filter_width = filter_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
 | 
				
			||||||
 | 
					  const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int num_elements = output_shape.FlatSize();
 | 
				
			||||||
 | 
					  // We need to initialize scratch_buffer to all 0s, as we apply the same
 | 
				
			||||||
 | 
					  // 'scatter' based trick as in float version.
 | 
				
			||||||
 | 
					  memset(scratch_buffer, 0, num_elements * sizeof(std::int64_t));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Loop through input elements one at a time.
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int in_y = 0; in_y < input_height; ++in_y) {
 | 
				
			||||||
 | 
					      for (int in_x = 0; in_x < input_width; ++in_x) {
 | 
				
			||||||
 | 
					        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
 | 
				
			||||||
 | 
					          // Loop through the output elements it will influence.
 | 
				
			||||||
 | 
					          const int out_x_origin = (in_x * stride_width) - pad_width;
 | 
				
			||||||
 | 
					          const int out_y_origin = (in_y * stride_height) - pad_height;
 | 
				
			||||||
 | 
					          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 | 
				
			||||||
 | 
					              for (int out_channel = 0; out_channel < output_depth;
 | 
				
			||||||
 | 
					                   ++out_channel) {
 | 
				
			||||||
 | 
					                // Compute output element location.
 | 
				
			||||||
 | 
					                const int out_x = out_x_origin + filter_x;
 | 
				
			||||||
 | 
					                const int out_y = out_y_origin + filter_y;
 | 
				
			||||||
 | 
					                // We cannot accumulate out of bounds.
 | 
				
			||||||
 | 
					                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
 | 
				
			||||||
 | 
					                    (out_y < output_height)) {
 | 
				
			||||||
 | 
					                  const int32_t input_value = input_data[Offset(
 | 
				
			||||||
 | 
					                      input_shape, batch, in_y, in_x, in_channel)];
 | 
				
			||||||
 | 
					                  const int32_t filter_value =
 | 
				
			||||||
 | 
					                      filter_data[Offset(filter_shape, out_channel, filter_y,
 | 
				
			||||||
 | 
					                                         filter_x, in_channel)];
 | 
				
			||||||
 | 
					                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
 | 
				
			||||||
 | 
					                                        out_channel)] +=
 | 
				
			||||||
 | 
					                      input_value * filter_value;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
 | 
				
			||||||
 | 
					          std::int64_t acc = scratch_buffer[Offset(output_shape, batch, out_y,
 | 
				
			||||||
 | 
					                                                   out_x, out_channel)];
 | 
				
			||||||
 | 
					          if (bias_data) {
 | 
				
			||||||
 | 
					            acc += bias_data[out_channel];
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					              acc, output_multiplier[out_channel], output_shift[out_channel]);
 | 
				
			||||||
 | 
					          scaled_acc = std::max(scaled_acc, output_activation_min);
 | 
				
			||||||
 | 
					          scaled_acc = std::min(scaled_acc, output_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
 | 
				
			||||||
 | 
					              static_cast<int16_t>(scaled_acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_integer_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,90 @@
 | 
				
			||||||
 | 
					/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/c/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Float L2 normalization over the innermost dimension.
					//
					// Every run of `depth` consecutive values is divided by its Euclidean norm.
					// The norm is floored at `epsilon` so that an all-zero run does not divide
					// by zero. `op_params` is not read by this overload.
					inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
					                            const RuntimeShape& input_shape,
					                            const float* input_data,
					                            const RuntimeShape& output_shape,
					                            float* output_data, float epsilon = 1e-6) {
					  const int trailing_dim = input_shape.DimensionsCount() - 1;
					  const int outer_size =
					      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
					  const int depth =
					      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

					  for (int outer = 0; outer < outer_size; ++outer) {
					    const float* in_row = input_data + depth * outer;
					    float* out_row = output_data + depth * outer;

					    // Sum of squares along the innermost dimension.
					    float sum_of_squares = 0;
					    for (int k = 0; k < depth; ++k) {
					      const float value = in_row[k];
					      sum_of_squares += value * value;
					    }

					    const float divisor = std::max(std::sqrt(sum_of_squares), epsilon);
					    for (int k = 0; k < depth; ++k) {
					      out_row[k] = in_row[k] / divisor;
					    }
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Quantized (uint8_t) L2 normalization over the innermost dimension.
					//
					// Each value has `op_params.input_zero_point` subtracted, is scaled by a
					// fixed-point inverse square root of the run's sum of squares, and is then
					// re-centered on 128 and clamped to [0, 255].
					inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
					                            const RuntimeShape& input_shape,
					                            const uint8_t* input_data,
					                            const RuntimeShape& output_shape,
					                            uint8_t* output_data) {
					  const int trailing_dim = input_shape.DimensionsCount() - 1;
					  const int depth =
					      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
					  const int outer_size =
					      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
					  const int32_t input_zero_point = op_params.input_zero_point;

					  for (int outer = 0; outer < outer_size; ++outer) {
					    const uint8_t* in_row = input_data + depth * outer;
					    uint8_t* out_row = output_data + depth * outer;

					    // Sum of squared zero-point-corrected inputs for this run.
					    int32_t sum_of_squares = 0;
					    for (int k = 0; k < depth; k++) {
					      const int32_t centered = in_row[k] - input_zero_point;
					      sum_of_squares += centered * centered;
					    }

					    // Fixed-point multiplier/shift pair derived from the sum of squares.
					    int32_t inv_l2norm_multiplier;
					    int inv_l2norm_shift;
					    GetInvSqrtQuantizedMultiplierExp(sum_of_squares, kReverseShift,
					                                     &inv_l2norm_multiplier, &inv_l2norm_shift);

					    for (int k = 0; k < depth; k++) {
					      const int32_t centered = in_row[k] - input_zero_point;
					      const int32_t rescaled = MultiplyByQuantizedMultiplierSmallerThanOneExp(
					          128 * centered, inv_l2norm_multiplier, inv_l2norm_shift);
					      const int32_t shifted = 128 + rescaled;
					      const int32_t clamped =
					          std::min(static_cast<int32_t>(255),
					                   std::max(static_cast<int32_t>(0), shifted));
					      out_row[k] = static_cast<uint8_t>(clamped);
					    }
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,132 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cmath>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "fixedpoint/fixedpoint.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/quantization_util.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/op_macros.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
 | 
				
			||||||
 | 
					                     const RuntimeShape& output_shape, float* output_data) {
 | 
				
			||||||
 | 
					  const float cutoff_upper = 16.619047164916992188f;
 | 
				
			||||||
 | 
					  const float cutoff_lower = -9.f;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Rational for using approximation in reference kernel.
 | 
				
			||||||
 | 
					  // 0. This approximation gives enough precision for float.
 | 
				
			||||||
 | 
					  // 1. This works around an issue on an embedded chipset where exp() does not
 | 
				
			||||||
 | 
					  // return correctly as expected - exp(x) should return inf when overflown
 | 
				
			||||||
 | 
					  // not 1.701417   IEEE 754 defines representation for inf.
 | 
				
			||||||
 | 
					  // 2. This will speed up calculation and is matching the behavior in the
 | 
				
			||||||
 | 
					  // optimized kernels. (check the definition of scalar_logistic_op<float>)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    float val = input_data[i];
 | 
				
			||||||
 | 
					    float result;
 | 
				
			||||||
 | 
					    if (val > cutoff_upper) {
 | 
				
			||||||
 | 
					      result = 1.0f;
 | 
				
			||||||
 | 
					    } else if (val < cutoff_lower) {
 | 
				
			||||||
 | 
					      result = std::exp(val);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      result = 1.f / (1.f + std::exp(-val));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    output_data[i] = result;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Convenience version that allows, for example, generated-code calls to be
 | 
				
			||||||
 | 
					// uniform between data types.
 | 
				
			||||||
 | 
					inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                     const float* input_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                     float* output_data) {
 | 
				
			||||||
 | 
					  // Drop params: not needed.
 | 
				
			||||||
 | 
					  Logistic(input_shape, input_data, output_shape, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Logistic(const LogisticParams& params,
 | 
				
			||||||
 | 
					                     const RuntimeShape& input_shape, const int16_t* input_data,
 | 
				
			||||||
 | 
					                     const RuntimeShape& output_shape, int16_t* output_data) {
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    // F0 uses 0 integer bits, range [-1, 1].
 | 
				
			||||||
 | 
					    // This is the return type of math functions such as tanh, logistic,
 | 
				
			||||||
 | 
					    // whose range is in [-1, 1].
 | 
				
			||||||
 | 
					    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
 | 
				
			||||||
 | 
					    // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
 | 
				
			||||||
 | 
					    using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const F3 input = F3::FromRaw(input_data[i]);
 | 
				
			||||||
 | 
					    F0 output = gemmlowp::logistic(input);
 | 
				
			||||||
 | 
					    output_data[i] = output.raw();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Quantized int8_t logistic activation.  Cheats by dequantizing and
 | 
				
			||||||
 | 
					// requantizing around the floating point logistic method.  This implementation
 | 
				
			||||||
 | 
					// is slow on platforms without a floating point unit.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
 | 
				
			||||||
 | 
					// approach used in TFLite for int8_t Logistic.
 | 
				
			||||||
 | 
					inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
 | 
				
			||||||
 | 
					                     float input_scale, int input_zero_point,
 | 
				
			||||||
 | 
					                     const RuntimeShape& output_shape, int8_t* output_data,
 | 
				
			||||||
 | 
					                     float output_scale, int output_zero_point) {
 | 
				
			||||||
 | 
					  const float cutoff_upper = 16.619047164916992188f;
 | 
				
			||||||
 | 
					  const float cutoff_lower = -9.f;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Rational for using approximation in reference kernel.
 | 
				
			||||||
 | 
					  // 0. This approximation gives enough precision for float.
 | 
				
			||||||
 | 
					  // 1. This works around an issue on an embedded chipset where exp() does not
 | 
				
			||||||
 | 
					  // return correctly as expected - exp(x) should return inf when overflown
 | 
				
			||||||
 | 
					  // not 1.701417   IEEE 754 defines representation for inf.
 | 
				
			||||||
 | 
					  // 2. This will speed up calculation and is matching the behavior in the
 | 
				
			||||||
 | 
					  // optimized kernels. (check the definition of scalar_logistic_op<float>)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    // Dequantize.
 | 
				
			||||||
 | 
					    float val =
 | 
				
			||||||
 | 
					        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
 | 
				
			||||||
 | 
					    float result;
 | 
				
			||||||
 | 
					    if (val > cutoff_upper) {
 | 
				
			||||||
 | 
					      result = 1.0f;
 | 
				
			||||||
 | 
					    } else if (val < cutoff_lower) {
 | 
				
			||||||
 | 
					      result = std::exp(val);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      result = 1.f / (1.f + std::exp(-val));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Requantize
 | 
				
			||||||
 | 
					    int8_t output =
 | 
				
			||||||
 | 
					        static_cast<int8_t>(result / output_scale + output_zero_point);
 | 
				
			||||||
 | 
					    output_data[i] = output;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,64 @@
 | 
				
			||||||
 | 
					/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, typename Op, int N = 5>
 | 
				
			||||||
 | 
					void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
 | 
				
			||||||
 | 
					                                 const T* input1_data,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& unextended_input2_shape,
 | 
				
			||||||
 | 
					                                 const T* input2_data,
 | 
				
			||||||
 | 
					                                 const RuntimeShape& unextended_output_shape,
 | 
				
			||||||
 | 
					                                 T* output_data, Op op) {
 | 
				
			||||||
 | 
					  // Uses element-wise calculation if broadcast is not required.
 | 
				
			||||||
 | 
					  if (unextended_input1_shape == unextended_input2_shape) {
 | 
				
			||||||
 | 
					    const int flat_size =
 | 
				
			||||||
 | 
					        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
 | 
				
			||||||
 | 
					                             unextended_output_shape);
 | 
				
			||||||
 | 
					    for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					      output_data[i] = op(input1_data[i], input2_data[i]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
 | 
				
			||||||
 | 
					    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    NdArrayDesc<N> desc1;
 | 
				
			||||||
 | 
					    NdArrayDesc<N> desc2;
 | 
				
			||||||
 | 
					    NdArrayDesc<N> output_desc;
 | 
				
			||||||
 | 
					    NdArrayDescsForElementwiseBroadcast(
 | 
				
			||||||
 | 
					        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
 | 
				
			||||||
 | 
					    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
 | 
				
			||||||
 | 
					                   &output_desc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto maxmin_func = [&](int indexes[N]) {
 | 
				
			||||||
 | 
					      output_data[SubscriptToIndex(output_desc, indexes)] =
 | 
				
			||||||
 | 
					          op(input1_data[SubscriptToIndex(desc1, indexes)],
 | 
				
			||||||
 | 
					             input2_data[SubscriptToIndex(desc2, indexes)]);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    NDOpsHelper<N>(output_desc, maxmin_func);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,166 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Element-wise mul that can often be used for inner loop of broadcast Mul as
 | 
				
			||||||
 | 
					// well as the non-broadcast Mul.
 | 
				
			||||||
 | 
					inline void MulElementwise(int size, const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                           const uint8_t* input1_data,
 | 
				
			||||||
 | 
					                           const uint8_t* input2_data, uint8_t* output_data) {
 | 
				
			||||||
 | 
					  for (int i = 0; i < size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input1_val = params.input1_offset + input1_data[i];
 | 
				
			||||||
 | 
					    const int32_t input2_val = params.input2_offset + input2_data[i];
 | 
				
			||||||
 | 
					    const int32_t unclamped_result =
 | 
				
			||||||
 | 
					        params.output_offset +
 | 
				
			||||||
 | 
					        MultiplyByQuantizedMultiplier(input1_val * input2_val,
 | 
				
			||||||
 | 
					                                      params.output_multiplier,
 | 
				
			||||||
 | 
					                                      params.output_shift);
 | 
				
			||||||
 | 
					    const int32_t clamped_output =
 | 
				
			||||||
 | 
					        std::min(params.quantized_activation_max,
 | 
				
			||||||
 | 
					                 std::max(params.quantized_activation_min, unclamped_result));
 | 
				
			||||||
 | 
					    output_data[i] = static_cast<uint8_t>(clamped_output);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void Mul(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const T* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const T* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  T output_activation_min;
 | 
				
			||||||
 | 
					  T output_activation_max;
 | 
				
			||||||
 | 
					  GetActivationParams(params, &output_activation_min, &output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingFlatSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					        input1_data[i] * input2_data[i], output_activation_min,
 | 
				
			||||||
 | 
					        output_activation_max);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void Mul(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input1_shape, const uint8_t* input1_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& input2_shape, const uint8_t* input2_data,
 | 
				
			||||||
 | 
					                const RuntimeShape& output_shape, uint8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingFlatSize(input1_shape, input2_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void BroadcastMul4DSlow(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input1_shape,
 | 
				
			||||||
 | 
					                               const uint8_t* input1_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& input2_shape,
 | 
				
			||||||
 | 
					                               const uint8_t* input2_data,
 | 
				
			||||||
 | 
					                               const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                               uint8_t* output_data) {
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
 | 
				
			||||||
 | 
					                                      &desc2);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          const int32_t input1_val =
 | 
				
			||||||
 | 
					              params.input1_offset +
 | 
				
			||||||
 | 
					              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t input2_val =
 | 
				
			||||||
 | 
					              params.input2_offset +
 | 
				
			||||||
 | 
					              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
 | 
				
			||||||
 | 
					          const int32_t unclamped_result =
 | 
				
			||||||
 | 
					              params.output_offset +
 | 
				
			||||||
 | 
					              MultiplyByQuantizedMultiplier(input1_val * input2_val,
 | 
				
			||||||
 | 
					                                            params.output_multiplier,
 | 
				
			||||||
 | 
					                                            params.output_shift);
 | 
				
			||||||
 | 
					          const int32_t clamped_output = std::min(
 | 
				
			||||||
 | 
					              params.quantized_activation_max,
 | 
				
			||||||
 | 
					              std::max(params.quantized_activation_min, unclamped_result));
 | 
				
			||||||
 | 
					          output_data[Offset(extended_output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              static_cast<uint8_t>(clamped_output);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					void BroadcastMul4DSlow(const ArithmeticParams& params,
 | 
				
			||||||
 | 
					                        const RuntimeShape& unextended_input1_shape,
 | 
				
			||||||
 | 
					                        const T* input1_data,
 | 
				
			||||||
 | 
					                        const RuntimeShape& unextended_input2_shape,
 | 
				
			||||||
 | 
					                        const T* input2_data,
 | 
				
			||||||
 | 
					                        const RuntimeShape& unextended_output_shape,
 | 
				
			||||||
 | 
					                        T* output_data) {
 | 
				
			||||||
 | 
					  T output_activation_min;
 | 
				
			||||||
 | 
					  T output_activation_max;
 | 
				
			||||||
 | 
					  GetActivationParams(params, &output_activation_min, &output_activation_max);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const RuntimeShape output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, unextended_output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
 | 
				
			||||||
 | 
					                                      unextended_input2_shape, &desc1, &desc2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // In Tensorflow, the dimensions are canonically named (batch_number, row,
 | 
				
			||||||
 | 
					  // col, channel), with extents (batches, height, width, depth), with the
 | 
				
			||||||
 | 
					  // trailing dimension changing most rapidly (channels has the smallest stride,
 | 
				
			||||||
 | 
					  // typically 1 element).
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // In generated C code, we store arrays with the dimensions reversed. The
 | 
				
			||||||
 | 
					  // first dimension has smallest stride.
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  // We name our variables by their Tensorflow convention, but generate C code
 | 
				
			||||||
 | 
					  // nesting loops such that the innermost loop has the smallest stride for the
 | 
				
			||||||
 | 
					  // best cache behavior.
 | 
				
			||||||
 | 
					  for (int b = 0; b < output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, b, y, x, c)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(
 | 
				
			||||||
 | 
					                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
 | 
				
			||||||
 | 
					                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
 | 
				
			||||||
 | 
					                  output_activation_min, output_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,37 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void Negate(const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                   const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    output_data[i] = -input_data[i];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,162 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <vector>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TFLite Pad supports activation tensors with up to 4 dimensions.
 | 
				
			||||||
 | 
					constexpr int PadKernelMaxDimensionCount() {
					  // Upper bound on tensor rank handled by the pad kernels in this header.
					  return 4;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// There are two versions of pad: Pad and PadV2.  In PadV2 there is a second
 | 
				
			||||||
 | 
					// scalar input that provides the padding value.  Therefore pad_value_ptr can be
 | 
				
			||||||
 | 
					// equivalent to a simple input1_data.  For Pad, it should point to a zero
 | 
				
			||||||
 | 
					// value.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Note that two typenames are required, so that T=P=int32_t is considered a
 | 
				
			||||||
 | 
					// specialization distinct from P=int32_t.
 | 
				
			||||||
 | 
					template <typename T, typename P>
 | 
				
			||||||
 | 
					inline void PadImpl(const tflite::PadParams& op_params,
 | 
				
			||||||
 | 
					                    const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                    const P* pad_value_ptr, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                    T* output_data) {
 | 
				
			||||||
 | 
					  const RuntimeShape ext_input_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
 | 
				
			||||||
 | 
					  const RuntimeShape ext_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
 | 
				
			||||||
 | 
					  // pad them to 4 dims (yes, we are "padding the padding").
 | 
				
			||||||
 | 
					  int left_padding_copy[PadKernelMaxDimensionCount()];
 | 
				
			||||||
 | 
					  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
 | 
				
			||||||
 | 
					    left_padding_copy[i] = 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  for (int i = 0; i < op_params.left_padding_count; ++i) {
 | 
				
			||||||
 | 
					    left_padding_copy[i + PadKernelMaxDimensionCount() -
 | 
				
			||||||
 | 
					                      op_params.left_padding_count] = op_params.left_padding[i];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  int right_padding_copy[PadKernelMaxDimensionCount()];
 | 
				
			||||||
 | 
					  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
 | 
				
			||||||
 | 
					    right_padding_copy[i] = 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  for (int i = 0; i < op_params.right_padding_count; ++i) {
 | 
				
			||||||
 | 
					    right_padding_copy[i + PadKernelMaxDimensionCount() -
 | 
				
			||||||
 | 
					                       op_params.right_padding_count] =
 | 
				
			||||||
 | 
					        op_params.right_padding[i];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int output_batch = ext_output_shape.Dims(0);
 | 
				
			||||||
 | 
					  const int output_height = ext_output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = ext_output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_depth = ext_output_shape.Dims(3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int left_b_padding = left_padding_copy[0];
 | 
				
			||||||
 | 
					  const int left_h_padding = left_padding_copy[1];
 | 
				
			||||||
 | 
					  const int left_w_padding = left_padding_copy[2];
 | 
				
			||||||
 | 
					  const int left_d_padding = left_padding_copy[3];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int right_b_padding = right_padding_copy[0];
 | 
				
			||||||
 | 
					  const int right_h_padding = right_padding_copy[1];
 | 
				
			||||||
 | 
					  const int right_w_padding = right_padding_copy[2];
 | 
				
			||||||
 | 
					  const int right_d_padding = right_padding_copy[3];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const T pad_value = *pad_value_ptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const T* in_ptr = input_data;
 | 
				
			||||||
 | 
					  T* out_ptr = output_data;
 | 
				
			||||||
 | 
					  for (int out_b = 0; out_b < output_batch; ++out_b) {
 | 
				
			||||||
 | 
					    for (int out_h = 0; out_h < output_height; ++out_h) {
 | 
				
			||||||
 | 
					      for (int out_w = 0; out_w < output_width; ++out_w) {
 | 
				
			||||||
 | 
					        for (int out_d = 0; out_d < output_depth; ++out_d) {
 | 
				
			||||||
 | 
					          if (out_b < left_b_padding ||
 | 
				
			||||||
 | 
					              out_b >= output_batch - right_b_padding ||
 | 
				
			||||||
 | 
					              out_h < left_h_padding ||
 | 
				
			||||||
 | 
					              out_h >= output_height - right_h_padding ||
 | 
				
			||||||
 | 
					              out_w < left_w_padding ||
 | 
				
			||||||
 | 
					              out_w >= output_width - right_w_padding ||
 | 
				
			||||||
 | 
					              out_d < left_d_padding ||
 | 
				
			||||||
 | 
					              out_d >= output_depth - right_d_padding) {
 | 
				
			||||||
 | 
					            *out_ptr++ = pad_value;
 | 
				
			||||||
 | 
					          } else {
 | 
				
			||||||
 | 
					            *out_ptr++ = *in_ptr++;
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, typename P>
 | 
				
			||||||
 | 
					inline void Pad(const tflite::PadParams& op_params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                const P* pad_value_ptr, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                T* output_data) {
 | 
				
			||||||
 | 
					  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
 | 
				
			||||||
 | 
					          output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void Pad(const tflite::PadParams& op_params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                T* output_data) {
 | 
				
			||||||
 | 
					  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
 | 
				
			||||||
 | 
					  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
 | 
				
			||||||
 | 
					          output_shape, output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This version avoids conflicting template matching.
 | 
				
			||||||
 | 
					template <>
 | 
				
			||||||
 | 
					inline void Pad(const tflite::PadParams& op_params,
 | 
				
			||||||
 | 
					                const RuntimeShape& input_shape, const int32_t* input_data,
 | 
				
			||||||
 | 
					                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                int32_t* output_data) {
 | 
				
			||||||
 | 
					  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
 | 
				
			||||||
 | 
					          output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T, typename P>
 | 
				
			||||||
 | 
					inline void PadImageStyle(const tflite::PadParams& op_params,
 | 
				
			||||||
 | 
					                          const RuntimeShape& input_shape, const T* input_data,
 | 
				
			||||||
 | 
					                          const P* pad_value_ptr,
 | 
				
			||||||
 | 
					                          const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
 | 
				
			||||||
 | 
					      output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename P>
 | 
				
			||||||
 | 
					inline void PadImageStyle(const tflite::PadParams& op_params,
 | 
				
			||||||
 | 
					                          const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                          const float* input_data, const P* pad_value_ptr,
 | 
				
			||||||
 | 
					                          const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                          float* output_data) {
 | 
				
			||||||
 | 
					  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
 | 
				
			||||||
 | 
					      output_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,297 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/quantization_util.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void AveragePool(const PoolParams& params,
 | 
				
			||||||
 | 
					                        const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                        const float* input_data,
 | 
				
			||||||
 | 
					                        const RuntimeShape& output_shape, float* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          float total = 0.f;
 | 
				
			||||||
 | 
					          float filter_count = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              total +=
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
 | 
				
			||||||
 | 
					              filter_count++;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          const float average = total / filter_count;
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(average, params.float_activation_min,
 | 
				
			||||||
 | 
					                                           params.float_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void AveragePool(const PoolParams& params,
 | 
				
			||||||
 | 
					                        const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                        const uint8_t* input_data,
 | 
				
			||||||
 | 
					                        const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                        uint8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          int32_t acc = 0;
 | 
				
			||||||
 | 
					          int filter_count = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              acc +=
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
 | 
				
			||||||
 | 
					              filter_count++;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          acc = (acc + filter_count / 2) / filter_count;
 | 
				
			||||||
 | 
					          acc = std::max(acc, params.quantized_activation_min);
 | 
				
			||||||
 | 
					          acc = std::min(acc, params.quantized_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              static_cast<uint8_t>(acc);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                   const float* input_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                   float* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          float sum_squares = 0.f;
 | 
				
			||||||
 | 
					          int filter_count = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              const float val =
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
 | 
				
			||||||
 | 
					              sum_squares += val * val;
 | 
				
			||||||
 | 
					              filter_count++;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          const float l2pool_result = std::sqrt(sum_squares / filter_count);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(l2pool_result,
 | 
				
			||||||
 | 
					                                           params.float_activation_min,
 | 
				
			||||||
 | 
					                                           params.float_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                    const float* input_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                    float* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          float max = std::numeric_limits<float>::lowest();
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              max = std::max(
 | 
				
			||||||
 | 
					                  max,
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              ActivationFunctionWithMinMax(max, params.float_activation_min,
 | 
				
			||||||
 | 
					                                           params.float_activation_max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                    const uint8_t* input_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                    uint8_t* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_min,
 | 
				
			||||||
 | 
					                   params.quantized_activation_max);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
 | 
				
			||||||
 | 
					  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 | 
				
			||||||
 | 
					  const int input_height = input_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int input_width = input_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int output_height = output_shape.Dims(1);
 | 
				
			||||||
 | 
					  const int output_width = output_shape.Dims(2);
 | 
				
			||||||
 | 
					  const int stride_height = params.stride_height;
 | 
				
			||||||
 | 
					  const int stride_width = params.stride_width;
 | 
				
			||||||
 | 
					  for (int batch = 0; batch < batches; ++batch) {
 | 
				
			||||||
 | 
					    for (int out_y = 0; out_y < output_height; ++out_y) {
 | 
				
			||||||
 | 
					      for (int out_x = 0; out_x < output_width; ++out_x) {
 | 
				
			||||||
 | 
					        for (int channel = 0; channel < depth; ++channel) {
 | 
				
			||||||
 | 
					          const int in_x_origin =
 | 
				
			||||||
 | 
					              (out_x * stride_width) - params.padding_values.width;
 | 
				
			||||||
 | 
					          const int in_y_origin =
 | 
				
			||||||
 | 
					              (out_y * stride_height) - params.padding_values.height;
 | 
				
			||||||
 | 
					          // Compute the boundaries of the filter region clamped so as to
 | 
				
			||||||
 | 
					          // ensure that the filter window fits in the input array.
 | 
				
			||||||
 | 
					          const int filter_x_start = std::max(0, -in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_x_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_width, input_width - in_x_origin);
 | 
				
			||||||
 | 
					          const int filter_y_start = std::max(0, -in_y_origin);
 | 
				
			||||||
 | 
					          const int filter_y_end =
 | 
				
			||||||
 | 
					              std::min(params.filter_height, input_height - in_y_origin);
 | 
				
			||||||
 | 
					          uint8_t max = 0;
 | 
				
			||||||
 | 
					          for (int filter_y = filter_y_start; filter_y < filter_y_end;
 | 
				
			||||||
 | 
					               ++filter_y) {
 | 
				
			||||||
 | 
					            for (int filter_x = filter_x_start; filter_x < filter_x_end;
 | 
				
			||||||
 | 
					                 ++filter_x) {
 | 
				
			||||||
 | 
					              const int in_x = in_x_origin + filter_x;
 | 
				
			||||||
 | 
					              const int in_y = in_y_origin + filter_y;
 | 
				
			||||||
 | 
					              max = std::max(
 | 
				
			||||||
 | 
					                  max,
 | 
				
			||||||
 | 
					                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          max = std::max<uint8_t>(max, params.quantized_activation_min);
 | 
				
			||||||
 | 
					          max = std::min<uint8_t>(max, params.quantized_activation_max);
 | 
				
			||||||
 | 
					          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
 | 
				
			||||||
 | 
					              static_cast<uint8_t>(max);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,109 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void BroadcastPrelu4DSlow(
 | 
				
			||||||
 | 
					    const PreluParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					    const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data,
 | 
				
			||||||
 | 
					    const RuntimeShape& output_shape, T* output_data) {
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
 | 
				
			||||||
 | 
					  const RuntimeShape extended_output_shape =
 | 
				
			||||||
 | 
					      RuntimeShape::ExtendedShape(4, output_shape);
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc1;
 | 
				
			||||||
 | 
					  NdArrayDesc<4> desc2;
 | 
				
			||||||
 | 
					  NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
 | 
				
			||||||
 | 
					    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
 | 
				
			||||||
 | 
					      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
 | 
				
			||||||
 | 
					        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
 | 
				
			||||||
 | 
					          int output_index = Offset(extended_output_shape, b, y, x, c);
 | 
				
			||||||
 | 
					          int input_index = SubscriptToIndex(desc1, b, y, x, c);
 | 
				
			||||||
 | 
					          const int32_t input_value =
 | 
				
			||||||
 | 
					              params.input_offset + input_data[input_index];
 | 
				
			||||||
 | 
					          int32_t output_value;
 | 
				
			||||||
 | 
					          if (input_value >= 0) {
 | 
				
			||||||
 | 
					            output_value = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					                input_value, params.output_multiplier_1, params.output_shift_1);
 | 
				
			||||||
 | 
					          } else {
 | 
				
			||||||
 | 
					            auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
 | 
				
			||||||
 | 
					            const int32_t alpha_value =
 | 
				
			||||||
 | 
					                params.alpha_offset + alpha_data[alpha_index];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            output_value = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					                input_value * alpha_value, params.output_multiplier_2,
 | 
				
			||||||
 | 
					                params.output_shift_2);
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          output_value += params.output_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					          const int32_t quantized_min = std::numeric_limits<T>::min();
 | 
				
			||||||
 | 
					          const int32_t quantized_max = std::numeric_limits<T>::max();
 | 
				
			||||||
 | 
					          const int32_t clamped_output =
 | 
				
			||||||
 | 
					              std::min(quantized_max, std::max(quantized_min, output_value));
 | 
				
			||||||
 | 
					          output_data[output_index] = static_cast<T>(clamped_output);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                  const T* input_data, const RuntimeShape& alpha_shape,
 | 
				
			||||||
 | 
					                  const T* alpha_data, const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                  T* output_data) {
 | 
				
			||||||
 | 
					  const int32_t quantized_min = std::numeric_limits<T>::min();
 | 
				
			||||||
 | 
					  const int32_t quantized_max = std::numeric_limits<T>::max();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int flat_size =
 | 
				
			||||||
 | 
					      MatchingElementsSize(input_shape, alpha_shape, output_shape);
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; ++i) {
 | 
				
			||||||
 | 
					    const int32_t input_value = params.input_offset + input_data[i];
 | 
				
			||||||
 | 
					    int32_t output_value;
 | 
				
			||||||
 | 
					    if (input_value >= 0) {
 | 
				
			||||||
 | 
					      output_value = MultiplyByQuantizedMultiplier(
 | 
				
			||||||
 | 
					          input_value, params.output_multiplier_1, params.output_shift_1);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      const int32_t alpha_value = params.alpha_offset + alpha_data[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
 | 
				
			||||||
 | 
					                                                   params.output_multiplier_2,
 | 
				
			||||||
 | 
					                                                   params.output_shift_2);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    output_value += params.output_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int32_t clamped_output =
 | 
				
			||||||
 | 
					        std::min(quantized_max, std::max(quantized_min, output_value));
 | 
				
			||||||
 | 
					    output_data[i] = static_cast<T>(clamped_output);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,138 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// For example, if sequence of dimensions of one input is
 | 
				
			||||||
 | 
					// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
 | 
				
			||||||
 | 
					// we can consolidate these as
 | 
				
			||||||
 | 
					// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// The category is updated in the less-frequent case of shapes that are
 | 
				
			||||||
 | 
					// not suited to a fivefold-loop broadcast.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Falls back to generic pattern when it does not know how to process properly.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Returns true iff there is some sort of broadcast, which includes five-fold
 | 
				
			||||||
 | 
					// patterns and falling back to generic broadcast.
 | 
				
			||||||
 | 
					inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
 | 
				
			||||||
 | 
					                                   const RuntimeShape& shape1,
 | 
				
			||||||
 | 
					                                   tflite::ArithmeticParams* params) {
 | 
				
			||||||
 | 
					  const int dims_count =
 | 
				
			||||||
 | 
					      std::max(shape0.DimensionsCount(), shape1.DimensionsCount());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
 | 
				
			||||||
 | 
					  RuntimeShape scalar_shape(dims_count, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0);
 | 
				
			||||||
 | 
					  auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Check for "exact" match, implicitly accepting any scalar shapes.
 | 
				
			||||||
 | 
					  if (extended_shape0 == extended_shape1) {
 | 
				
			||||||
 | 
					    params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
 | 
				
			||||||
 | 
					    return false;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = dims_count - 1; i >= 0; --i) {
 | 
				
			||||||
 | 
					    if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) {
 | 
				
			||||||
 | 
					      continue;
 | 
				
			||||||
 | 
					    } else if (extended_shape0.Dims(i) == 1) {
 | 
				
			||||||
 | 
					      params->broadcast_category =
 | 
				
			||||||
 | 
					          BroadcastableOpCategory::kFirstInputBroadcastsFast;
 | 
				
			||||||
 | 
					      break;
 | 
				
			||||||
 | 
					    } else if (extended_shape1.Dims(i) == 1) {
 | 
				
			||||||
 | 
					      params->broadcast_category =
 | 
				
			||||||
 | 
					          BroadcastableOpCategory::kSecondInputBroadcastsFast;
 | 
				
			||||||
 | 
					      break;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      // This case is erroneous: there is a dimension that does not match and
 | 
				
			||||||
 | 
					      // is not a broadcast from one shape to the other.
 | 
				
			||||||
 | 
					      params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
 | 
				
			||||||
 | 
					      return true;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (params->broadcast_category !=
 | 
				
			||||||
 | 
					          BroadcastableOpCategory::kFirstInputBroadcastsFast &&
 | 
				
			||||||
 | 
					      params->broadcast_category !=
 | 
				
			||||||
 | 
					          BroadcastableOpCategory::kSecondInputBroadcastsFast) {
 | 
				
			||||||
 | 
					    // This is unreachable because at least one else clause in the above loop
 | 
				
			||||||
 | 
					    // must be reached.
 | 
				
			||||||
 | 
					    TFLITE_DCHECK(false);
 | 
				
			||||||
 | 
					    params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
 | 
				
			||||||
 | 
					    return false;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // From this point it is assumed contractually that corresponding dimensions
 | 
				
			||||||
 | 
					  // in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
 | 
				
			||||||
 | 
					  const bool swap_inputs = params->broadcast_category ==
 | 
				
			||||||
 | 
					                           BroadcastableOpCategory::kSecondInputBroadcastsFast;
 | 
				
			||||||
 | 
					  const RuntimeShape* shape_a =
 | 
				
			||||||
 | 
					      swap_inputs ? &extended_shape1 : &extended_shape0;
 | 
				
			||||||
 | 
					  const RuntimeShape* shape_b =
 | 
				
			||||||
 | 
					      swap_inputs ? &extended_shape0 : &extended_shape1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int i = dims_count - 1;
 | 
				
			||||||
 | 
					  params->broadcast_shape[0] = 1;
 | 
				
			||||||
 | 
					  params->broadcast_shape[1] = 1;
 | 
				
			||||||
 | 
					  params->broadcast_shape[2] = 1;
 | 
				
			||||||
 | 
					  params->broadcast_shape[3] = 1;
 | 
				
			||||||
 | 
					  params->broadcast_shape[4] = 1;
 | 
				
			||||||
 | 
					  // y_0 is greedy: include dims if both or neither equal 1: in other words,
 | 
				
			||||||
 | 
					  // test for equality rather than (shape_a->Dims(i) != 1).
 | 
				
			||||||
 | 
					  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
 | 
				
			||||||
 | 
					    params->broadcast_shape[4] *= shape_b->Dims(i);
 | 
				
			||||||
 | 
					    --i;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // Here either input_a or input_b has dim of 1 (if i >= 0).  If it is input_b
 | 
				
			||||||
 | 
					  // that has the unit dimension, the next two loops are not entered.
 | 
				
			||||||
 | 
					  while (i >= 0 && shape_a->Dims(i) == 1) {
 | 
				
			||||||
 | 
					    params->broadcast_shape[3] *= shape_b->Dims(i);
 | 
				
			||||||
 | 
					    --i;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
 | 
				
			||||||
 | 
					    params->broadcast_shape[2] *= shape_a->Dims(i);
 | 
				
			||||||
 | 
					    --i;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // Here either input_a or input_b has dim of 1 (if i >= 0).
 | 
				
			||||||
 | 
					  while (i >= 0 && shape_b->Dims(i) == 1) {
 | 
				
			||||||
 | 
					    params->broadcast_shape[1] *= shape_a->Dims(i);
 | 
				
			||||||
 | 
					    --i;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
 | 
				
			||||||
 | 
					    params->broadcast_shape[0] *= shape_b->Dims(i);
 | 
				
			||||||
 | 
					    --i;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Rarer case is when the broadcast dimensions cannot be handled by a fivefold
 | 
				
			||||||
 | 
					  // loop.
 | 
				
			||||||
 | 
					  if (i >= 0) {
 | 
				
			||||||
 | 
					    params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,55 @@
 | 
				
			||||||
 | 
					/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					==============================================================================*/
 | 
				
			||||||
 | 
					#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
 | 
				
			||||||
 | 
					#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/common.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/compatibility.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/cppmath.h"
 | 
				
			||||||
 | 
					#include "tensorflow/lite/kernels/internal/types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace tflite {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace reference_ops {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename InputT, typename OutputT>
 | 
				
			||||||
 | 
					inline void AffineQuantize(const tflite::QuantizationParams& op_params,
 | 
				
			||||||
 | 
					                           const RuntimeShape& input_shape,
 | 
				
			||||||
 | 
					                           const InputT* input_data,
 | 
				
			||||||
 | 
					                           const RuntimeShape& output_shape,
 | 
				
			||||||
 | 
					                           OutputT* output_data) {
 | 
				
			||||||
 | 
					  const int32_t zero_point = op_params.zero_point;
 | 
				
			||||||
 | 
					  const double scale = op_params.scale;
 | 
				
			||||||
 | 
					  const int flat_size = MatchingFlatSize(input_shape, output_shape);
 | 
				
			||||||
 | 
					  static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
 | 
				
			||||||
 | 
					  static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < flat_size; i++) {
 | 
				
			||||||
 | 
					    const InputT val = input_data[i];
 | 
				
			||||||
 | 
					    int32_t unclamped =
 | 
				
			||||||
 | 
					        static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
 | 
				
			||||||
 | 
					        zero_point;
 | 
				
			||||||
 | 
					    int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
 | 
				
			||||||
 | 
					    output_data[i] = clamped;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace reference_ops
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace tflite
 | 
				
			||||||
 | 
					#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
 | 
				
			||||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
		Reference in New Issue