'''
    Copyright (c) 2018-2020
    Jianjia Ma
    majianjia@live.com

    SPDX-License-Identifier: Apache-2.0

    Change Logs:
    Date           Author       Notes
    2019-02-05     Jianjia Ma   The first version
'''

import sklearn.metrics as skmetrics
import matplotlib.pyplot as plt
import numpy as np  # np is used throughout; import it explicitly rather than relying on star imports
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import *
from tensorflow.keras.layers import *
from fully_connected_opt_weight_generation import *
from gen_config import *
import scipy.stats
import time
import warnings

model_major_version = 0
model_sub_version = 4
model_reversion = 3

# C macro reference for the version number (kept here as comments):
#define NNOM_MAJORVERSION     0L              /**< major version number */
#define NNOM_SUBVERSION       4L              /**< minor version number */
#define NNOM_REVISION         3L              /**< revise version number */
#define NNOM_VERSION          ((NNOM_MAJORVERSION * 10000) + (NNOM_SUBVERSION * 100) + NNOM_REVISION)

def fuse_bn_to_conv(layer):
    # try to fuse a BN layer into the preceding convolutional layer
    if ('conv' in layer.name) and \
            ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
        print("fusing batch normalization to", layer.name)
        bn_layer = layer._outbound_nodes[0].outbound_layer
        c_w = layer.get_weights()[0]
        c_b = layer.get_weights()[1]
        print('original weight max', c_w.max(), 'min', c_w.min())
        print('original bias max', c_b.max(), 'min', c_b.min())
        bn_gamma = bn_layer.get_weights()[0]
        bn_beta = bn_layer.get_weights()[1]
        bn_mean = bn_layer.get_weights()[2]
        bn_variance = bn_layer.get_weights()[3]
        epsilon = 1e-3  # default epsilon for tf.slim.batch_norm
        if ('conv2d' in layer.name):
            if "depthwise" in layer.name:  # depthwise batchnorm params are ordered differently
                for l in range(c_w.shape[3]):
                    for k in range(c_w.shape[2]):
                        for j in range(c_w.shape[1]):
                            for i in range(c_w.shape[0]):
                                c_w[i][j][k][l] *= bn_gamma[k*c_w.shape[3]+l] / np.sqrt(bn_variance[k*c_w.shape[3]+l] + epsilon)
                depth_dim = c_w.shape[2] * c_w.shape[3]  # test needed
            # normal conv
            else:
                for l in range(c_w.shape[3]):
                    for k in range(c_w.shape[2]):
                        for j in range(c_w.shape[1]):
                            for i in range(c_w.shape[0]):
                                c_w[i][j][k][l] *= bn_gamma[l] / np.sqrt(bn_variance[l] + epsilon)
                depth_dim = c_w.shape[3]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
        # conv1d
        else:
            epsilon = 1e-3  # default epsilon for tf.slim.batch_norm
            for k in range(c_w.shape[2]):
                for j in range(c_w.shape[1]):
                    for i in range(c_w.shape[0]):
                        if "depthwise" in layer.name:  # depthwise batchnorm params are ordered differently
                            c_w[i][j][k] *= bn_gamma[j] / np.sqrt(bn_variance[j] + epsilon)
                        else:
                            c_w[i][j][k] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)

            if "depthwise" in layer.name:
                depth_dim = c_w.shape[1]*c_w.shape[2]  # needs to be tested
            else:
                depth_dim = c_w.shape[2]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]

        print('fused weight max', c_w.max(), 'min', c_w.min())
        print('fused bias max', c_b.max(), 'min', c_b.min())
        # write the weights back to the layer
        # after this, the original (unfused) weights are overwritten; a better way to pass the new weights is needed
        layer.set_weights([c_w, c_b])

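# Math behind the fusion above (a sketch for reference, written in the same symbols
# as the variables used by fuse_bn_to_conv): a convolution y = w*x + b followed by
# batch normalization BN(y) = gamma * (y - mean) / sqrt(var + eps) + beta is
# equivalent to a single convolution with
#     w' = w * gamma / sqrt(var + eps)
#     b' = gamma * (b - mean) / sqrt(var + eps) + beta
# which is what the loops compute before layer.set_weights([c_w, c_b]).
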
def generate_test_bin(x, y, name='test_data_with_label.bin'):
    '''
    this method generates a binary test file containing labels and quantised test data
    :param x:  input x data
    :param y:  input label (one hot label)
    :return:
    '''
    # quantize input x
    dec_bits = find_dec_bits_max_min(x, bit_width=8)
    x = np.round(x*2**dec_bits).clip(-128, 127).astype(np.int8)
    # get label
    if(len(y.shape) > 1):
        test_label = np.argwhere(y == 1).astype(np.int8)  # test data
        test_label = test_label[:, 1]
    else:
        test_label = y

    # get data
    dat = x.astype(dtype="byte")  # test data
    batch_size = dat.shape[0]     # total pieces of data
    dat = dat.flatten()           # flatten to get the total size.
    block_size = int(dat.size / batch_size)  # this must be an integer, but... just to confirm

    # write (label x 128) (data_block x 128)
    label_batch = 128       # the Y-modem example uses a batch of 128
    with open(name, 'wb') as f:
        start = 0
        while start <= (test_label.size - label_batch):
            test_label[start: start + label_batch].tofile(f)
            dat[block_size * start: block_size * (start + label_batch)].tofile(f)
            start += label_batch

        # the rest of the data
        if (start < test_label.size):
            rest_len = test_label.size - start
            new_labls = test_label[start:]
            new_labls = np.pad(new_labls, (0, label_batch - rest_len), mode='constant')
            new_labls.tofile(f)
            dat[block_size * start:].tofile(f)

    print("binary test file generated:", name)
    print("test data length:", test_label.size)
    return

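# Layout of the file written above, as implied by the loop: repeated blocks of
# [128 x int8 labels][128 * block_size x int8 data], where block_size is the
# flattened size of one sample; in the final partial block the labels are
# zero-padded to 128 while the data part only contains the remaining samples.
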
def is_shift_layer(layer):
    ''' layer which can change the output encoding'''
    # FIXME: add more which will change the output shift
    if('input' in layer.name or
       'conv2d' in layer.name or
       'conv1d' in layer.name or
       'dense' in layer.name or
       'softmax' in layer.name or
       'sigmoid' in layer.name or
       'tanh' in layer.name or
       ('add' in layer.name and 'zero' not in layer.name) or  # the name, zero_padding contains 'add'
       'subtract' in layer.name or
       'multiply' in layer.name or
       ('activation' in layer.name and layer.get_config()['activation'] == 'softmax') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_sigmoid') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'tanh') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_tanh') or
       is_rnn_layer(layer)
    ):
        return True
    return False


def is_shift_fixed(layer):
    ''' layer which shift to a fixed value'''
    # FIXME: add more which will change the output shift
    if('softmax' in layer.name or
       'sigmoid' in layer.name or
       'tanh' in layer.name or
       ('activation' in layer.name and layer.get_config()['activation'] == 'softmax') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'sigmoid') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_sigmoid') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'tanh') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_tanh') or
       is_rnn_layer(layer)
    ):
        return True
    return False


def is_lstm_layer(layer):
    if type(layer) is LSTM or 'lstm' in layer.name:
        return True
    if(type(layer) is RNN or 'rnn' in layer.name):
        if(type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
            return True
    return False


def is_gru_layer(layer):
    if type(layer) is GRU or 'gru' in layer.name:
        return True
    if(type(layer) is RNN or 'rnn' in layer.name):
        if(type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
            return True
    return False


def is_rnn_layer(layer):
    if('rnn' in layer.name or
       is_lstm_layer(layer) or
       is_gru_layer(layer)
    ):
        return True
    return False

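# Note on the predicates above: is_shift_layer() marks layers whose output Q-format
# is determined from their own output statistics, while is_shift_fixed() marks layers
# whose output shift is fixed (sigmoid/tanh/softmax style activations). Both are used
# by quantize_output() and quantize_weights() further down.
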
def find_offset(data):
    """
    Offset of the original data before quantisation
    :param data:
    :return: offset of the data block
    """
    return np.average(data)


def find_dec_bits_max_min(data, bit_width=8, maximum_bit=32):
    """
    A regular non-saturated shift-based quantisation method, using max/min values.
    :param data:
    :param bit_width:
    :param maximum_bit: maximum decimal bit, in case a very small bias would otherwise lead to a very large dec bit
    :return:
    """
    max_val = abs(data.max()) - abs(data.max()/pow(2, bit_width))  # allow very small saturation.
    min_val = abs(data.min()) - abs(data.min()/pow(2, bit_width))
    int_bits = int(np.ceil(np.log2(max(max_val, min_val))))
    dec_bits = (bit_width-1) - int_bits
    return min(dec_bits, maximum_bit)

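# Q-format recap for the max/min search above (illustrative numbers, assumed):
# an int8 value q represents x ~= q / 2**dec_bits, so
#     dec_bits = (bit_width - 1) - ceil(log2(max_abs))
# e.g. data spanning roughly [-3.0, 3.0] gives int_bits = 2 and dec_bits = 5,
# i.e. Q2.5 with a resolution of 1/32.
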
def find_dec_bits_max_min_axis(data, axis=-1, bit_width=8, maximum_bit=32):
    """
    A regular non-saturated shift-based quantisation method, using max/min values per axis.
    :param data:
    :param axis:
    :param bit_width:
    :return:
    """
    dec_bits = []
    # if(len(data.shape) < np.abs(axis)): # for depthwise with axis = -2 while len(shape) =1
    #     size = data.shape[0]
    #     axis = 0 #
    # else:
    #     size = data.shape[axis]
    for i in np.arange(0, data.shape[axis]):
        d = np.take(data, indices=i, axis=axis)
        max_val = abs(d.max()) - abs(d.max() / pow(2, bit_width))  # allow very small saturation.
        min_val = abs(d.min()) - abs(d.min() / pow(2, bit_width))
        int_bit = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
        dec_bit = (bit_width-1) - int_bit
        dec_bits.append(min(dec_bit, maximum_bit))
    return dec_bits

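# find_dec_bits_max_min_axis() returns one dec_bits entry per slice along `axis`
# (e.g. per output channel of a conv kernel); quantize_weights() below relies on
# this when per_channel_quant is enabled.
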
def find_dec_bits_kld(data, bit_width=8, scan_times=4, maximum_bit=16):
    """
    # saturation shift, using KLD method (Kullback-Leibler divergence)
    # Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
    :param data: the data for which to look for a quantisation
    :param bit_width: the bitwidth of the data
    :param scan_times: the number of shifts to try for the best KLD (normally the second is the best)
    :return: dec bit width for this data
    """
    # do a regular non-saturated quantisation
    max_val = data.max()
    min_val = data.min()
    abs_max = max(abs(max_val), abs(min_val))
    int_bits = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
    dec_bits = (bit_width-1) - int_bits

    # now looking for the best quantisation using the KLD method
    small_var = 1e-5
    bins = np.arange(-abs_max, abs_max, abs_max / 2048 * 2)
    q_bins = np.arange(-abs_max, abs_max, abs_max / 256 * 2)
    flat_hist = np.histogram(data.flatten(), bins=bins)[0]
    kl_loss = []
    kl_shifts = []
    for shift in range(scan_times):
        t = 2 ** (dec_bits + shift)  # 2-based threshold
        act = np.round(data.flatten() * t)
        act = act / t
        act = np.clip(act, -128 / t, 127 / t)
        act = np.histogram(act, bins=q_bins)[0]
        act_hist = np.zeros(2047)
        chunk = int(2048 / 256)
        for i in range(int(255)):
            none_zero = np.count_nonzero(flat_hist[i * chunk:(i + 1) * chunk])
            if none_zero == 0:
                continue
            for j in range(chunk):
                act_hist[i * chunk + j] = act[i] / none_zero if flat_hist[i * chunk + j] != 0 else 0
        flat_hist[flat_hist == 0] = small_var
        act_hist[act_hist == 0] = small_var
        kl = scipy.stats.entropy(flat_hist, act_hist)
        kl_loss.append(kl)
        kl_shifts.append(dec_bits + shift)

    # now get the least loss from the scanned kld shifts
    dec_bits = kl_shifts[np.argmin(kl_loss)]  # set the dec_bit to the KLD result
    return min(dec_bits, maximum_bit)

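# The KLD search above scans the shifts dec_bits .. dec_bits + scan_times - 1 and
# keeps the one whose clipped, re-quantised histogram has the smallest KL divergence
# against the original distribution (see the TensorRT reference in the docstring).
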
# convert to the int8 range [-128, 127] (or the range implied by `bitwith`)
def quantize_data(data, dec_bits, axis=-1, per_axis=False, bitwith=8):
    if (per_axis):
        out = []
        for i in np.arange(0, data.shape[axis]):
            d = np.take(data, indices=i, axis=axis)
            d = np.round(d * 2 ** dec_bits[i])
            d = np.clip(d, -2**(bitwith-1), 2**(bitwith-1)-1)
            d = np.expand_dims(d, axis=axis)
            out.append(d)
        out = np.concatenate(out, axis=axis)
        return out
    else:
        return np.clip(np.round(data * 2 ** dec_bits), -2**(bitwith-1), 2**(bitwith-1)-1)

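# Small usage sketch (hypothetical numbers): with dec_bits = 5,
#     quantize_data(np.array([0.5, -1.2]), 5)
# rounds 0.5 * 32 = 16 and -1.2 * 32 = -38.4 -> -38, both inside [-128, 127],
# so the result is array([ 16., -38.]).
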
def quantize_rnn_intermediate_output(layer, features):
    def nnom_sigmoid(data):
        return 1 / (1 + np.exp(-data))
    def nnom_tanh(data):
        return np.tanh(data)
    def split_array(d, num):
        l = len(d)
        if(num == 4):
            return d[:int(l/4)], d[int(l/4): int(l/2)], d[int(l/2):-int(l/4)], d[-int(l/4):]
        elif(num == 3):
            return d[:int(l/3)], d[int(l/3): -int(l/3)], d[-int(l/3):]
    lcfg = layer.get_config()
    if(lcfg['go_backwards']):
        features = features[:,::-1,:]  # reverse the timestamps

    if(type(layer.cell) is SimpleRNNCell):
        cfg = layer.cell.get_config()
        state = np.zeros(cfg['units'])
        kernel = layer.get_weights()[0]
        recurrent_kernel = layer.get_weights()[1]
        bias = layer.get_weights()[2]
        # replicate keras's implementation
        def simple_cell_step(inputs, state, kernel, recurrent_kernel, bias, activation):
            h = np.dot(inputs, kernel)
            h = np.add(h, bias)
            h2 = np.dot(state, recurrent_kernel)
            output = h + h2
            output = activation(output)
            return output, h, h2
        output_array = []
        h_array = []
        h2_array = []
        activation = nnom_tanh if cfg['activation'] == 'tanh' else nnom_sigmoid
        state = np.zeros(cfg['units'])
        for feature in features:
            if(not layer.stateful):
                state = np.zeros(cfg['units'])
            for fe in feature:
                output, h, h2 = simple_cell_step(fe, state, kernel, recurrent_kernel, bias, activation)
                state = output
                output_array.append(output)
                h_array.append(h)
                h2_array.append(h2)
        output_array = np.array(output_array)
        h_array = np.array(h_array)
        h2_array = np.array(h2_array)
        # qout = find_dec_bits_kld(output_array)
        # qh = find_dec_bits_kld(h_array)
        # qh2 = find_dec_bits_kld(h2_array)
        qout = find_dec_bits_max_min(output_array)
        qh = find_dec_bits_max_min(h_array)
        qh2 = find_dec_bits_max_min(h2_array)
        return [qout, qh, qh2]

    elif (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
        cfg = layer.cell.get_config()
        state = np.zeros(cfg['units']*2)
        kernel = layer.get_weights()[0]
        recurrent_kernel = layer.get_weights()[1]
        bias = layer.get_weights()[2]
        def lstm_cell_step(cell_inputs, cell_states, kernel, recurrent_kernel, bias):
            h_tm1 = cell_states[0]  # previous memory state
            c_tm1 = cell_states[1]  # previous carry state
            z1 = np.dot(cell_inputs, kernel)
            z1 = np.add(z1, bias)
            z2 = np.dot(h_tm1, recurrent_kernel)
            z = z1 + z2             # -----> q_z
            z0, z1, z2, z3 = split_array(z, 4)
            i = nnom_sigmoid(z0)  # q0.7
            f = nnom_sigmoid(z1)  # q0.7
            c1 = f * c_tm1
            c2 = i * nnom_tanh(z2)  # q0.7
            c = c1 + c2          # -----> q_c
            o = nnom_sigmoid(z3)  # q0.7
            tc = nnom_tanh(c)
            h = o * tc  # q0.7
            return h, [h, c], z, z0, z1, z2, z3
        h_array = []
        c_array = []
        z_array = []
        z0_array = []
        z1_array = []
        z2_array = []
        z3_array = []
        state = [np.zeros(cfg['units']), np.zeros(cfg['units'])]
        for feature in features:
            if(not layer.stateful):
                state = [np.zeros(cfg['units']), np.zeros(cfg['units'])]
            for fe in feature:
                output, state, z, z0, z1, z2, z3 = lstm_cell_step(fe, state, kernel, recurrent_kernel, bias)
                h_array.append(output)
                c_array.append(state[1])
                z_array.append(z)
                z0_array.append(z0)
                z1_array.append(z1)
                z2_array.append(z2)
                z3_array.append(z3)
        h_array = np.array(h_array)
        c_array = np.array(c_array)
        z_array = np.array(z_array)
        z0_array = np.array(z0_array)
        z1_array = np.array(z1_array)
        z2_array = np.array(z2_array)
        z3_array = np.array(z3_array)
        # q_h = find_dec_bits_kld(h_array)
        # q_c = find_dec_bits_kld(c_array)
        # q_z = find_dec_bits_kld(z_array)
        # q_z0 = find_dec_bits_kld(z0_array)
        # q_z1 = find_dec_bits_kld(z1_array)
        # q_z2 = find_dec_bits_kld(z2_array)
        # q_z3 = find_dec_bits_kld(z3_array)
        q_h = find_dec_bits_max_min(h_array)
        q_c = find_dec_bits_max_min(c_array)
        q_z = find_dec_bits_max_min(z_array)
        q_z0 = find_dec_bits_max_min(z0_array)      # not needed.
        q_z1 = find_dec_bits_max_min(z1_array)
        q_z2 = find_dec_bits_max_min(z2_array)
        q_z3 = find_dec_bits_max_min(z3_array)
        return [q_h, q_c, q_z]

    elif (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
        cfg = layer.cell.get_config()
        state = np.zeros(cfg['units'])
        k = layer.get_weights()[0]
        rk = layer.get_weights()[1]
        bias = layer.get_weights()[2]

        def gru_cell_step(cell_inputs, cell_states, kernel, recurrent_kernel, input_bias, recurrent_bias):
            h_tm1 = cell_states[0]
            # inputs projected by all gate matrices at once
            matrix_x = np.dot(cell_inputs, kernel) + input_bias
            x_z, x_r, x_h = split_array(matrix_x, 3)
            # hidden state projected by all gate matrices at once
            matrix_inner = np.dot(h_tm1, recurrent_kernel) + recurrent_bias
            recurrent_z, recurrent_r, recurrent_h = split_array(matrix_inner, 3)
            z = nnom_sigmoid(x_z + recurrent_z)
            r = nnom_sigmoid(x_r + recurrent_r)
            hh = nnom_tanh(x_h + r * recurrent_h)
            # previous and candidate state mixed by update gate
            # h = z * h_tm1 + (1 - z) * hh
            h1 = z * h_tm1
            h2 = 1 - z
            h3 = h2 * hh
            h = h1 + h3
            return h, [h], matrix_x, matrix_inner
        h_array = []
        z_array = []
        i_array = []
        state = [np.zeros(cfg['units'])]
        for feature in features:
            if (not layer.stateful):
                state = [np.zeros(cfg['units'])]
            for fe in feature:
                output, state, z, i = gru_cell_step(fe, state, k, rk, bias[0], bias[1])
                h_array.append(output)
                z_array.append(z)
                i_array.append(i)
        h_array = np.array(h_array)
        i_array = np.array(i_array)
        z_array = np.array(z_array)
        # q_h = find_dec_bits_kld(h_array)
        # q_i = find_dec_bits_kld(i_array)
        # q_z = find_dec_bits_kld(z_array)
        q_h = find_dec_bits_max_min(h_array)
        q_i = find_dec_bits_max_min(i_array)
        q_z = find_dec_bits_max_min(z_array)
        q_z = min(q_i, q_z)
        return [q_h, q_z]
    return []

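# Summary of the return values above (read off the three branches):
#   SimpleRNN cell -> [q_output, q_h, q_h2]
#   LSTM cell      -> [q_h, q_c, q_z]
#   GRU cell       -> [q_h, q_z]  (q_z already folded with q_i via min())
# each entry being a dec-bit count found with find_dec_bits_max_min().
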
def quantize_output(model, x_test, quantize_method='max_min', layer_offset=False, calibrate_size=None):
    # limit the test data size
    if(calibrate_size is not None):
        if (x_test.shape[0] > calibrate_size):
            x_test = x_test[:calibrate_size]
    # test, show the output ranges
    layer_q_list = {}
    # FIXME: only support one input
    if (type(model.layers[0]) != InputLayer):
        L = [model.input] + model.layers
    else:
        L = model.layers

    for layer in L:  # layer loop
        if ("input" in layer.name):
            features = x_test
        else:
            # rnn layers need a further step to determine the intermediate q format
            if (is_rnn_layer(layer)):
                in_layer = layer.inbound_nodes[0].inbound_layers
                layer_model = Model(inputs=model.input, outputs=in_layer.output)
                bs = model.input.shape[0]
                features = layer_model.predict(x_test, batch_size=bs)
                intermediate_dec = quantize_rnn_intermediate_output(layer, features)
                print(layer.name, 'dec bit', intermediate_dec)
                layer_q_list['intermediate_' + layer.name] = intermediate_dec

            # batch_normalization needs to be handled differently, since we fuse its weights into the previous conv.
            # sigmoid and tanh are different, their shift is fixed to 7
            if (is_shift_layer(layer) or
                    ('batch_normalization' in layer.name)):
                layer_model = Model(inputs=model.input, outputs=layer.output)
                bs = model.input.shape[0]
                features = layer_model.predict(x_test, batch_size=bs)
            else:
                # leave the features unchanged, so this layer's shift will be the same as its input's
                pass

        # we currently only support one offset for a layer output.
        if(layer_offset):
            offset = find_offset(features)
            features = features - offset
        else:
            offset = 0
        # saturated shift using the KLD method OR non-saturated shift using max-min
        if ("kld" in quantize_method
                and not is_shift_fixed(layer)
                and "input" not in layer.name
                and "dense" not in layer.name):  # test, also do not use kld in input layer
            dec_bits = find_dec_bits_kld(features, bit_width=8, scan_times=4)
            print(layer.name, "Quantized method:", "KLD", "Values max:", np.max(features), "min:", np.min(features), "dec bit", dec_bits)
        else:
            dec_bits = find_dec_bits_max_min(features, bit_width=8)
            print(layer.name, "Quantized method:", "max-min", "Values max:", np.max(features), "min:", np.min(features), "dec bit", dec_bits)
        # quantise offset
        offset = int(np.round(offset * 2 ** dec_bits))
        # record the shift
        if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
            layer_q_list[layer.name.split(':')[0]] = [dec_bits, offset]
        else:
            layer_q_list[layer.name] = [dec_bits, offset]
        if ('batch_normalization' in layer.name):
            layer_q_list[layer.inbound_nodes[0].inbound_layers.name] = [dec_bits, offset]  # use the bn layer shift to update the previous layer.

    # scan the layers backward, try to unify the dec bit in multiple-input layers (add, mult, concat, etc.)
    LM = {}
    for layer in model.layers:
        LM[layer.name] = layer
    L = [l for l in model.layers[1:]]
    L.reverse()

    def update_previous_layer_shift(layer, dec_bit):
        if(type(layer.input) == list):
            for inp in layer.input:
                iname = inp.name.split('/')[0]
                if('input' in iname):
                    continue
                layer_q_list[iname][0] = dec_min
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], dec_bit)
        else:
            iname = layer.input.name.split('/')[0]
            if('input' in iname):
                return
            layer_q_list[iname][0] = dec_min
            if(not is_shift_layer(LM[iname])):
                update_previous_layer_shift(LM[iname], dec_bit)

    for layer in L:
        if(type(layer.input) == list):
            iname = layer.input[0].name.split('/')[0].split(':')[0]
            dec_min = layer_q_list[iname][0]
            # find the min dec bit among these inputs
            for inp in layer.input:
                iname = inp.name.split('/')[0].split(':')[0]
                if(layer_q_list[iname][0] < dec_min):
                    dec_min = layer_q_list[iname][0]
                if(layer_q_list[iname][0] != dec_min):
                    bFlag = True
            for inp in layer.input:
                iname = inp.name.split('/')[0].split(':')[0]
                layer_q_list[iname][0] = dec_min
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], dec_min)
            print('set dec bit', dec_min, 'for the input of', layer.name, ':', [inp.name.split('/')[0] for inp in layer.input])
            if(not is_shift_layer(layer) or dec_min < layer_q_list[layer.name][0]):  # update the current layer's shift only when we cannot change the shift
                layer_q_list[layer.name][0] = dec_min
    # quantise offset
    print("quantisation list", layer_q_list)
    return layer_q_list

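# Shape of the dictionary returned by quantize_output(), as built above:
#   layer_q_list[layer_name]                 = [dec_bits, offset]
#   layer_q_list['intermediate_' + rnn_name] = list of intermediate dec bits
# quantize_weights() and generate_model() below look the entries up by layer name.
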
def layer_name_from_tensor(t):
    return t.name.replace(':','/').split('/')[0]

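# Example (hypothetical tensor name): a Keras tensor named 'conv2d_1/BiasAdd:0'
# maps to the layer name 'conv2d_1'.
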
def quantize_weights(model, name='weights.h', format='hwc', per_channel_quant=True, layer_q_list=None):
    # Quantize weights to 8-bits using (min,max) and write them to file
    f = open(name, 'w')
    f.write('#include "nnom.h"\n\n')
    f.write('/* Weights, bias and Q format */\n')
    f.close()
    for curr_idx, layer in enumerate(model.layers):
        if (not layer.weights):
            continue
        # before merging the bn layer, check if the bn is "legally" placed after a Conv
        if('batch_normalization' in layer.name) and \
            ('conv' not in layer.inbound_nodes[0].inbound_layers.name):
            raise Exception('Only support batch_normalization placed after conv', layer.name,
                            layer.inbound_nodes[0].inbound_layers.name)
        # try to fuse the BN layer into the convolutional layer
        if ('conv' in layer.name) and \
            ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
            fuse_bn_to_conv(layer)
        # generate weights and bias now
        weight_dec_shift = 0
        print('quantizing weights for layer', layer.name)
        layer_weights = layer.get_weights()
        for idx, var in enumerate(layer_weights):
            var_name = convert_tensor_name(layer.weights[idx])
            var_values = var
            if("kernel" not in var_name and 'bias' not in var_name):  # ignore batch normalisation's parameters
                continue

            if (per_channel_quant and type(layer) in [Conv2D, Conv1D, DepthwiseConv2D, Conv2DTranspose]):
                if(type(layer) in [DepthwiseConv2D] and "kernel" in var_name):  # depthwise kernel quantised per channel
                    shape = var_values.shape[:2] + (-1,)  # need to combine the mult and channel first
                    var = var_values.reshape(shape)
                    dec_bits = find_dec_bits_max_min_axis(var, axis=-1, bit_width=8)
                elif(type(layer) in [Conv2DTranspose]):
                    dec_bits = find_dec_bits_max_min_axis(var_values, axis=-2, bit_width=8)
                else:
                    dec_bits = find_dec_bits_max_min_axis(var_values, bit_width=8)
            else:
                dec_bits = find_dec_bits_max_min(var_values, bit_width=8)
            print('   ', var_name, "dec bit", dec_bits)

            # kernel dec, bias dec, bias shift, output shift
            if(is_shift_layer(layer) and not is_rnn_layer(layer)):
                inp = layer.input.name.replace(':','/').split('/')[0]
                layer_input_dec = layer_q_list[inp][0]
                layer_output_dec = layer_q_list[layer.name][0]
                if ("kernel" in var_name):
                    weight_dec_shift = dec_bits
                else:
                    # channel wise
                    if hasattr(dec_bits, '__len__'):
                        bias_shift = np.full(len(dec_bits), layer_input_dec)+weight_dec_shift-dec_bits
                        layer_output_shift = np.full(len(weight_dec_shift), layer_input_dec) + weight_dec_shift \
                            - np.full(len(weight_dec_shift), layer_output_dec)
                        if (np.min(bias_shift) < 0):
                            for i, w_dec in enumerate(weight_dec_shift):
                                if (bias_shift[i] < 0):
                                    dec_bits[i] = w_dec
                                    bias_shift[i] = 0
                    # layer wise
                    else:
                        bias_shift = layer_input_dec + weight_dec_shift - dec_bits
                        layer_output_shift = layer_input_dec + weight_dec_shift - layer_output_dec
                        if (bias_shift < 0):
                            dec_bits = weight_dec_shift
                            bias_shift = 0
            # RNN layer's kernel dec, bias dec, bias shift, output shift
            if(is_rnn_layer(layer)):
                inp = layer.input.name.replace(':','/').split('/')[0]
                layer_input_dec = layer_q_list[inp][0]
                layer_output_dec = layer_q_list[layer.name][0]
                #if (type(layer.cell) is SimpleRNNCell):
                if ("kernel" in var_name and 'recurrent' not in var_name):
                    weight_dec_shift = dec_bits
                elif ('bias' in var_name):
                    bias_shift = layer_input_dec + weight_dec_shift - dec_bits
                    layer_output_shift = layer_input_dec + weight_dec_shift - layer_output_dec  # this is not valid
                    if (bias_shift < 0):
                        dec_bits = weight_dec_shift
                        bias_shift = 0

            # now quantise them
            if(type(layer) in [Conv2D, Conv1D, DepthwiseConv2D, Conv2DTranspose]):
                if(type(layer) in [DepthwiseConv2D] and "kernel" in var_name):
                    old_shape = var_values.shape
                    var_values = quantize_data(var_values.reshape(var_values.shape[:2] + (-1,)),
                                   dec_bits, axis=-1, per_axis=per_channel_quant)  # convert to [h, w, out x mult]
                    var_values = var_values.reshape(old_shape)  # convert the shape back to [h, w, out, mult]
                elif(type(layer) in [Conv2DTranspose] and "kernel" in var_name):
                    var_values = quantize_data(var_values, dec_bits, axis=-2, per_axis=per_channel_quant)  # [h, w, out, in]
                else:
                    var_values = quantize_data(var_values, dec_bits, per_axis=per_channel_quant)  # [h, w, in, out]
            else:
                var_values = quantize_data(var_values, dec_bits, per_axis=False)

            # CHW format
            if ('chw' in format):
                if (is_lstm_layer(layer) or is_gru_layer(layer)):   # currently we use 16 bit intermediates, use the reorder optimisation
                    transposed_wts = np.transpose(var_values)
                    if('kernel' in var_name):
                        transposed_wts = convert_q7_q15_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # dense and rnn still work under the HWC format
                elif ("dense" in var_name or is_rnn_layer(layer)) and "kernel" in var_name:
                    transposed_wts = np.transpose(var_values)
                    transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # all other kernels and biases stay the same
                else:
                    transposed_wts = var_values
            # HWC format (NNOM/CMSIS-NN use [out_ch, h, w, in_ch], in C order)
            else:
                if (len(var_values.shape) == 3):  # 1D convolution layer weights
                    transposed_wts = np.transpose(var_values, (2, 0, 1))
                elif (len(var_values.shape) == 4):  # 2D convolution layer weights
                    if(type(layer) == Conv2DTranspose):  # test
                        transposed_wts = np.transpose(var_values, (2, 0, 1, 3))
                    elif type(layer) == DepthwiseConv2D:
                        transposed_wts = var_values  # np.transpose(var_values, (0, 1, 3, 2)) # [h, w, out, mult] test for multiplier
                    else:
                        transposed_wts = np.transpose(var_values, (3, 0, 1, 2))
                elif(is_lstm_layer(layer) or is_gru_layer(layer)):   # currently we use 16 bit intermediates, use the reorder optimisation
                    if('kernel' in var_name):
                        transposed_wts = np.transpose(var_values)
                        transposed_wts = convert_q7_q15_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                    else:  # bias does not need to be transposed (for GRU which has a 2d bias)
                        transposed_wts = var_values
                else:  # fully connected layer weights or biases of any layer
                    # test, use the opt weight reorder
                    transposed_wts = np.transpose(var_values)
                    if ("dense" in var_name or is_rnn_layer(layer)) and "kernel" in var_name:  # and other RNN layers
                        transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))

            with open(name, 'a') as f:
                def write_weights(f, name, value):
                    f.write('#define ' + name + ' {')
                    value.tofile(f, sep=", ", format="%d")
                    f.write('}\n\n')
                # weights or bias
                write_weights(f, var_name.upper(), transposed_wts)
                # dec bits
                write_weights(f, var_name.upper()+'_DEC_BITS', np.array(dec_bits))
                # for test
                if("bias" in var_name):
                    f.write('#define ' + layer.name.upper() + '_BIAS_LSHIFT '+to_cstyle(bias_shift) +'\n\n')
                    #f.write('#define ' + layer.name.upper() + '_OUTPUT_DEC '+ to_cstyle(layer_output_dec)+'\n\n') # not here
                    f.write('#define ' + layer.name.upper() + '_OUTPUT_RSHIFT ' + to_cstyle(layer_output_shift)+'\n\n')

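# Fixed-point bookkeeping behind the BIAS_LSHIFT / OUTPUT_RSHIFT macros written above,
# read directly from the assignments in quantize_weights(): with input, kernel, bias
# and output dec bits i, w, b and o,
#     bias_lshift   = i + w - b    (align the bias with the accumulator)
#     output_rshift = i + w - o    (bring the accumulator back to the output format)
# and when bias_lshift would be negative, the bias dec bits are lowered to the kernel
# dec bits and the shift is clamped to 0.
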
def generate_model(model, x_test, per_channel_quant=False, name='weights.h', format='hwc', quantize_method='max_min'):
 | 
						||
    """
 | 
						||
    :param model:
 | 
						||
    :param x_test:
 | 
						||
    :param name:
 | 
						||
    :param format:
 | 
						||
    :param quantize_method: "max_min" or "kld"
 | 
						||
    :return:
 | 
						||
    """
 | 
						||
    # get the quantize output range/format
 | 
						||
    layer_q_list = quantize_output(model, x_test, layer_offset=False, quantize_method=quantize_method)
 | 
						||
    # quantize weights and output shift
 | 
						||
    quantize_weights(model, per_channel_quant=per_channel_quant, name=name, format=format, layer_q_list=layer_q_list)
 | 
						||
    # now generate the model
 | 
						||
    if (type(model.layers[0]) != InputLayer):
 | 
						||
        L = [model.input] + model.layers
 | 
						||
    else:
 | 
						||
        L = model.layers
 | 
						||
    with open(name, 'a') as fp:
 | 
						||
        # generate the list of output
 | 
						||
        fp.write('\n/* output q format for each layer */\n')
 | 
						||
        for layer in L:
 | 
						||
            if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
 | 
						||
                iname = layer.name.split(':')[0]
 | 
						||
            else:
 | 
						||
                iname = layer.name
 | 
						||
            fp.write('#define %s_OUTPUT_DEC %s\n' % (iname.upper(), layer_q_list[iname][0]))
 | 
						||
            fp.write('#define %s_OUTPUT_OFFSET %s\n' % (iname.upper(), layer_q_list[iname][1]))
 | 
						||
        fp.write('\n/* bias shift and output shift for none-weighted layer */\n')
 | 
						||
 | 
						||
        # generate output shift for the layers without weights (weighted layers were generated in quantize_weights)
 | 
						||
        for layer in model.layers:
 | 
						||
            if (is_shift_layer(layer)):
 | 
						||
                iname = layer.name.upper()
 | 
						||
                # add, sub
 | 
						||
                if ('add' in layer.name or 'subtract' in layer.name):
 | 
						||
                    # only consider the first, they have been set to same in out_put_range()
 | 
						||
                    inp = layer.input[0].name.replace(':', '/').split('/')[0].upper()
 | 
						||
                    fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_DEC-{0}_OUTPUT_DEC)\n'.format(
 | 
						||
                        iname, inp))
 | 
						||
                    fp.write(
 | 
						||
                        '#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(
 | 
						||
                            iname))
 | 
						||
                # mult is different, Q3.4 * Q3.4 = Q6.8. if mult out is Q4.3, then shift (Q.4+q.4)-Q.3=5. Am I right?
 | 
						||
                elif ('multiply' in layer.name):
 | 
						||
                    inp = layer.input[0].name.replace(':', '/').split('/')[0].upper()
 | 
						||
                    fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_DEC*2-{0}_OUTPUT_DEC)\n'.format(
 | 
						||
                        iname, inp))
 | 
						||
                    fp.write(
 | 
						||
                        '#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(
 | 
						||
                            iname))
 | 
						||
 | 
						||
        fp.write('\n/* tensors and configurations for each layer */\n')
 | 
						||
        LI = {}
 | 
						||
        ID = 0
 | 
						||
 | 
						||
        def is_skipable_layer(layer):
 | 
						||
            # FIXME: add more that could be skiped
 | 
						||
            if ('lambda' in layer.name or
 | 
						||
                'dropout' in layer.name or
 | 
						||
                'gaussian_noise' in layer.name or
 | 
						||
                'batch_normalization' in layer.name
 | 
						||
                #or ('flatten' in layer.name and 'chw' not in format)
 | 
						||
                ): # flatten layer can be skipped in HWC but needed in CHW
 | 
						||
                return True
 | 
						||
            return False
 | 
						||
 | 
						||
        output_num = 0
 | 
						||
        for id, layer in enumerate(L):
 | 
						||
            if (is_skipable_layer(layer)):
 | 
						||
                inp = layer.input.name.replace(':', '/').split('/')[0]
 | 
						||
                LI[layer.name] = (LI[inp][0], layer)
 | 
						||
            else:
 | 
						||
                if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
 | 
						||
                    LI[layer.name.split(':')[0]] = (ID, layer)
 | 
						||
                else:
 | 
						||
                    LI[layer.name] = (ID, layer)
 | 
						||
                ID += 1
 | 
						||
 | 
						||
            def gen_weight_tensor(w, per_axis):
 | 
						||
                var_cname = convert_tensor_name(w) + '_data'
 | 
						||
                dec_bits_name = convert_tensor_name(w).upper() + '_DEC_BITS'
 | 
						||
                fp.write(gen_values(var_cname, convert_tensor_name(w).upper()))
 | 
						||
                fp.write(gen_tensor(w, dec_bits=dec_bits_name, tensor_value=var_cname, per_axis=per_axis))
 | 
						||
 | 
						||
            # output the config of all layer
 | 
						||
            if (type(layer) in [InputLayer] or 'input' in layer.name):
 | 
						||
                if(type(layer) == tf.Tensor):
 | 
						||
                    raise  Exception('Not yet support tensor as input/or Sequential model. '
 | 
						||
                                     'please use Input layer as your first layer in the model', layer.name, layer)
 | 
						||
                size = 1
 | 
						||
                for s in layer.input.shape[1:]:
 | 
						||
                    size *= s if s is not None else 1
 | 
						||
                fp.write(gen_values('nnom_input_data', '{0}', size=str(size), dtype='static int8_t'))
 | 
						||
                fp.write(gen_tensor(layer.input, layer_q_list[layer.name][0], tensor_value='nnom_input_data', is_io_tensor=True))
 | 
						||
                fp.write(gen_io_config(layer, tensor_name=convert_tensor_name(layer.input)))
 | 
						||
            elif (type(layer) in [Conv2D, Conv1D, DepthwiseConv2D]):
 | 
						||
                for w in layer.weights:
 | 
						||
                    gen_weight_tensor(w, per_axis=per_channel_quant)
 | 
						||
                fp.write(gen_conv2d_config(layer, layer.name.upper() +'_OUTPUT_RSHIFT', layer.name.upper() +'_BIAS_LSHIFT'))
 | 
						||
            elif (type(layer) in [Conv2DTranspose]):
 | 
						||
                for w in layer.weights:
 | 
						||
                    gen_weight_tensor(w, per_axis=per_channel_quant)
 | 
						||
                fp.write(gen_conv2d_trans_config(layer, layer.name.upper() +'_OUTPUT_RSHIFT', layer.name.upper() +'_BIAS_LSHIFT'))
 | 
						||
            elif (type(layer) in [Dense]):
 | 
						||
                for w in layer.weights:
 | 
						||
                    gen_weight_tensor(w, per_axis=False)
 | 
						||
                fp.write(gen_dense_config(layer, layer.name.upper() +'_OUTPUT_RSHIFT', layer.name.upper() +'_BIAS_LSHIFT'))
 | 
						||
            elif (type(layer) in [MaxPooling2D, AveragePooling2D, MaxPooling1D, AveragePooling1D]):
 | 
						||
                fp.write(gen_pooling_config(layer))
 | 
						||
            elif (type(layer) in [GlobalMaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling1D, GlobalAveragePooling1D]):
 | 
						||
                fp.write(gen_gl_pooling_config(layer))
 | 
						||
            elif (type(layer) in [Multiply, Add, Subtract]):
 | 
						||
                fp.write(gen_matrix_config(layer, output_shift_name=layer.name.upper()+'_OUTPUT_RSHIFT'))
 | 
						||
            elif (type(layer) in [ZeroPadding2D, ZeroPadding1D]):
 | 
						||
                fp.write(gen_zero_padding_config(layer))
 | 
						||
            elif (type(layer) in [Cropping2D, Cropping1D]):
 | 
						||
                fp.write(gen_cropping_config(layer))
 | 
						||
            elif (type(layer) in [Softmax]):
 | 
						||
                fp.write(gen_softmax_config(layer))
 | 
						||
            elif (type(layer) in [Flatten]):
 | 
						||
                fp.write(gen_flatten_config(layer))
 | 
						||
            elif (type(layer) in [Reshape]):
 | 
						||
                fp.write(gen_reshape_config(layer))
 | 
						||
            elif (type(layer) in [Concatenate]):
 | 
						||
                fp.write(gen_concat_config(layer))
 | 
						||
            elif (type(layer) in [Lambda]):
 | 
						||
                fp.write(gen_lambda_config(layer))
 | 
						||
            elif (type(layer) in [UpSampling2D, UpSampling1D]):
 | 
						||
                fp.write(gen_upsampling_config(layer))
 | 
						||
            elif(is_rnn_layer(layer)):
 | 
						||
                if(type(layer.cell) is SimpleRNNCell):
 | 
						||
                    for w in layer.weights:
 | 
						||
                        gen_weight_tensor(w, per_axis=False)
 | 
						||
                    fp.write(gen_simple_cell_config(layer, layer_q_list['intermediate_'+layer.name]))
 | 
						||
                elif(type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
 | 
						||
                    for w in layer.weights:
 | 
						||
                        gen_weight_tensor(w, per_axis=False)
 | 
						||
                    fp.write(gen_gru_cell_config(layer, layer_q_list['intermediate_'+layer.name]))
 | 
						||
                elif(type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
 | 
						||
                    for w in layer.weights:
 | 
						||
                        gen_weight_tensor(w, per_axis=False)
 | 
						||
                    fp.write(gen_lstm_cell_config(layer, layer_q_list['intermediate_'+layer.name]))
 | 
						||
                fp.write(gen_rnn_config(layer))
 | 
						||
 | 
						||
            # test, multiple output layer
 | 
						||
            if(len(layer.outbound_nodes) == 0):
 | 
						||
                size=1
 | 
						||
                for s in layer.output.shape[1:]:
 | 
						||
                    size *= s if s is not None else 1
 | 
						||
                if(output_num == 0): # the first output or the only output
 | 
						||
                    fp.write(gen_values('nnom_output_data', '{0}', size=str(size), dtype='static int8_t'))
 | 
						||
                    fp.write(gen_output_config(layer, dec_bits=layer.name.upper() + '_OUTPUT_DEC', output_num=output_num, value_name='nnom_output_data'))
 | 
						||
                    output_num += 1
 | 
						||
                else:
 | 
						||
                    output_value_names = 'nnom_output_data'+str(output_num)
 | 
						||
                    fp.write(gen_values(output_value_names, '{0}', size=str(size), dtype='static int8_t'))
 | 
						||
                    fp.write(gen_output_config(layer, dec_bits=layer.name.upper() + '_OUTPUT_DEC', output_num=output_num, value_name=output_value_names))
 | 
						||
                    output_num += 1
 | 
						||
 | 
						||
            # # last layer, attach the additional nnom output layer
 | 
						||
            # if(id == len(L)-1):
 | 
						||
            #     size=1
 | 
						||
            #     for s in layer.output.shape[1:]:
 | 
						||
            #         size *= s if s is not None else 1
 | 
						||
            #     fp.write(gen_values('nnom_output_data', '{0}', size=str(size), dtype='static int8_t'))
 | 
						||
            #     fp.write(gen_output_config(layer,  dec_bits=layer.name.upper()+'_OUTPUT_DEC', value_name='nnom_output_data'))
 | 
						||
 | 
						||
        # write version
 | 
						||
        fp.write('/* model version */\n')
 | 
						||
        fp.write('#define NNOM_MODEL_VERSION (10000*{0} + 100*{1} + {2})\n'.format(model_major_version, model_sub_version, model_reversion ))
 | 
						||
 | 
						||
        # model
 | 
						||
        fp.write('\n/* nnom model */\n')
 | 
						||
        fp.write('static nnom_model_t* nnom_model_create(void)\n{\n')
 | 
						||
        fp.write('\tstatic nnom_model_t model;\n')
 | 
						||
        if (ID > 32):
 | 
						||
            fp.write('\tnnom_layer_t **layer = (nnom_layer_t**)malloc(sizeof(nnom_layer_t *)*%d);\n' % (ID + 1))
 | 
						||
            fp.write('\tif(NULL == layer) return NULL;\n')
 | 
						||
        else:
 | 
						||
            fp.write('\tnnom_layer_t* layer[%d];\n' % (ID + 1))
 | 
						||
        fp.write('\n\tcheck_model_version(NNOM_MODEL_VERSION);')
 | 
						||
        fp.write('\n\tnew_model(&model);\n\n')
 | 
						||
 | 
						||
        # inverted order of output, very strange
        output_num = (len(model.output) -1) if type(model.output) is list else 0
        for layer in L:
            if (is_skipable_layer(layer)):
                continue
            # FIXME: need a better solution to separate the input 'tensor' from other layers
            if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
                id, _ = LI[layer.name.split(':')[0]]
            else:
                id, _ = LI[layer.name]

            if ('input' in layer.name):
                fp.write('\tlayer[%d] = input_s(&%s_config);\n' % (id, layer.name))

            # convolutional
            elif ('conv1d' in layer.name
                  or 'conv2d' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if('transpose' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(conv2d_trans_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name,  LI[inp][0]))
                elif('depthwise' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(dw_conv2d_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(conv2d_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('activation' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                if (cfg['activation'] == 'relu'):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                elif (cfg['activation'] == 'tanh'):
                    fp.write('\tlayer[%s] = model.active(act_hard_tanh(%s_OUTPUT_DEC), layer[%s]);\n' % (
                    id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_DEC), layer[%s]);\n' % (
                    id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'hard_sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_hard_sigmoid(%s_OUTPUT_DEC), layer[%s]);\n' % (
                    id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'softmax'):
                    fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n' % (id, LI[inp][0]))
            elif ('leaky_re_lu' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                fp.write('\tlayer[%s] = model.active(act_leaky_relu(%ff), layer[%s]);\n' % (id, cfg["alpha"],LI[inp][0]))
            elif ('re_lu' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                if(cfg['max_value'] is None and cfg['negative_slope'] == 0 and cfg['threshold'] == 0):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                else:
                    if(cfg['max_value'] is None):
                        max_v = 'INFINITY '
                    else:
                        max_v = str(cfg['max_value'])
                    fp.write('\tlayer[%s] = model.active(act_adv_relu(%f,%s,%f), layer[%s]);\n'
                             % (id, cfg['negative_slope'], max_v, cfg['threshold'], LI[inp][0]))
            # pooling
            elif ('max_pooling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if ('global' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(global_maxpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(maxpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('average_pooling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if ('global' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(global_avgpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(avgpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('up_sampling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(upsample_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # zero padding
            elif ('zero_padding' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(zeropadding_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # Cropping
            elif ('cropping' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(cropping_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))

            # others
            elif ('flatten' in layer.name):  # flatten is needed in CHW backend but not needed in HWC
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(flatten_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('reshape' in layer.name):  # flatten is needed in CHW backend but not needed in HWC
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(reshape_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('concatenate' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(concat_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('add' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(add_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('subtract' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(sub_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('multiply' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(mult_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('dense' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(dense_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('softmax' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(softmax_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))

            elif (is_rnn_layer(layer)):
                inp = layer_name_from_tensor(layer.input)
                line = '\tlayer[{0}] = model.hook(rnn_s(<rnn_cell>, &{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0])
                if (type(layer.cell) is SimpleRNNCell):
                    line = line.replace('<rnn_cell>', 'simple_cell_s(&%s_simple_cell_config)' %(layer.name))
                elif (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
                    line = line.replace('<rnn_cell>', 'gru_cell_s(&%s_gru_cell_config)' % (layer.name))
                elif (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
                    line = line.replace('<rnn_cell>', 'lstm_cell_s(&%s_lstm_cell_config)' % (layer.name))
                fp.write(line)
            else:
                raise Exception('unsupported layer', layer.name, layer)

            # test, multiple output layer (not yet working with multiple outputs)
            if(len(layer.outbound_nodes) == 0):
                fp.write('\tlayer[{0}] = model.hook(output_s(&{1}_config), layer[{2}]);\n'.format(id + 1, 'output'+str(output_num), LI[inp][0] + 1))
                output_num -=1 # the num is inverted in keras, not a good solution yet.

            """
            # temporary fix for activations attached to layers during construction
            def is_activation_attached(layer):
                if(("Softmax" in layer.output.name and "softmax" not in layer.name)or
                ("Relu" in layer.output.name and "re_lu" not in layer.name) or
                ("Sigmoid" in layer.output.name and "sigmoid" not in layer.name) or
                ("Tanh" in layer.output.name and "tanh" not in layer.name)):
                    return True
                return False
            if "input" not in layer.name and is_activation_attached(layer):
                inp = layer.output.name.replace(':', '/').split('/')[0]
                cfg = layer.get_config()
                if(cfg['activation'] == 'relu'):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
                if(cfg['activation'] == 'tanh'):
                    fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
                if(cfg['activation'] == 'sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
                elif(cfg['activation'] == 'softmax'):
                    fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
            """
        # generate final output layer
        #fp.write('\tlayer[{0}] = model.hook(output_s(&{1}_config), layer[{2}]);\n'.format(id+1, 'output', LI[inp][0]+1))
        fp.write('\tmodel_compile(&model, layer[0], layer[%s]);\n' % (id + 1))
        if (ID > 32):
            fp.write('\tfree(layer);\n')
        fp.write('\treturn &model;\n}\n')
    with open('.layer_q_list', 'w') as fp:
        fp.write(str(layer_q_list))

def evaluate_model(model, x_test, y_test, running_time=False, to_file='evaluation.txt'):
    # Score trained model.
    scores = model.evaluate(x_test, y_test, verbose=2)
    print('Test loss:', scores[0])
    print('Top 1:', scores[1])

    if(len(y_test.shape)>1):
        bs = model.input.shape[0]
        predictions = model.predict(x_test, batch_size=bs)
        matrix = skmetrics.confusion_matrix(y_test.argmax(axis=1), predictions.argmax(axis=1))
        print(matrix)

    run_time = 0
    if running_time:
        # try to calculate the time
        T = time.time()
        bs = model.input.shape[0]
        for i in range(10):
            model.predict(x_test, batch_size=bs)
        T = time.time() - T
        run_time = round((T / 10 / x_test.shape[0] * 1000 * 1000), 2)
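        # run_time is the average per-sample latency in microseconds:
        # total wall-clock time / 10 repeated predict() calls / number of samples, scaled by 1e6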
        print("Running time:", run_time, "us")
    #
    with open(to_file, 'w') as f:
        f.write("Running time: "+ str(run_time) + "us" + "\n")
        f.write('Test loss:'+ str(scores[0]) + "\n")
        f.write('Top 1:'+ str(scores[1])+ "\n")
        if (len(y_test.shape) > 1):
            for row in matrix:
                row.tofile(f, sep=',')
                f.write("\n")
    return scores

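# Usage sketch for evaluate_model() above (names are placeholders; assumes y_test is
# one-hot encoded so the confusion-matrix branch runs):
#   scores = evaluate_model(model, x_test, y_test, running_time=True, to_file='evaluation.txt')
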
def f2q(d, Q):
    '''To convert a number from floating point to Qm.n format:
        1. Multiply the floating point number by 2^n
        2. Round to the nearest integer
    '''
    return np.round(d*2**Q)
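# Worked example (illustrative values): with 7 fractional bits, f2q(0.3, 7)
# returns np.round(0.3 * 128) = 38.0.

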
def q2f(d, Q):
    '''To convert a number from Qm.n format to floating point:
        1. Convert the number to floating point as if it were an integer, in other words remove the binary point
        2. Multiply by 2^-n
    '''
    return d*2**-Q
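# Worked example (illustrative values): q2f(38, 7) = 38 * 2**-7 = 0.296875,
# i.e. the value recovered from the f2q() example above.
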
def show_weights(w, name):
    sz = 1
    for s in w.shape:
        sz = sz*s
    aL = w.reshape(sz,)
    MIN,MAX=min(aL),max(aL)
    Q = int(np.ceil(np.log2(max(abs(MIN),abs(MAX)))))
    Q = 7-Q
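    # Q is the number of fractional bits for an int8 (Q7-style) representation: the
    # integer part needs ceil(log2(max|w|)) bits, and the remaining bits of the 7 hold
    # the fraction. Illustrative example: max|w| = 2.5 -> ceil(log2(2.5)) = 2 -> Q = 5,
    # i.e. weights are scaled by 2**5 = 32 before rounding in f2q() below.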
    qL = f2q(aL,Q)
    qL = q2f(qL,Q)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.title(name)
    plt.plot(aL)
    plt.grid()
    aL.sort()
    plt.plot(aL,'r')
    plt.grid()
    plt.subplot(132)
    plt.title('Q%s'%(Q))
    qL.sort()
    plt.plot(aL,'r')
    plt.plot(qL,'g')
    plt.grid()
    plt.subplot(133)
    plt.hist(aL,100)
    plt.title('hist')
    plt.grid()
    plt.show()

def compare(a,b,name):
    sz = 1
    for s in a.shape:
        sz = sz*s
    aL = a.reshape(sz,)
    bL = b.reshape(sz,)
    assert(len(aL) == len(bL))
    Z = list(zip(aL,bL))
    Z.sort(key=lambda x: x[0])
    aL1,bL1=zip(*Z)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.plot(aL)
    plt.plot(aL1,'r')
    plt.grid()
    plt.title('tf-%s'%(name))
    plt.subplot(133)
    plt.plot(bL1,'g')
    plt.plot(aL1,'r')
    plt.grid()
    plt.title('compare')
    plt.subplot(132)
    bL1=list(bL1)
    bL1.sort()
    plt.plot(bL)
    plt.plot(bL1,'g')
    plt.grid()
    plt.title('nn-%s'%(name))
    plt.show()