'''
    Copyright (c) 2018-2020
    Jianjia Ma
    majianjia@live.com

    SPDX-License-Identifier: Apache-2.0

    Change Logs:
    Date           Author       Notes
    2019-02-05     Jianjia Ma   The first version
'''

import sklearn.metrics as skmetrics
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import *
from tensorflow.keras.layers import *
from fully_connected_opt_weight_generation import *
from gen_config import *
import scipy.stats
import time
import warnings

model_major_version = 0
model_sub_version = 4
model_reversion = 3

#define NNOM_MAJORVERSION     0L              /**< major version number */
#define NNOM_SUBVERSION       4L              /**< minor version number */
#define NNOM_REVISION         3L              /**< revise version number */
#define NNOM_VERSION          ((NNOM_MAJORVERSION * 10000) + (NNOM_SUBVERSION * 100) + NNOM_REVISION)

def fuse_bn_to_conv(layer):
    # try to fuse BN layer to convolutional
    if ('conv' in layer.name) and \
            ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
        print("fusing batch normalization to", layer.name)
        bn_layer = layer._outbound_nodes[0].outbound_layer
        c_w = layer.get_weights()[0]
        c_b = layer.get_weights()[1]
        print('original weight max', c_w.max(), 'min', c_w.min())
        print('original bias max', c_b.max(), 'min', c_b.min())
        bn_gamma = bn_layer.get_weights()[0]
        bn_beta = bn_layer.get_weights()[1]
        bn_mean = bn_layer.get_weights()[2]
        bn_variance = bn_layer.get_weights()[3]
        epsilon = 1e-3  # default epsilon for tf.slim.batch_norm

        if ('conv2d' in layer.name):
            if "depthwise" in layer.name:  # depthwise batchnorm params are ordered differently
                for l in range(c_w.shape[3]):
                    for k in range(c_w.shape[2]):
                        for j in range(c_w.shape[1]):
                            for i in range(c_w.shape[0]):
                                c_w[i][j][k][l] *= bn_gamma[k*c_w.shape[3]+l] / np.sqrt(bn_variance[k*c_w.shape[3]+l] + epsilon)
                depth_dim = c_w.shape[2] * c_w.shape[3]  # test needed
            # normal conv
            else:
                for l in range(c_w.shape[3]):
                    for k in range(c_w.shape[2]):
                        for j in range(c_w.shape[1]):
                            for i in range(c_w.shape[0]):
                                c_w[i][j][k][l] *= bn_gamma[l] / np.sqrt(bn_variance[l] + epsilon)
                depth_dim = c_w.shape[3]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
        # conv1d
        else:
            epsilon = 1e-3  # default epsilon for tf.slim.batch_norm
            for k in range(c_w.shape[2]):
                for j in range(c_w.shape[1]):
                    for i in range(c_w.shape[0]):
                        if "depthwise" in layer.name:  # depthwise batchnorm params are ordered differently
                            c_w[i][j][k] *= bn_gamma[j] / np.sqrt(bn_variance[j] + epsilon)
                        else:
                            c_w[i][j][k] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)

            if "depthwise" in layer.name:
                depth_dim = c_w.shape[1]*c_w.shape[2]  # need to be tested
            else:
                depth_dim = c_w.shape[2]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]

        print('fused weight max', c_w.max(), 'min', c_w.min())
        print('fused bias max', c_b.max(), 'min', c_b.min())
        # write the weights back to the layer
        # after that, the model will be destroyed.. need a better way to pass the new weight
        layer.set_weights([c_w, c_b])
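
# For reference, the loops above implement the standard batch-normalization folding identity
# (gamma, beta, mean, var are the BN parameters, eps its epsilon):
#   w_fused = w * gamma / sqrt(var + eps)
#   b_fused = gamma * (b - mean) / sqrt(var + eps) + beta
# so that conv(x, w_fused) + b_fused reproduces BN(conv(x, w) + b) at inference time.
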
def generate_test_bin(x, y, name='test_data_with_label.bin'):
    '''
    Generate a binary test file that interleaves quantised input data with its labels,
    so it can be sent to the target (e.g. over Y-modem) for on-device validation.
    :param x: input data (will be quantised to int8)
    :param y: input labels (one-hot or integer labels)
    :return:
    '''
    # quantize input x
    dec_bits = find_dec_bits_max_min(x, bit_width=8)
    x = np.round(x*2**dec_bits).clip(-128, 127).astype(np.int8)
    # get label
    if(len(y.shape) > 1):
        test_label = np.argwhere(y == 1).astype(np.int8)  # test data
        test_label = test_label[:, 1]
    else:
        test_label = y

    # get data
    dat = x.astype(dtype="byte")  # test data
    batch_size = dat.shape[0]     # total pieces of data
    dat = dat.flatten()           # flatten to get the total size.
    block_size = int(dat.size / batch_size)  # this must be an integer, but... just to confirm

    # write (label x 128) (data_block x 128)
    label_batch = 128   # the Y-modem example uses a 128 batch
    with open(name, 'wb') as f:
        start = 0
        while start <= (test_label.size - label_batch):
            test_label[start: start + label_batch].tofile(f)
            dat[block_size * start: block_size * (start + label_batch)].tofile(f)
            start += label_batch

        # the rest of the data
        if (start < test_label.size):
            rest_len = test_label.size - start
            new_labels = test_label[start:]
            new_labels = np.pad(new_labels, (0, label_batch - rest_len), mode='constant')
            new_labels.tofile(f)
            dat[block_size * start:].tofile(f)

    print("binary test file generated:", name)
    print("test data length:", test_label.size)
    return
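
# Sketch of the resulting file layout and how it could be read back on the host side
# (illustrative only; 'block_size' means the flattened size of one input sample):
#
#   [128 labels (int8)] [128 * block_size data bytes (int8)] [128 labels] [...]
#
#   with open('test_data_with_label.bin', 'rb') as f:
#       labels = np.frombuffer(f.read(128), dtype=np.int8)
#       data = np.frombuffer(f.read(128 * block_size), dtype=np.int8)
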
def is_shift_layer(layer):
    ''' layer which can change the output encoding'''
    # FIXME: add more which will change the output shift
    if('input' in layer.name or
       'conv2d' in layer.name or
       'conv1d' in layer.name or
       'dense' in layer.name or
       'softmax' in layer.name or
       'sigmoid' in layer.name or
       'tanh' in layer.name or
       ('add' in layer.name and 'zero' not in layer.name) or  # the name zero_padding contains 'add'
       'subtract' in layer.name or
       'multiply' in layer.name or
       ('activation' in layer.name and layer.get_config()['activation'] == 'softmax') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_sigmoid') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'tanh') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_tanh') or
       is_rnn_layer(layer)
       ):
        return True
    return False
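
# Example: a 'conv2d' or 'dense' layer re-scales its output and therefore owns its own
# output Q format, while e.g. a 'max_pooling2d' layer is not a shift layer, so
# quantize_output() simply reuses the Q format of its input.
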
def is_shift_fixed(layer):
    ''' layer which shifts to a fixed value'''
    # FIXME: add more which will change the output shift
    if('softmax' in layer.name or
       'sigmoid' in layer.name or
       'tanh' in layer.name or
       ('activation' in layer.name and layer.get_config()['activation'] == 'softmax') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'sigmoid') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_sigmoid') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'tanh') or
       ('activation' in layer.name and layer.get_config()['activation'] == 'hard_tanh') or
       is_rnn_layer(layer)
       ):
        return True
    return False


def is_lstm_layer(layer):
    if type(layer) is LSTM or 'lstm' in layer.name:
        return True
    if(type(layer) is RNN or 'rnn' in layer.name):
        if(type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
            return True
    return False


def is_gru_layer(layer):
    if type(layer) is GRU or 'gru' in layer.name:
        return True
    if(type(layer) is RNN or 'rnn' in layer.name):
        if(type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
            return True
    return False


def is_rnn_layer(layer):
    if('rnn' in layer.name or
       is_lstm_layer(layer) or
       is_gru_layer(layer)
       ):
        return True
    return False


def find_offset(data):
    """
    Offset of the original data before quantisation
    :param data:
    :return: offset of the data block
    """
    return np.average(data)
def find_dec_bits_max_min(data, bit_width=8, maximum_bit=32):
    """
    A regular non-saturated shift-based quantisation method, using the max/min values.
    :param data:
    :param bit_width:
    :param maximum_bit: maximum decimal bit. In case the bias is so small that it would lead to a very large dec bit
    :return:
    """
    max_val = abs(data.max()) - abs(data.max()/pow(2, bit_width))  # allow very small saturation.
    min_val = abs(data.min()) - abs(data.min()/pow(2, bit_width))
    int_bits = int(np.ceil(np.log2(max(max_val, min_val))))
    dec_bits = (bit_width-1) - int_bits
    return min(dec_bits, maximum_bit)
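
# Worked example (illustrative): for values spanning roughly [-2.7, 2.7],
# int_bits = ceil(log2(2.7)) = 2, so dec_bits = 7 - 2 = 5 and each value will later be
# stored as round(v * 2**5) in int8, e.g.
#   find_dec_bits_max_min(np.array([-2.7, 0.1, 2.7]))   # -> 5
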
def find_dec_bits_max_min_axis(data, axis=-1, bit_width=8, maximum_bit=32):
    """
    A regular non-saturated shift-based quantisation method, using the max/min values per axis.
    :param data:
    :param axis:
    :param bit_width:
    :return:
    """
    dec_bits = []
    # if(len(data.shape) < np.abs(axis)): # for depthwise with axis = -2 while len(shape) = 1
    #     size = data.shape[0]
    #     axis = 0
    # else:
    #     size = data.shape[axis]
    for i in np.arange(0, data.shape[axis]):
        d = np.take(data, indices=i, axis=axis)
        max_val = abs(d.max()) - abs(d.max() / pow(2, bit_width))  # allow very small saturation.
        min_val = abs(d.min()) - abs(d.min() / pow(2, bit_width))
        int_bit = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
        dec_bit = (bit_width-1) - int_bit
        dec_bits.append(min(dec_bit, maximum_bit))
    return dec_bits


def find_dec_bits_kld(data, bit_width=8, scan_times=4, maximum_bit=16):
    """
    Saturated shift, using the KLD method (Kullback-Leibler divergence).
    Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
    :param data: the data to quantise
    :param bit_width: the bit width of the data
    :param scan_times: how many shifts to try for the best KLD (normally the second is the best)
    :return: dec bit width for this data
    """
    # do a regular non-saturated quantisation
    max_val = data.max()
    min_val = data.min()
    abs_max = max(abs(max_val), abs(min_val))
    int_bits = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
    dec_bits = (bit_width-1) - int_bits

    # now look for the best quantisation using the KLD method
    small_var = 1e-5
    bins = np.arange(-abs_max, abs_max, abs_max / 2048 * 2)
    q_bins = np.arange(-abs_max, abs_max, abs_max / 256 * 2)
    flat_hist = np.histogram(data.flatten(), bins=bins)[0]
    kl_loss = []
    kl_shifts = []
    for shift in range(scan_times):
        t = 2 ** (dec_bits + shift)     # 2-based threshold
        act = np.round(data.flatten() * t)
        act = act / t
        act = np.clip(act, -128 / t, 127 / t)
        act = np.histogram(act, bins=q_bins)[0]
        act_hist = np.zeros(2047)
        chunk = int(2048 / 256)
        for i in range(int(255)):
            none_zero = np.count_nonzero(flat_hist[i * chunk:(i + 1) * chunk])
            if none_zero == 0:
                continue
            for j in range(chunk):
                act_hist[i * chunk + j] = act[i] / none_zero if flat_hist[i * chunk + j] != 0 else 0
        flat_hist[flat_hist == 0] = small_var
        act_hist[act_hist == 0] = small_var
        kl = scipy.stats.entropy(flat_hist, act_hist)
        kl_loss.append(kl)
        kl_shifts.append(dec_bits + shift)

    # now get the least loss from the scanned KLD shifts
    dec_bits = kl_shifts[np.argmin(kl_loss)]  # set the dec_bit to the KLD result
    return min(dec_bits, maximum_bit)
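
# Note: the KLD scan only searches shifts at and above the non-saturated max/min estimate,
# so for outlier-heavy activations it may pick a larger dec_bits (deliberately saturating
# the tail) when doing so lowers the divergence between the two histograms.
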
# convert to [-128,128) or int8
def quantize_data(data, dec_bits, axis=-1, per_axis=False, bitwith=8):
    if (per_axis):
        out = []
        for i in np.arange(0, data.shape[axis]):
            d = np.take(data, indices=i, axis=axis)
            d = np.round(d * 2 ** dec_bits[i])
            d = np.clip(d, -2**(bitwith-1), 2**(bitwith-1)-1)
            d = np.expand_dims(d, axis=axis)
            out.append(d)
        out = np.concatenate(out, axis=axis)
        return out
    else:
        return np.clip(np.round(data * 2 ** dec_bits), -2**(bitwith-1), 2**(bitwith-1)-1)
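
# Example (illustrative): per-channel quantisation of a [h, w, in, out] kernel along the
# output-channel axis, pairing find_dec_bits_max_min_axis() with quantize_data():
#
#   k = np.random.randn(3, 3, 8, 16).astype(np.float32)       # hypothetical kernel
#   dec = find_dec_bits_max_min_axis(k, axis=-1, bit_width=8)  # one dec bit per output channel
#   q = quantize_data(k, dec, axis=-1, per_axis=True)          # int8-range values, same shape
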
def quantize_rnn_intermediate_output(layer, features):
    def nnom_sigmoid(data):
        return 1 / (1 + np.exp(-data))

    def nnom_tanh(data):
        return np.tanh(data)

    def split_array(d, num):
        l = len(d)
        if(num == 4):
            return d[:int(l/4)], d[int(l/4): int(l/2)], d[int(l/2):-int(l/4)], d[-int(l/4):]
        elif(num == 3):
            return d[:int(l/3)], d[int(l/3): -int(l/3)], d[-int(l/3):]

    lcfg = layer.get_config()
    if(lcfg['go_backwards']):
        features = features[:, ::-1, :]  # reverse the timestamps

    if(type(layer.cell) is SimpleRNNCell):
        cfg = layer.cell.get_config()
        state = np.zeros(cfg['units'])
        kernel = layer.get_weights()[0]
        recurrent_kernel = layer.get_weights()[1]
        bias = layer.get_weights()[2]

        # replicate keras's implementation
        def simple_cell_step(inputs, state, kernel, recurrent_kernel, bias, activation):
            h = np.dot(inputs, kernel)
            h = np.add(h, bias)
            h2 = np.dot(state, recurrent_kernel)
            output = h + h2
            output = activation(output)
            return output, h, h2

        output_array = []
        h_array = []
        h2_array = []
        activation = nnom_tanh if cfg['activation'] == 'tanh' else nnom_sigmoid
        state = np.zeros(cfg['units'])
        for feature in features:
            if(not layer.stateful):
                state = np.zeros(cfg['units'])
            for fe in feature:
                output, h, h2 = simple_cell_step(fe, state, kernel, recurrent_kernel, bias, activation)
                state = output
                output_array.append(output)
                h_array.append(h)
                h2_array.append(h2)
        output_array = np.array(output_array)
        h_array = np.array(h_array)
        h2_array = np.array(h2_array)
        # qout = find_dec_bits_kld(output_array)
        # qh = find_dec_bits_kld(h_array)
        # qh2 = find_dec_bits_kld(h2_array)
        qout = find_dec_bits_max_min(output_array)
        qh = find_dec_bits_max_min(h_array)
        qh2 = find_dec_bits_max_min(h2_array)
        return [qout, qh, qh2]

    elif (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
        cfg = layer.cell.get_config()
        state = np.zeros(cfg['units']*2)
        kernel = layer.get_weights()[0]
        recurrent_kernel = layer.get_weights()[1]
        bias = layer.get_weights()[2]

        def lstm_cell_step(cell_inputs, cell_states, kernel, recurrent_kernel, bias):
            h_tm1 = cell_states[0]  # previous memory state
            c_tm1 = cell_states[1]  # previous carry state
            z1 = np.dot(cell_inputs, kernel)
            z1 = np.add(z1, bias)
            z2 = np.dot(h_tm1, recurrent_kernel)
            z = z1 + z2             # -----> q_z
            z0, z1, z2, z3 = split_array(z, 4)
            i = nnom_sigmoid(z0)    # q0.7
            f = nnom_sigmoid(z1)    # q0.7
            c1 = f * c_tm1
            c2 = i * nnom_tanh(z2)  # q0.7
            c = c1 + c2             # -----> q_c
            o = nnom_sigmoid(z3)    # q0.7
            tc = nnom_tanh(c)
            h = o * tc              # q0.7
            return h, [h, c], z, z0, z1, z2, z3

        h_array = []
        c_array = []
        z_array = []
        z0_array = []
        z1_array = []
        z2_array = []
        z3_array = []
        state = [np.zeros(cfg['units']), np.zeros(cfg['units'])]
        for feature in features:
            if(not layer.stateful):
                state = [np.zeros(cfg['units']), np.zeros(cfg['units'])]
            for fe in feature:
                output, state, z, z0, z1, z2, z3 = lstm_cell_step(fe, state, kernel, recurrent_kernel, bias)
                h_array.append(output)
                c_array.append(state[1])
                z_array.append(z)
                z0_array.append(z0)
                z1_array.append(z1)
                z2_array.append(z2)
                z3_array.append(z3)
        h_array = np.array(h_array)
        c_array = np.array(c_array)
        z_array = np.array(z_array)
        z0_array = np.array(z0_array)
        z1_array = np.array(z1_array)
        z2_array = np.array(z2_array)
        z3_array = np.array(z3_array)
        # q_h = find_dec_bits_kld(h_array)
        # q_c = find_dec_bits_kld(c_array)
        # q_z = find_dec_bits_kld(z_array)
        # q_z0 = find_dec_bits_kld(z0_array)
        # q_z1 = find_dec_bits_kld(z1_array)
        # q_z2 = find_dec_bits_kld(z2_array)
        # q_z3 = find_dec_bits_kld(z3_array)
        q_h = find_dec_bits_max_min(h_array)
        q_c = find_dec_bits_max_min(c_array)
        q_z = find_dec_bits_max_min(z_array)
        q_z0 = find_dec_bits_max_min(z0_array)  # not needed.
        q_z1 = find_dec_bits_max_min(z1_array)
        q_z2 = find_dec_bits_max_min(z2_array)
        q_z3 = find_dec_bits_max_min(z3_array)
        return [q_h, q_c, q_z]

    elif (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
        cfg = layer.cell.get_config()
        state = np.zeros(cfg['units'])
        k = layer.get_weights()[0]
        rk = layer.get_weights()[1]
        bias = layer.get_weights()[2]

        def gru_cell_step(cell_inputs, cell_states, kernel, recurrent_kernel, input_bias, recurrent_bias):
            h_tm1 = cell_states[0]
            # inputs projected by all gate matrices at once
            matrix_x = np.dot(cell_inputs, kernel) + input_bias
            x_z, x_r, x_h = split_array(matrix_x, 3)
            # hidden state projected by all gate matrices at once
            matrix_inner = np.dot(h_tm1, recurrent_kernel) + recurrent_bias
            recurrent_z, recurrent_r, recurrent_h = split_array(matrix_inner, 3)
            z = nnom_sigmoid(x_z + recurrent_z)
            r = nnom_sigmoid(x_r + recurrent_r)
            hh = nnom_tanh(x_h + r * recurrent_h)
            # previous and candidate state mixed by the update gate
            # h = z * h_tm1 + (1 - z) * hh
            h1 = z * h_tm1
            h2 = 1 - z
            h3 = h2 * hh
            h = h1 + h3
            return h, [h], matrix_x, matrix_inner

        h_array = []
        z_array = []
        i_array = []
        state = [np.zeros(cfg['units'])]
        for feature in features:
            if (not layer.stateful):
                state = [np.zeros(cfg['units'])]
            for fe in feature:
                output, state, z, i = gru_cell_step(fe, state, k, rk, bias[0], bias[1])
                h_array.append(output)
                z_array.append(z)
                i_array.append(i)
        h_array = np.array(h_array)
        i_array = np.array(i_array)
        z_array = np.array(z_array)
        # q_h = find_dec_bits_kld(h_array)
        # q_i = find_dec_bits_kld(i_array)
        # q_z = find_dec_bits_kld(z_array)
        q_h = find_dec_bits_max_min(h_array)
        q_i = find_dec_bits_max_min(i_array)
        q_z = find_dec_bits_max_min(z_array)
        q_z = min(q_i, q_z)
        return [q_h, q_z]
    return []
def quantize_output(model, x_test, quantize_method='max_min', layer_offset=False, calibrate_size=None):
    # limit the test data size
    if(calibrate_size is not None):
        if (x_test.shape[0] > calibrate_size):
            x_test = x_test[:calibrate_size]
    # test, show the output ranges
    layer_q_list = {}
    # FIXME: only support one input
    if (type(model.layers[0]) != InputLayer):
        L = [model.input] + model.layers
    else:
        L = model.layers

    for layer in L:  # layer loop
        if ("input" in layer.name):
            features = x_test
        else:
            # rnn needs a further step to determine the intermediate q format
            if (is_rnn_layer(layer)):
                in_layer = layer.inbound_nodes[0].inbound_layers
                layer_model = Model(inputs=model.input, outputs=in_layer.output)
                bs = model.input.shape[0]
                features = layer_model.predict(x_test, batch_size=bs)
                intermediate_dec = quantize_rnn_intermediate_output(layer, features)
                print(layer.name, 'dec bit', intermediate_dec)
                layer_q_list['intermediate_' + layer.name] = intermediate_dec

            # batch_normalization will need to be handled differently, since we are fusing the weight to its previous conv.
            # sigmoid and tanh are different, their shift is fixed to 7
            if (is_shift_layer(layer) or
                    ('batch_normalization' in layer.name)):
                layer_model = Model(inputs=model.input, outputs=layer.output)
                bs = model.input.shape[0]
                features = layer_model.predict(x_test, batch_size=bs)
            else:
                # leave the features unchanged, so this layer's shift will be the same as its input's
                pass

        # we currently only support one offset for a layer output.
        if(layer_offset):
            offset = find_offset(features)
            features = features - offset
        else:
            offset = 0
        # saturated shift using the KLD method OR non-saturated shift using max-min
        if ("kld" in quantize_method
                and not is_shift_fixed(layer)
                and "input" not in layer.name
                and "dense" not in layer.name):  # test, also do not use kld on the input or dense layers
            dec_bits = find_dec_bits_kld(features, bit_width=8, scan_times=4)
            print(layer.name, "Quantized method:", "KLD", "Values max:", np.max(features), "min:", np.min(features), "dec bit", dec_bits)
        else:
            dec_bits = find_dec_bits_max_min(features, bit_width=8)
            print(layer.name, "Quantized method:", "max-min", " Values max:", np.max(features), "min:", np.min(features), "dec bit", dec_bits)
        # quantise offset
        offset = int(np.round(offset * 2 ** dec_bits))
        # record the shift
        if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
            layer_q_list[layer.name.split(':')[0]] = [dec_bits, offset]
        else:
            layer_q_list[layer.name] = [dec_bits, offset]
        if ('batch_normalization' in layer.name):
            layer_q_list[layer.inbound_nodes[0].inbound_layers.name] = [dec_bits, offset]  # use the bn layer shift to update the last layer.

    # scan the layers backwards, try to unify the dec bit in multiple-input layers (add, mult, concat, etc.)
    LM = {}
    for layer in model.layers:
        LM[layer.name] = layer
    L = [l for l in model.layers[1:]]
    L.reverse()

    def update_previous_layer_shift(layer, dec_bit):
        if(type(layer.input) == list):
            for inp in layer.input:
                iname = inp.name.split('/')[0]
                if('input' in iname):
                    continue
                layer_q_list[iname][0] = dec_bit
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], dec_bit)
        else:
            iname = layer.input.name.split('/')[0]
            if('input' in iname):
                return
            layer_q_list[iname][0] = dec_bit
            if(not is_shift_layer(LM[iname])):
                update_previous_layer_shift(LM[iname], dec_bit)

    for layer in L:
        if(type(layer.input) == list):
            iname = layer.input[0].name.split('/')[0].split(':')[0]
            dec_min = layer_q_list[iname][0]
            # find the min dec bit among these inputs
            for inp in layer.input:
                iname = inp.name.split('/')[0].split(':')[0]
                if(layer_q_list[iname][0] < dec_min):
                    dec_min = layer_q_list[iname][0]
                if(layer_q_list[iname][0] != dec_min):
                    bFlag = True
            for inp in layer.input:
                iname = inp.name.split('/')[0].split(':')[0]
                layer_q_list[iname][0] = dec_min
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], dec_min)
            print('set dec bit', dec_min, 'for the input of', layer.name, ':', [inp.name.split('/')[0] for inp in layer.input])
            if(not is_shift_layer(layer) or dec_min < layer_q_list[layer.name][0]):  # update the current layer's shift only when we cannot change the shift
                layer_q_list[layer.name][0] = dec_min
    # quantise offset
    print("quantisation list", layer_q_list)
    return layer_q_list
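
# The returned dictionary maps each layer name to [dec_bits, offset]; RNN layers get an
# extra 'intermediate_<name>' entry holding the Q formats of their internal signals.
# Illustrative shape only (hypothetical layer names and values):
#
#   {'input_1': [7, 0], 'conv2d': [4, 0], 'dense': [2, 0], 'rnn': [5, 0],
#    'intermediate_rnn': [7, 7, 6]}
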
def layer_name_from_tensor(t):
    return t.name.replace(':', '/').split('/')[0]
def quantize_weights(model, name='weights.h', format='hwc', per_channel_quant=True, layer_q_list=None):
    # Quantize weights to 8 bits using (min, max) and write them to file
    f = open(name, 'w')
    f.write('#include "nnom.h"\n\n')
    f.write('/* Weights, bias and Q format */\n')
    f.close()
    for curr_idx, layer in enumerate(model.layers):
        if (not layer.weights):
            continue
        # before merging the bn layer, check if the bn is "legally" placed after a Conv
        if('batch_normalization' in layer.name) and \
                ('conv' not in layer.inbound_nodes[0].inbound_layers.name):
            raise Exception('Only support batch_normalization placed after conv', layer.name,
                            layer.inbound_nodes[0].inbound_layers.name)
        # try to fuse BN layer to convolutional
        if ('conv' in layer.name) and \
                ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
            fuse_bn_to_conv(layer)

        # generate weights and bias now
        weight_dec_shift = 0
        print('quantizing weights for layer', layer.name)
        layer_weights = layer.get_weights()
        for idx, var in enumerate(layer_weights):
            var_name = convert_tensor_name(layer.weights[idx])
            var_values = var
            if("kernel" not in var_name and 'bias' not in var_name):  # ignore batch normalisation's parameters
                continue

            if (per_channel_quant and type(layer) in [Conv2D, Conv1D, DepthwiseConv2D, Conv2DTranspose]):
                if(type(layer) in [DepthwiseConv2D] and "kernel" in var_name):  # depthwise kernel quantised per channel
                    shape = var_values.shape[:2] + (-1,)  # need to combine the mult and channel first
                    var = var_values.reshape(shape)
                    dec_bits = find_dec_bits_max_min_axis(var, axis=-1, bit_width=8)
                elif(type(layer) in [Conv2DTranspose]):
                    dec_bits = find_dec_bits_max_min_axis(var_values, axis=-2, bit_width=8)
                else:
                    dec_bits = find_dec_bits_max_min_axis(var_values, bit_width=8)
            else:
                dec_bits = find_dec_bits_max_min(var_values, bit_width=8)
            print(' ', var_name, "dec bit", dec_bits)

            # kernel dec, bias dec, bias shift, output shift
            if(is_shift_layer(layer) and not is_rnn_layer(layer)):
                inp = layer.input.name.replace(':', '/').split('/')[0]
                layer_input_dec = layer_q_list[inp][0]
                layer_output_dec = layer_q_list[layer.name][0]
                if ("kernel" in var_name):
                    weight_dec_shift = dec_bits
                else:
                    # channel wise
                    if hasattr(dec_bits, '__len__'):
                        bias_shift = np.full(len(dec_bits), layer_input_dec)+weight_dec_shift-dec_bits
                        layer_output_shift = np.full(len(weight_dec_shift), layer_input_dec) + weight_dec_shift \
                                             - np.full(len(weight_dec_shift), layer_output_dec)
                        if (np.min(bias_shift) < 0):
                            for i, w_dec in enumerate(weight_dec_shift):
                                if (bias_shift[i] < 0):
                                    dec_bits[i] = w_dec
                                    bias_shift[i] = 0
                    # layer wise
                    else:
                        bias_shift = layer_input_dec + weight_dec_shift - dec_bits
                        layer_output_shift = layer_input_dec + weight_dec_shift - layer_output_dec
                        if (bias_shift < 0):
                            dec_bits = weight_dec_shift
                            bias_shift = 0
            # RNN layer's kernel dec, bias dec, bias shift, output shift
            if(is_rnn_layer(layer)):
                inp = layer.input.name.replace(':', '/').split('/')[0]
                layer_input_dec = layer_q_list[inp][0]
                layer_output_dec = layer_q_list[layer.name][0]
                # if (type(layer.cell) is SimpleRNNCell):
                if ("kernel" in var_name and 'recurrent' not in var_name):
                    weight_dec_shift = dec_bits
                elif ('bias' in var_name):
                    bias_shift = layer_input_dec + weight_dec_shift - dec_bits
                    layer_output_shift = layer_input_dec + weight_dec_shift - layer_output_dec  # this is not valid
                    if (bias_shift < 0):
                        dec_bits = weight_dec_shift
                        bias_shift = 0

            # now quantise them
            if(type(layer) in [Conv2D, Conv1D, DepthwiseConv2D, Conv2DTranspose]):
                if(type(layer) in [DepthwiseConv2D] and "kernel" in var_name):
                    old_shape = var_values.shape
                    var_values = quantize_data(var_values.reshape(var_values.shape[:2] + (-1,)),
                                               dec_bits, axis=-1, per_axis=per_channel_quant)  # convert to [h, w, out x mult]
                    var_values = var_values.reshape(old_shape)  # convert the shape back to [h, w, out, mult]
                elif(type(layer) in [Conv2DTranspose] and "kernel" in var_name):
                    var_values = quantize_data(var_values, dec_bits, axis=-2, per_axis=per_channel_quant)  # [h, w, out, in]
                else:
                    var_values = quantize_data(var_values, dec_bits, per_axis=per_channel_quant)  # [h, w, in, out]
            else:
                var_values = quantize_data(var_values, dec_bits, per_axis=False)

            # CHW format
            if ('chw' in format):
                if (is_lstm_layer(layer) or is_gru_layer(layer)):  # currently we use 16 bit intermediate, use the reorder optimisation
                    transposed_wts = np.transpose(var_values)
                    if('kernel' in var_name):
                        transposed_wts = convert_q7_q15_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # dense and rnn still work under the HWC format
                elif ("dense" in var_name or is_rnn_layer(layer)) and "kernel" in var_name:
                    transposed_wts = np.transpose(var_values)
                    transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # all other kernels and biases stay the same
                else:
                    transposed_wts = var_values
            # HWC format (NNOM/CMSIS-NN use [out_ch, h, w, in_ch], in C order)
            else:
                if (len(var_values.shape) == 3):  # 1D convolution layer weights
                    transposed_wts = np.transpose(var_values, (2, 0, 1))
                elif (len(var_values.shape) == 4):  # 2D convolution layer weights
                    if(type(layer) == Conv2DTranspose):  # test
                        transposed_wts = np.transpose(var_values, (2, 0, 1, 3))
                    elif type(layer) == DepthwiseConv2D:
                        transposed_wts = var_values  # np.transpose(var_values, (0, 1, 3, 2)) # [h, w, out, mult] test for multiplier
                    else:
                        transposed_wts = np.transpose(var_values, (3, 0, 1, 2))
                elif(is_lstm_layer(layer) or is_gru_layer(layer)):  # currently we use 16 bit intermediate, use the reorder optimisation
                    if('kernel' in var_name):
                        transposed_wts = np.transpose(var_values)
                        transposed_wts = convert_q7_q15_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                    else:  # bias does not need to be transposed (for GRU which has a 2d bias)
                        transposed_wts = var_values
                else:  # fully connected layer weights or biases of any layer
                    # test, use opt weight reorder
                    transposed_wts = np.transpose(var_values)
                    if ("dense" in var_name or is_rnn_layer(layer)) and "kernel" in var_name:  # and other RNN layers
                        transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))

            with open(name, 'a') as f:
                def write_weights(f, name, value):
                    f.write('#define ' + name + ' {')
                    value.tofile(f, sep=", ", format="%d")
                    f.write('}\n\n')
                # weights or bias
                write_weights(f, var_name.upper(), transposed_wts)
                # dec bits
                write_weights(f, var_name.upper() + '_DEC_BITS', np.array(dec_bits))
                # for test
                if("bias" in var_name):
                    f.write('#define ' + layer.name.upper() + '_BIAS_LSHIFT ' + to_cstyle(bias_shift) + '\n\n')
                    # f.write('#define ' + layer.name.upper() + '_OUTPUT_DEC ' + to_cstyle(layer_output_dec) + '\n\n') # not here
                    f.write('#define ' + layer.name.upper() + '_OUTPUT_RSHIFT ' + to_cstyle(layer_output_shift) + '\n\n')
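
# The generated header then contains, for each weighted layer, macros of roughly this shape
# (illustrative; the exact macro names come from convert_tensor_name() and the layer name,
# and the shift values are rendered by to_cstyle()):
#
#   #define CONV2D_KERNEL_0 {12, -3, ...}
#   #define CONV2D_KERNEL_0_DEC_BITS {5}
#   #define CONV2D_BIAS_LSHIFT ...
#   #define CONV2D_OUTPUT_RSHIFT ...
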
def generate_model(model, x_test, per_channel_quant=False, name='weights.h', format='hwc', quantize_method='max_min'):
    """
    :param model: the trained Keras model
    :param x_test: calibration data used to find the output Q formats
    :param name: name of the generated weight header file
    :param format: 'hwc' or 'chw' backend format
    :param quantize_method: "max_min" or "kld"
    :return:
    """
    # get the quantized output range/format
    layer_q_list = quantize_output(model, x_test, layer_offset=False, quantize_method=quantize_method)
    # quantize weights and output shift
    quantize_weights(model, per_channel_quant=per_channel_quant, name=name, format=format, layer_q_list=layer_q_list)
    # now generate the model
    if (type(model.layers[0]) != InputLayer):
        L = [model.input] + model.layers
    else:
        L = model.layers
    with open(name, 'a') as fp:
        # generate the list of outputs
        fp.write('\n/* output q format for each layer */\n')
        for layer in L:
            if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
                iname = layer.name.split(':')[0]
            else:
                iname = layer.name
            fp.write('#define %s_OUTPUT_DEC %s\n' % (iname.upper(), layer_q_list[iname][0]))
            fp.write('#define %s_OUTPUT_OFFSET %s\n' % (iname.upper(), layer_q_list[iname][1]))
        fp.write('\n/* bias shift and output shift for none-weighted layer */\n')

        # generate output shift for the layers without weights (weighted layers were generated in quantize_weights)
        for layer in model.layers:
            if (is_shift_layer(layer)):
                iname = layer.name.upper()
                # add, sub
                if ('add' in layer.name or 'subtract' in layer.name):
                    # only consider the first input; they have been set to the same value in quantize_output()
                    inp = layer.input[0].name.replace(':', '/').split('/')[0].upper()
                    fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_DEC-{0}_OUTPUT_DEC)\n'.format(
                        iname, inp))
                    fp.write(
                        '#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(
                            iname))
                # mult is different, Q3.4 * Q3.4 = Q6.8. if mult out is Q4.3, then shift (Q.4+Q.4)-Q.3=5. Am I right?
                elif ('multiply' in layer.name):
                    inp = layer.input[0].name.replace(':', '/').split('/')[0].upper()
                    fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_DEC*2-{0}_OUTPUT_DEC)\n'.format(
                        iname, inp))
                    fp.write(
                        '#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(
                            iname))

        fp.write('\n/* tensors and configurations for each layer */\n')
        LI = {}
        ID = 0

        def is_skipable_layer(layer):
            # FIXME: add more that could be skipped
            if ('lambda' in layer.name or
                    'dropout' in layer.name or
                    'gaussian_noise' in layer.name or
                    'batch_normalization' in layer.name
                    # or ('flatten' in layer.name and 'chw' not in format)
                    ):  # the flatten layer can be skipped in HWC but is needed in CHW
                return True
            return False

        output_num = 0
        for id, layer in enumerate(L):
            if (is_skipable_layer(layer)):
                inp = layer.input.name.replace(':', '/').split('/')[0]
                LI[layer.name] = (LI[inp][0], layer)
            else:
                if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
                    LI[layer.name.split(':')[0]] = (ID, layer)
                else:
                    LI[layer.name] = (ID, layer)
                ID += 1

            def gen_weight_tensor(w, per_axis):
                var_cname = convert_tensor_name(w) + '_data'
                dec_bits_name = convert_tensor_name(w).upper() + '_DEC_BITS'
                fp.write(gen_values(var_cname, convert_tensor_name(w).upper()))
                fp.write(gen_tensor(w, dec_bits=dec_bits_name, tensor_value=var_cname, per_axis=per_axis))

            # output the config of all layers
            if (type(layer) in [InputLayer] or 'input' in layer.name):
                if(type(layer) == tf.Tensor):
                    raise Exception('Not yet support tensor as input/or Sequential model. '
                                    'please use Input layer as your first layer in the model', layer.name, layer)
                size = 1
                for s in layer.input.shape[1:]:
                    size *= s if s is not None else 1
                fp.write(gen_values('nnom_input_data', '{0}', size=str(size), dtype='static int8_t'))
                fp.write(gen_tensor(layer.input, layer_q_list[layer.name][0], tensor_value='nnom_input_data', is_io_tensor=True))
                fp.write(gen_io_config(layer, tensor_name=convert_tensor_name(layer.input)))
            elif (type(layer) in [Conv2D, Conv1D, DepthwiseConv2D]):
                for w in layer.weights:
                    gen_weight_tensor(w, per_axis=per_channel_quant)
                fp.write(gen_conv2d_config(layer, layer.name.upper() + '_OUTPUT_RSHIFT', layer.name.upper() + '_BIAS_LSHIFT'))
            elif (type(layer) in [Conv2DTranspose]):
                for w in layer.weights:
                    gen_weight_tensor(w, per_axis=per_channel_quant)
                fp.write(gen_conv2d_trans_config(layer, layer.name.upper() + '_OUTPUT_RSHIFT', layer.name.upper() + '_BIAS_LSHIFT'))
            elif (type(layer) in [Dense]):
                for w in layer.weights:
                    gen_weight_tensor(w, per_axis=False)
                fp.write(gen_dense_config(layer, layer.name.upper() + '_OUTPUT_RSHIFT', layer.name.upper() + '_BIAS_LSHIFT'))
            elif (type(layer) in [MaxPooling2D, AveragePooling2D, MaxPooling1D, AveragePooling1D]):
                fp.write(gen_pooling_config(layer))
            elif (type(layer) in [GlobalMaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling1D, GlobalAveragePooling1D]):
                fp.write(gen_gl_pooling_config(layer))
            elif (type(layer) in [Multiply, Add, Subtract]):
                fp.write(gen_matrix_config(layer, output_shift_name=layer.name.upper() + '_OUTPUT_RSHIFT'))
            elif (type(layer) in [ZeroPadding2D, ZeroPadding1D]):
                fp.write(gen_zero_padding_config(layer))
            elif (type(layer) in [Cropping2D, Cropping1D]):
                fp.write(gen_cropping_config(layer))
            elif (type(layer) in [Softmax]):
                fp.write(gen_softmax_config(layer))
            elif (type(layer) in [Flatten]):
                fp.write(gen_flatten_config(layer))
            elif (type(layer) in [Reshape]):
                fp.write(gen_reshape_config(layer))
            elif (type(layer) in [Concatenate]):
                fp.write(gen_concat_config(layer))
            elif (type(layer) in [Lambda]):
                fp.write(gen_lambda_config(layer))
            elif (type(layer) in [UpSampling2D, UpSampling1D]):
                fp.write(gen_upsampling_config(layer))
            elif(is_rnn_layer(layer)):
                if(type(layer.cell) is SimpleRNNCell):
                    for w in layer.weights:
                        gen_weight_tensor(w, per_axis=False)
                    fp.write(gen_simple_cell_config(layer, layer_q_list['intermediate_' + layer.name]))
                elif(type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
                    for w in layer.weights:
                        gen_weight_tensor(w, per_axis=False)
                    fp.write(gen_gru_cell_config(layer, layer_q_list['intermediate_' + layer.name]))
                elif(type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
                    for w in layer.weights:
                        gen_weight_tensor(w, per_axis=False)
                    fp.write(gen_lstm_cell_config(layer, layer_q_list['intermediate_' + layer.name]))
                fp.write(gen_rnn_config(layer))

            # test, multiple output layer
            if(len(layer.outbound_nodes) == 0):
                size = 1
                for s in layer.output.shape[1:]:
                    size *= s if s is not None else 1
                if(output_num == 0):  # the first output or the only output
                    fp.write(gen_values('nnom_output_data', '{0}', size=str(size), dtype='static int8_t'))
                    fp.write(gen_output_config(layer, dec_bits=layer.name.upper() + '_OUTPUT_DEC', output_num=output_num, value_name='nnom_output_data'))
                    output_num += 1
                else:
                    output_value_names = 'nnom_output_data' + str(output_num)
                    fp.write(gen_values(output_value_names, '{0}', size=str(size), dtype='static int8_t'))
                    fp.write(gen_output_config(layer, dec_bits=layer.name.upper() + '_OUTPUT_DEC', output_num=output_num, value_name=output_value_names))
                    output_num += 1

            # # last layer, attach the additional nnom output layer
            # if(id == len(L)-1):
            #     size = 1
            #     for s in layer.output.shape[1:]:
            #         size *= s if s is not None else 1
            #     fp.write(gen_values('nnom_output_data', '{0}', size=str(size), dtype='static int8_t'))
            #     fp.write(gen_output_config(layer, dec_bits=layer.name.upper()+'_OUTPUT_DEC', value_name='nnom_output_data'))

        # write version
        fp.write('/* model version */\n')
        fp.write('#define NNOM_MODEL_VERSION (10000*{0} + 100*{1} + {2})\n'.format(model_major_version, model_sub_version, model_reversion))

        # model
        fp.write('\n/* nnom model */\n')
        fp.write('static nnom_model_t* nnom_model_create(void)\n{\n')
        fp.write('\tstatic nnom_model_t model;\n')
        if (ID > 32):
            fp.write('\tnnom_layer_t **layer = (nnom_layer_t**)malloc(sizeof(nnom_layer_t *)*%d);\n' % (ID + 1))
            fp.write('\tif(NULL == layer) return NULL;\n')
        else:
            fp.write('\tnnom_layer_t* layer[%d];\n' % (ID + 1))
        fp.write('\n\tcheck_model_version(NNOM_MODEL_VERSION);')
        fp.write('\n\tnew_model(&model);\n\n')

        # the output order is inverted in keras, which is odd
        output_num = (len(model.output) - 1) if type(model.output) is list else 0
        for layer in L:
            if (is_skipable_layer(layer)):
                continue
            # FIXME: need a better solution to separate the input 'tensor' from other layers
            if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
                id, _ = LI[layer.name.split(':')[0]]
            else:
                id, _ = LI[layer.name]

            if ('input' in layer.name):
                fp.write('\tlayer[%d] = input_s(&%s_config);\n' % (id, layer.name))

            # convolutional
            elif ('conv1d' in layer.name
                  or 'conv2d' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if('transpose' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(conv2d_trans_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                elif('depthwise' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(dw_conv2d_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(conv2d_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('activation' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                if (cfg['activation'] == 'relu'):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                elif (cfg['activation'] == 'tanh'):
                    fp.write('\tlayer[%s] = model.active(act_hard_tanh(%s_OUTPUT_DEC), layer[%s]);\n' % (
                        id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_DEC), layer[%s]);\n' % (
                        id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'hard_sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_hard_sigmoid(%s_OUTPUT_DEC), layer[%s]);\n' % (
                        id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'softmax'):
                    fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n' % (id, LI[inp][0]))
            elif ('leaky_re_lu' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                fp.write('\tlayer[%s] = model.active(act_leaky_relu(%ff), layer[%s]);\n' % (id, cfg["alpha"], LI[inp][0]))
            elif ('re_lu' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                if(cfg['max_value'] is None and cfg['negative_slope'] == 0 and cfg['threshold'] == 0):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                else:
                    if(cfg['max_value'] is None):
                        max_v = 'INFINITY '
                    else:
                        max_v = str(cfg['max_value'])
                    fp.write('\tlayer[%s] = model.active(act_adv_relu(%f,%s,%f), layer[%s]);\n'
                             % (id, cfg['negative_slope'], max_v, cfg['threshold'], LI[inp][0]))
            # pooling
            elif ('max_pooling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if ('global' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(global_maxpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(maxpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('average_pooling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if ('global' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(global_avgpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(avgpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('up_sampling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(upsample_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # zero padding
            elif ('zero_padding' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(zeropadding_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # cropping
            elif ('cropping' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(cropping_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))

            # others
            elif ('flatten' in layer.name):  # flatten is needed in the CHW backend but not in HWC
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(flatten_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('reshape' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(reshape_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('concatenate' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(concat_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('add' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(add_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('subtract' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(sub_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('multiply' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(mult_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('dense' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(dense_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('softmax' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(softmax_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))

            elif (is_rnn_layer(layer)):
                inp = layer_name_from_tensor(layer.input)
                line = '\tlayer[{0}] = model.hook(rnn_s(<rnn_cell>, &{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0])
                if (type(layer.cell) is SimpleRNNCell):
                    line = line.replace('<rnn_cell>', 'simple_cell_s(&%s_simple_cell_config)' % (layer.name))
                elif (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
                    line = line.replace('<rnn_cell>', 'gru_cell_s(&%s_gru_cell_config)' % (layer.name))
                elif (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
                    line = line.replace('<rnn_cell>', 'lstm_cell_s(&%s_lstm_cell_config)' % (layer.name))
                fp.write(line)
            else:
                raise Exception('unsupported layer', layer.name, layer)

            # test, multiple output layer (not yet working with multiple outputs)
            if(len(layer.outbound_nodes) == 0):
                fp.write('\tlayer[{0}] = model.hook(output_s(&{1}_config), layer[{2}]);\n'.format(id + 1, 'output' + str(output_num), LI[inp][0] + 1))
                output_num -= 1  # the numbering is inverted in keras, not a good solution yet.

            """
            # temporary fix for activations attached to layers at construction
            def is_activation_attached(layer):
                if(("Softmax" in layer.output.name and "softmax" not in layer.name) or
                        ("Relu" in layer.output.name and "re_lu" not in layer.name) or
                        ("Sigmoid" in layer.output.name and "sigmoid" not in layer.name) or
                        ("Tanh" in layer.output.name and "tanh" not in layer.name)):
                    return True
                return False
            if "input" not in layer.name and is_activation_attached(layer):
                inp = layer.output.name.replace(':', '/').split('/')[0]
                cfg = layer.get_config()
                if(cfg['activation'] == 'relu'):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                if(cfg['activation'] == 'tanh'):
                    fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n' % (id, inp.upper(), LI[inp][0]))
                if(cfg['activation'] == 'sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n' % (id, inp.upper(), LI[inp][0]))
                elif(cfg['activation'] == 'softmax'):
                    fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n' % (id, LI[inp][0]))
            """

        # generate the final output layer
        # fp.write('\tlayer[{0}] = model.hook(output_s(&{1}_config), layer[{2}]);\n'.format(id+1, 'output', LI[inp][0]+1))
        fp.write('\tmodel_compile(&model, layer[0], layer[%s]);\n' % (id + 1))
        if (ID > 32):
            fp.write('\tfree(layer);\n')
        fp.write('\treturn &model;\n}\n')
    with open('.layer_q_list', 'w') as fp:
        fp.write(str(layer_q_list))
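
# Typical usage (illustrative; the model file and variable names below are hypothetical):
#
#   model = tf.keras.models.load_model('model.h5')
#   generate_model(model, x_test[:100], name='weights.h')   # calibrate on a subset of the test set
#   evaluate_model(model, x_test, y_test)
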
def evaluate_model(model, x_test, y_test, running_time=False, to_file='evaluation.txt'):
    # Score the trained model.
    scores = model.evaluate(x_test, y_test, verbose=2)
    print('Test loss:', scores[0])
    print('Top 1:', scores[1])

    if(len(y_test.shape) > 1):
        bs = model.input.shape[0]
        predictions = model.predict(x_test, batch_size=bs)
        matrix = skmetrics.confusion_matrix(y_test.argmax(axis=1), predictions.argmax(axis=1))
        print(matrix)

    run_time = 0
    if running_time:
        # try to measure the inference time
        T = time.time()
        bs = model.input.shape[0]
        for i in range(10):
            model.predict(x_test, batch_size=bs)
        T = time.time() - T
        run_time = round((T / 10 / x_test.shape[0] * 1000 * 1000), 2)
        print("Running time:", run_time, "us")
    #
    with open(to_file, 'w') as f:
        f.write("Running time: " + str(run_time) + "us" + "\n")
        f.write('Test loss:' + str(scores[0]) + "\n")
        f.write('Top 1:' + str(scores[1]) + "\n")
        if (len(y_test.shape) > 1):
            for row in matrix:
                row.tofile(f, sep=',')
                f.write("\n")
    return scores
def f2q(d, Q):
    '''To convert a number from floating point to Qm.n format:
      1. Multiply the floating point number by 2^n
      2. Round to the nearest integer
    '''
    return np.round(d*2**Q)


def q2f(d, Q):
    '''To convert a number from Qm.n format to floating point:
      1. Convert the number to floating point as if it were an integer; in other words, remove the binary point
      2. Multiply by 2^-n
    '''
    return d*2**-Q
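
# Worked example (illustrative): with Q0.7, f2q(0.7, 7) = round(0.7 * 128) = 90,
# and q2f(90, 7) = 90 / 128 = 0.703125, i.e. a quantisation error of about 0.003.
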
def show_weights(w, name):
    sz = 1
    for s in w.shape:
        sz = sz*s
    aL = w.reshape(sz,)
    MIN, MAX = min(aL), max(aL)
    Q = int(np.ceil(np.log2(max(abs(MIN), abs(MAX)))))
    Q = 7-Q
    qL = f2q(aL, Q)
    qL = q2f(qL, Q)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.title(name)
    plt.plot(aL)
    plt.grid()
    aL.sort()
    plt.plot(aL, 'r')
    plt.grid()
    plt.subplot(132)
    plt.title('Q%s' % (Q))
    qL.sort()
    plt.plot(aL, 'r')
    plt.plot(qL, 'g')
    plt.grid()
    plt.subplot(133)
    plt.hist(aL, 100)
    plt.title('hist')
    plt.grid()
    plt.show()


def compare(a, b, name):
    sz = 1
    for s in a.shape:
        sz = sz*s
    aL = a.reshape(sz,)
    bL = b.reshape(sz,)
    assert(len(aL) == len(bL))
    Z = list(zip(aL, bL))
    Z.sort(key=lambda x: x[0])
    aL1, bL1 = zip(*Z)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.plot(aL)
    plt.plot(aL1, 'r')
    plt.grid()
    plt.title('tf-%s' % (name))
    plt.subplot(133)
    plt.plot(bL1, 'g')
    plt.plot(aL1, 'r')
    plt.grid()
    plt.title('compare')
    plt.subplot(132)
    bL1 = list(bL1)
    bL1.sort()
    plt.plot(bL)
    plt.plot(bL1, 'g')
    plt.grid()
    plt.title('nn-%s' % (name))
    plt.show()