xiuos/APP_Framework/Framework/knowing/nnom/scripts/nnom.py

1199 lines
57 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'''
Copyright (c) 2018-2020
Jianjia Ma
majianjia@live.com
SPDX-License-Identifier: Apache-2.0
Change Logs:
Date Author Notes
2019-02-05 Jianjia Ma The first version
'''
import sklearn.metrics as skmetrics
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import *
from tensorflow.keras.layers import *
from fully_connected_opt_weight_generation import *
from gen_config import *
import scipy.stats
import time
import warnings
# Version of the generated model format. Kept in sync with the NNOM runtime's
# version macros (copied below from nnom.h for reference).
model_major_version = 0
model_sub_version = 4
model_reversion = 3
#define NNOM_MAJORVERSION 0L /**< major version number */
#define NNOM_SUBVERSION 4L /**< minor version number */
#define NNOM_REVISION 3L /**< revise version number */
#define NNOM_VERSION (NNOM_MAJORVERSION * 10000) + (NNOM_SUBVERSION * 100) + NNOM_REVISION)
def fuse_bn_to_conv(layer):
    """Fold the batch-normalization layer that follows `layer` into the conv
    kernel and bias, in place.

    Only acts when `layer` is a conv layer whose first outbound layer is a
    batch_normalization layer; otherwise it is a no-op. After fusing, the BN
    layer's scale/shift is baked into the conv weights.

    NOTE(review): this mixes public `outbound_nodes` and private
    `_outbound_nodes`; both appear to refer to the same node list in TF2 —
    confirm against the installed Keras version.
    """
    # try to fuse BN layer to convolutional
    if ('conv' in layer.name) and \
            ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
        print("fusing batch normalization to", layer.name)
        bn_layer = layer._outbound_nodes[0].outbound_layer
        c_w = layer.get_weights()[0]  # conv kernel
        c_b = layer.get_weights()[1]  # conv bias
        print('original weight max', c_w.max(), 'min', c_w.min())
        print('original bias max', c_b.max(), 'min', c_b.min())
        # Keras BN weight order: gamma (scale), beta (shift), moving mean, moving variance
        bn_gamma = bn_layer.get_weights()[0]
        bn_beta = bn_layer.get_weights()[1]
        bn_mean = bn_layer.get_weights()[2]
        bn_variance = bn_layer.get_weights()[3]
        epsilon = 1e-3 # default epsilon for tf.slim.batch_norm
        if ('conv2d' in layer.name):
            if "depthwise" in layer.name: # depthwise batchnorm params are ordered differently
                # depthwise 2D kernel: BN channel index is in_ch * mult + mult-offset
                for l in range(c_w.shape[3]):
                    for k in range(c_w.shape[2]):
                        for j in range(c_w.shape[1]):
                            for i in range(c_w.shape[0]):
                                c_w[i][j][k][l] *= bn_gamma[k*c_w.shape[3]+l] / np.sqrt(bn_variance[k*c_w.shape[3]+l] + epsilon)
                depth_dim = c_w.shape[2] * c_w.shape[3] # test needed
            # normal conv
            else:
                # regular 2D kernel: BN params indexed by the last (output-channel) axis
                for l in range(c_w.shape[3]):
                    for k in range(c_w.shape[2]):
                        for j in range(c_w.shape[1]):
                            for i in range(c_w.shape[0]):
                                c_w[i][j][k][l] *= bn_gamma[l] / np.sqrt(bn_variance[l] + epsilon)
                depth_dim = c_w.shape[3]
            # fold mean/variance/beta into the bias for every output channel
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
        # conv1d
        else:
            epsilon = 1e-3 # default epsilon for tf.slim.batch_norm
            for k in range(c_w.shape[2]):
                for j in range(c_w.shape[1]):
                    for i in range(c_w.shape[0]):
                        if "depthwise" in layer.name: # depthwise batchnorm params are ordered differently
                            c_w[i][j][k] *= bn_gamma[j] / np.sqrt(bn_variance[j] + epsilon)
                        else:
                            c_w[i][j][k] *= bn_gamma[k] / np.sqrt(bn_variance[k] + epsilon)
            if "depthwise" in layer.name:
                depth_dim = c_w.shape[1]*c_w.shape[2] # need to be tested
            else:
                depth_dim = c_w.shape[2]
            for l in range(depth_dim):
                c_b[l] = (bn_gamma[l] * (c_b[l] - bn_mean[l]) / np.sqrt(bn_variance[l] + epsilon)) + bn_beta[l]
        print('fused weight max', c_w.max(), 'min', c_w.min())
        print('fused bias max', c_b.max(), 'min', c_b.min())
        # write the weights back to the layer
        # after that, the model will be destroyed.. need a better way to pass the new weight
        layer.set_weights([c_w, c_b])
def generate_test_bin(x, y, name='test_data_with_label.bin'):
    """Quantise `x` to int8 and write it, interleaved with its labels, to a
    binary file laid out in 128-sample batches (matching the Y-modem example).

    :param x: input data; scaled to int8 with a max/min-derived shift
    :param y: labels, either one-hot (2-D) or already integer class ids (1-D)
    :param name: output file path
    """
    # quantise the input with a non-saturating max/min shift
    dec_bits = find_dec_bits_max_min(x, bit_width=8)
    x = np.round(x * 2 ** dec_bits).clip(-128, 127).astype(np.int8)
    # recover integer class ids from a one-hot matrix when necessary
    if len(y.shape) > 1:
        test_label = np.argwhere(y == 1).astype(np.int8)[:, 1]
    else:
        test_label = y
    flat = x.astype(dtype="byte")
    num_samples = flat.shape[0]          # total pieces of data
    flat = flat.flatten()                # flatten to get the total size
    sample_size = int(flat.size / num_samples)  # bytes per sample; must divide evenly
    label_batch = 128  # the Y-modem example uses 128 batch
    with open(name, 'wb') as f:
        cursor = 0
        # full batches: 128 labels followed by the matching 128 data blocks
        while cursor <= (test_label.size - label_batch):
            test_label[cursor: cursor + label_batch].tofile(f)
            flat[sample_size * cursor: sample_size * (cursor + label_batch)].tofile(f)
            cursor += label_batch
        # trailing partial batch: zero-pad the labels up to the batch size
        if cursor < test_label.size:
            remaining = test_label.size - cursor
            tail_labels = np.pad(test_label[cursor:], (0, label_batch - remaining), mode='constant')
            tail_labels.tofile(f)
            flat[sample_size * cursor:].tofile(f)
    print("binary test file generated:", name)
    print("test data length:", test_label.size)
    return
def is_shift_layer(layer):
    ''' layer which can change the output encoding'''
    # FIXME: add more which will change the output shift
    name = layer.name
    keywords = ('input', 'conv2d', 'conv1d', 'dense',
                'softmax', 'sigmoid', 'tanh', 'subtract', 'multiply')
    if any(key in name for key in keywords):
        return True
    # 'zero_padding' also contains the substring 'add', so exclude it explicitly
    if 'add' in name and 'zero' not in name:
        return True
    if 'activation' in name:
        act = layer.get_config()['activation']
        if act in ('softmax', 'hard_sigmoid', 'tanh', 'hard_tanh'):
            return True
    return is_rnn_layer(layer)
def is_shift_fixed(layer):
    ''' layer which shift to a fixed value'''
    # FIXME: add more which will change the output shift
    name = layer.name
    for key in ('softmax', 'sigmoid', 'tanh'):
        if key in name:
            return True
    if 'activation' in name:
        act = layer.get_config()['activation']
        if act in ('softmax', 'sigmoid', 'hard_sigmoid', 'tanh', 'hard_tanh'):
            return True
    return is_rnn_layer(layer)
def is_lstm_layer(layer):
    """True when `layer` is an LSTM layer, or an RNN wrapper around an LSTMCell."""
    direct = type(layer) is LSTM or 'lstm' in layer.name
    if direct:
        return True
    wrapper = type(layer) is RNN or 'rnn' in layer.name
    if wrapper and (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
        return True
    return False
def is_gru_layer(layer):
    """True when `layer` is a GRU layer, or an RNN wrapper around a GRUCell."""
    direct = type(layer) is GRU or 'gru' in layer.name
    if direct:
        return True
    wrapper = type(layer) is RNN or 'rnn' in layer.name
    if wrapper and (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
        return True
    return False
def is_rnn_layer(layer):
    """True for any recurrent layer: plain RNN (by name), LSTM or GRU."""
    if 'rnn' in layer.name:
        return True
    return is_lstm_layer(layer) or is_gru_layer(layer)
def find_offset(data):
    """
    Offset of the original data before quantisation
    :param data: array-like activation data
    :return: offset of the data block (its mean value)
    """
    offset = np.average(data)
    return offset
def find_dec_bits_max_min(data, bit_width=8, maximum_bit=32):
    """
    A regular non-saturated shift-based quantisation method using max/min values.

    :param data: numpy array (any shape) to analyse
    :param bit_width: total bit width of the target format (sign bit included)
    :param maximum_bit: cap on the decimal bits, in case very small data (e.g. bias)
                        would otherwise yield a huge dec bit
    :return: number of decimal (fractional) bits for the Qm.n format
    """
    # allow very small saturation: shave 1/2^bit_width off the extreme values
    max_val = abs(data.max()) - abs(data.max() / pow(2, bit_width))
    min_val = abs(data.min()) - abs(data.min() / pow(2, bit_width))
    largest = max(max_val, min_val)
    if largest <= 0:
        # all-zero data: any shift represents it exactly; use the maximum
        # allowed precision. (The previous code crashed here on log2(0).)
        return maximum_bit
    int_bits = int(np.ceil(np.log2(largest)))
    dec_bits = (bit_width - 1) - int_bits
    return min(dec_bits, maximum_bit)
def find_dec_bits_max_min_axis(data, axis=-1, bit_width=8, maximum_bit=32):
    """
    Per-channel variant of find_dec_bits_max_min(): a regular non-saturated
    shift-based quantisation using max/min values along one axis.

    :param data: numpy array to analyse
    :param axis: axis whose indices define the channels (one dec bit each)
    :param bit_width: total bit width of the target format (sign bit included)
    :param maximum_bit: cap on the decimal bits per channel
    :return: list of dec bits, one per index along `axis`
    """
    dec_bits = []
    for i in np.arange(0, data.shape[axis]):
        d = np.take(data, indices=i, axis=axis)
        # allow very small saturation, as in the whole-tensor version
        max_val = abs(d.max()) - abs(d.max() / pow(2, bit_width))
        min_val = abs(d.min()) - abs(d.min() / pow(2, bit_width))
        largest = max(abs(max_val), abs(min_val))
        if largest <= 0:
            # all-zero channel (common for untrained bias): any shift is exact.
            # (The previous code crashed here on log2(0).)
            dec_bits.append(maximum_bit)
            continue
        int_bit = int(np.ceil(np.log2(largest)))
        dec_bit = (bit_width - 1) - int_bit
        dec_bits.append(min(dec_bit, maximum_bit))
    return dec_bits
def find_dec_bits_kld(data, bit_width=8, scan_times=4, maximum_bit=16):
    """
    # saturation shift, using KLD method (Kullback-Leibler divergence)
    # Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
    :param data: The data for looking for quantisation
    :param bit_width: the bitwidth of the data
    :param scan_times: the times to try the best kld (normally the second is the best.)
    :return: dec bit width for this data
    """
    # do a regular non-saturated quantisation
    max_val = data.max()
    min_val = data.min()
    abs_max = max(abs(max_val), abs(min_val))
    int_bits = int(np.ceil(np.log2(max(abs(max_val), abs(min_val)))))
    dec_bits = (bit_width-1) - int_bits
    # now looking for the best quantisation using KLD method
    small_var = 1e-5
    # reference histogram uses 2047 fine bins; the candidate uses 255 coarse bins
    bins = np.arange(-abs_max, abs_max, abs_max / 2048 * 2)
    q_bins = np.arange(-abs_max, abs_max, abs_max / 256 * 2)
    flat_hist = np.histogram(data.flatten(), bins=bins)[0]
    kl_loss = []
    kl_shifts = []
    for shift in range(scan_times):
        t = 2 ** (dec_bits + shift) # 2-based threshold
        # simulate quantisation at this shift: round, rescale back, saturate to int8 range
        act = np.round(data.flatten() * t)
        act = act / t
        act = np.clip(act, -128 / t, 127 / t)
        act = np.histogram(act, bins=q_bins)[0]
        # expand the coarse histogram to fine resolution, spreading each coarse
        # count evenly over the fine bins that are non-zero in the reference
        act_hist = np.zeros(2047)
        chunk = int(2048 / 256)
        for i in range(int(255)):
            none_zero = np.count_nonzero(flat_hist[i * chunk:(i + 1) * chunk])
            if none_zero == 0:
                continue
            for j in range(chunk):
                act_hist[i * chunk + j] = act[i] / none_zero if flat_hist[i * chunk + j] != 0 else 0
        # NOTE(review): flat_hist comes from np.histogram and is an integer
        # array, so assigning small_var (1e-5) truncates back to 0 — only
        # act_hist actually receives the floor. scipy's entropy() tolerates
        # zeros in its first argument, so this still runs; confirm intent.
        flat_hist[flat_hist == 0] = small_var
        act_hist[act_hist == 0] = small_var
        kl = scipy.stats.entropy(flat_hist, act_hist)
        kl_loss.append(kl)
        kl_shifts.append(dec_bits + shift)
    # now get the least loss from the scaned kld shift
    dec_bits = kl_shifts[np.argmin(kl_loss)] # set the dec_bit to the KLD results
    return min(dec_bits, maximum_bit)
# convert to [-128,128) or int8
def quantize_data(data, dec_bits, axis=-1, per_axis=False, bitwith=8):
    """Scale floating point `data` by 2**dec_bits, round, and saturate to the
    signed `bitwith`-bit integer range (values stay in a float array).

    :param data: numpy array to quantise
    :param dec_bits: a single shift, or (when per_axis) one shift per channel
    :param axis: channel axis used when per_axis is True
    :param per_axis: apply a separate shift to every index along `axis`
    :param bitwith: target bit width (sign bit included)
    """
    lo = -2 ** (bitwith - 1)       # e.g. -128 for 8 bit
    hi = 2 ** (bitwith - 1) - 1    # e.g. +127 for 8 bit
    if not per_axis:
        # one shift for the whole tensor
        return np.clip(np.round(data * 2 ** dec_bits), lo, hi)
    # one shift per channel along `axis`
    channels = []
    for idx in np.arange(0, data.shape[axis]):
        channel = np.take(data, indices=idx, axis=axis)
        scaled = np.clip(np.round(channel * 2 ** dec_bits[idx]), lo, hi)
        channels.append(np.expand_dims(scaled, axis=axis))
    return np.concatenate(channels, axis=axis)
def quantize_rnn_intermediate_output(layer, features):
    """Determine the Q-format (dec bits) of the intermediate signals inside an
    RNN layer by replaying the cell arithmetic in numpy on `features`.

    :param layer: a keras recurrent layer (SimpleRNN / LSTM / GRU, or RNN wrapper)
    :param features: the layer's input activations — assumed (batch, time, feature);
                     TODO confirm against callers
    :return: list of dec bits per intermediate signal:
             SimpleRNN -> [q_out, q_h, q_h2]
             LSTM      -> [q_h, q_c, q_z]
             GRU       -> [q_h, q_z]
             []        -> unrecognised cell type
    """
    def nnom_sigmoid(data):
        return 1 / (1 + np.exp(-data))
    def nnom_tanh(data):
        return np.tanh(data)
    def split_array(d, num):
        # split the fused gate vector into `num` equal slices
        l = len(d)
        if (num == 4):
            return d[:int(l/4)], d[int(l/4): int(l/2)], d[int(l/2):-int(l/4)], d[-int(l/4):]
        elif (num == 3):
            return d[:int(l/3)], d[int(l/3): -int(l/3)], d[-int(l/3):]
    lcfg = layer.get_config()
    if (lcfg['go_backwards']):
        features = features[:, ::-1, :]  # reverse timestamp
    if (type(layer.cell) is SimpleRNNCell):
        cfg = layer.cell.get_config()
        kernel = layer.get_weights()[0]
        recurrent_kernel = layer.get_weights()[1]
        bias = layer.get_weights()[2]
        # replicate keras's implementation
        def simple_cell_step(inputs, state, kernel, recurrent_kernel, bias, activation):
            h = np.dot(inputs, kernel)
            h = np.add(h, bias)
            h2 = np.dot(state, recurrent_kernel)
            output = h + h2
            output = activation(output)
            return output, h, h2
        output_arrary = []
        h_array = []
        h2_array = []
        # FIX: the original compared strings with `is`, which relies on
        # CPython string interning; `==` is the correct comparison.
        activation = nnom_tanh if cfg['activation'] == 'tanh' else nnom_sigmoid
        state = np.zeros(cfg['units'])
        for feature in features:
            if (not layer.stateful):
                state = np.zeros(cfg['units'])
            for fe in feature:
                output, h, h2 = simple_cell_step(fe, state, kernel, recurrent_kernel, bias, activation)
                state = output
                output_arrary.append(output)
                h_array.append(h)
                h2_array.append(h2)
        qout = find_dec_bits_max_min(np.array(output_arrary))
        qh = find_dec_bits_max_min(np.array(h_array))
        qh2 = find_dec_bits_max_min(np.array(h2_array))
        return [qout, qh, qh2]
    elif (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
        cfg = layer.cell.get_config()
        kernel = layer.get_weights()[0]
        recurrent_kernel = layer.get_weights()[1]
        bias = layer.get_weights()[2]
        # replicate keras's LSTM cell maths; fused gate order is i, f, c, o
        def lstm_cell_step(cell_inputs, cell_states, kernel, recurrent_kernel, bias):
            h_tm1 = cell_states[0]  # previous memory state
            c_tm1 = cell_states[1]  # previous carry state
            z1 = np.dot(cell_inputs, kernel)
            z1 = np.add(z1, bias)
            z2 = np.dot(h_tm1, recurrent_kernel)
            z = z1 + z2  # -----> q_z
            z0, z1, z2, z3 = split_array(z, 4)
            i = nnom_sigmoid(z0)  # q0.7
            f = nnom_sigmoid(z1)  # q0.7
            c1 = f * c_tm1
            c2 = i * nnom_tanh(z2)  # q0.7
            c = c1 + c2  # -----> q_c
            o = nnom_sigmoid(z3)  # q0.7
            tc = nnom_tanh(c)
            h = o * tc  # q0.7
            return h, [h, c], z, z0, z1, z2, z3
        h_array = []
        c_array = []
        z_array = []
        state = [np.zeros(cfg['units']), np.zeros(cfg['units'])]
        for feature in features:
            if (not layer.stateful):
                state = [np.zeros(cfg['units']), np.zeros(cfg['units'])]
            for fe in feature:
                output, state, z, z0, z1, z2, z3 = lstm_cell_step(fe, state, kernel, recurrent_kernel, bias)
                h_array.append(output)
                c_array.append(state[1])
                z_array.append(z)
        q_h = find_dec_bits_max_min(np.array(h_array))
        q_c = find_dec_bits_max_min(np.array(c_array))
        q_z = find_dec_bits_max_min(np.array(z_array))
        # per-gate q formats (z0..z3) were computed by the original code but
        # never used or returned, so they are no longer collected here.
        return [q_h, q_c, q_z]
    elif (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
        cfg = layer.cell.get_config()
        k = layer.get_weights()[0]
        rk = layer.get_weights()[1]
        bias = layer.get_weights()[2]
        # replicate keras's GRU cell maths (bias has separate input/recurrent rows)
        def gru_cell_step(cell_inputs, cell_states, kernel, recurrent_kernel, input_bias, recurrent_bias):
            h_tm1 = cell_states[0]
            # inputs projected by all gate matrices at once
            matrix_x = np.dot(cell_inputs, kernel) + input_bias
            x_z, x_r, x_h = split_array(matrix_x, 3)
            # hidden state projected by all gate matrices at once
            matrix_inner = np.dot(h_tm1, recurrent_kernel) + recurrent_bias
            recurrent_z, recurrent_r, recurrent_h = split_array(matrix_inner, 3)
            z = nnom_sigmoid(x_z + recurrent_z)
            r = nnom_sigmoid(x_r + recurrent_r)
            hh = nnom_tanh(x_h + r * recurrent_h)
            # previous and candidate state mixed by update gate
            # h = z * h_tm1 + (1 - z) * hh
            h1 = z * h_tm1
            h2 = 1 - z
            h3 = h2 * hh
            h = h1 + h3
            return h, [h], matrix_x, matrix_inner
        h_array = []
        z_array = []
        i_array = []
        state = [np.zeros(cfg['units'])]
        for feature in features:
            if (not layer.stateful):
                state = [np.zeros(cfg['units'])]
            for fe in feature:
                output, state, z, i = gru_cell_step(fe, state, k, rk, bias[0], bias[1])
                h_array.append(output)
                z_array.append(z)
                i_array.append(i)
        q_h = find_dec_bits_max_min(np.array(h_array))
        q_i = find_dec_bits_max_min(np.array(i_array))
        q_z = find_dec_bits_max_min(np.array(z_array))
        # both gate projections share one q format: keep the smaller dec bit
        q_z = min(q_i, q_z)
        return [q_h, q_z]
    return []
def quantize_output(model, x_test, quantize_method='max_min', layer_offset=False, calibrate_size=None):
    """Run the calibration data through the model and decide an output
    Q-format (dec bits, offset) for every layer.

    :param model: trained keras model (single input only, see FIXME below)
    :param x_test: calibration samples
    :param quantize_method: 'max_min' (non-saturating) or 'kld' (saturating)
    :param layer_offset: when True, subtract and record the mean of each layer output
    :param calibrate_size: optional cap on the number of calibration samples
    :return: dict mapping layer name -> [dec_bits, offset]; RNN layers also get
             an 'intermediate_<name>' entry listing their internal q formats
    """
    # limit the test data size
    if(calibrate_size is not None):
        if (x_test.shape[0] > calibrate_size):
            x_test = x_test[:calibrate_size]
    # test, show the output ranges
    layer_q_list = {}
    # FIXME: only support one input
    if (type(model.layers[0]) != InputLayer):
        L = [model.input] + model.layers
    else:
        L = model.layers
    for layer in L: # layer loop
        if ("input" in layer.name):
            features = x_test
        else:
            # rnn need a further step to determine the intermediate q format
            if (is_rnn_layer(layer)):
                in_layer = layer.inbound_nodes[0].inbound_layers
                layer_model = Model(inputs=model.input, outputs=in_layer.output)
                bs = model.input.shape[0]
                features = layer_model.predict(x_test, batch_size=bs)
                intermediate_dec = quantize_rnn_intermediate_output(layer, features)
                print(layer.name, 'dec bit', intermediate_dec)
                layer_q_list['intermediate_' + layer.name] = intermediate_dec
            # batch_normalization will need to be handled differently, since we are fusing the weight to its previosu conv.
            # sigmoid and tanh are different, their shift is fixed to 7
            if (is_shift_layer(layer) or
                ('batch_normalization' in layer.name)):
                layer_model = Model(inputs=model.input, outputs=layer.output)
                bs = model.input.shape[0]
                features = layer_model.predict(x_test, batch_size=bs)
            else:
                # leave the features not changed, so this layer shift will be the same as its inputs
                pass
        # we currently only support one offset for a layer output.
        if(layer_offset):
            offset = find_offset(features)
            features = features - offset
        else:
            offset = 0
        # saturated shift using KLD method OR non saturated shift using max-min
        if ("kld" in quantize_method
                and not is_shift_fixed(layer)
                and "input" not in layer.name
                and "dense" not in layer.name): # test, also do not use kld in input layer
            dec_bits = find_dec_bits_kld(features, bit_width=8, scan_times=4)
            print(layer.name,"Quantized method:", "KLD", "Values max:", np.max(features), "min:", np.min(features), "dec bit", dec_bits)
        else:
            dec_bits = find_dec_bits_max_min(features, bit_width=8)
            print(layer.name,"Quantized method:","max-min"," Values max:", np.max(features), "min:", np.min(features), "dec bit", dec_bits)
        # quantise offset
        offset = int(np.round(offset * 2 ** dec_bits))
        # record the shift
        if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
            layer_q_list[layer.name.split(':')[0]] = [dec_bits, offset]
        else:
            layer_q_list[layer.name] = [dec_bits, offset]
        if ('batch_normalization' in layer.name):
            layer_q_list[layer.inbound_nodes[0].inbound_layers.name] = [dec_bits, offset] # use the bn layer shift to update the last layer.
    # scan the layers backward, try to unify the dec bit in multiple input layers, (add, mult... concat...etc.)
    LM = {}
    for layer in model.layers:
        LM[layer.name] = layer
    L = [l for l in model.layers[1:]]
    L.reverse()
    # NOTE(review): the dec_bit parameter is never read in the body below —
    # it uses dec_min from the enclosing loop via closure. All call sites pass
    # dec_min anyway, so behaviour is unchanged, but this looks unintentional;
    # confirm before refactoring.
    def update_previous_layer_shift(layer, dec_bit):
        if(type(layer.input) == list):
            for inp in layer.input:
                iname = inp.name.split('/')[0]
                if('input' in iname):
                    continue
                layer_q_list[iname][0] = dec_min
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], dec_bit)
        else:
            iname = layer.input.name.split('/')[0]
            if('input' in iname):
                return
            layer_q_list[iname][0] = dec_min
            if(not is_shift_layer(LM[iname])):
                update_previous_layer_shift(LM[iname], dec_bit)
    for layer in L:
        if(type(layer.input) == list):
            iname = layer.input[0].name.split('/')[0].split(':')[0]
            dec_min = layer_q_list[iname][0]
            # find min dec bit in these input
            for inp in layer.input:
                iname = inp.name.split('/')[0].split(':')[0]
                if(layer_q_list[iname][0] < dec_min):
                    dec_min = layer_q_list[iname][0]
            # NOTE(review): bFlag is assigned but never read — dead leftover?
            if(layer_q_list[iname][0] != dec_min):
                bFlag = True
            # push the unified dec bit into every input branch (recursively,
            # until a layer that can absorb a shift change is reached)
            for inp in layer.input:
                iname = inp.name.split('/')[0].split(':')[0]
                layer_q_list[iname][0] = dec_min
                if(not is_shift_layer(LM[iname])):
                    update_previous_layer_shift(LM[iname], dec_min)
            print('set dec bit', dec_min, 'for the input of', layer.name, ':', [inp.name.split('/')[0] for inp in layer.input])
            if(not is_shift_layer(layer) or dec_min < layer_q_list[layer.name][0]): # update current layer's shift only when we cannot change the shift
                layer_q_list[layer.name][0] = dec_min
    # quantise offset
    print("quantisation list", layer_q_list)
    return layer_q_list
def layer_name_from_tensor(t):
    """Extract the bare layer name from a tensor name like 'conv2d_1/kernel:0'."""
    normalized = t.name.replace(':', '/')
    return normalized.split('/')[0]
def quantize_weights(model, name='weights.h', format='hwc', per_channel_quant=True, layer_q_list=None):
    """Quantise every weighted layer of `model` to 8 bit and append the
    resulting C defines (weights, dec bits, bias/output shifts) to `name`.

    :param model: keras model; BN layers are fused into their preceding conv first
    :param name: output C header path (truncated here, then appended per variable)
    :param format: 'hwc' or 'chw' weight memory layout for the NNOM runtime
    :param per_channel_quant: use per-channel dec bits for conv-style layers
    :param layer_q_list: output of quantize_output(): layer name -> [dec_bits, offset]
    """
    # Quantize weights to 8-bits using (min,max) and write to file
    f = open(name, 'w')
    f.write('#include "nnom.h"\n\n')
    f.write('/* Weights, bias and Q format */\n')
    f.close()
    for curr_idx, layer in enumerate(model.layers):
        if (not layer.weights):
            continue
        # before merging bn layer, check if the bn is "legally" after Conv
        if('batch_normalization' in layer.name) and \
                ('conv' not in layer.inbound_nodes[0].inbound_layers.name):
            raise Exception('Only support batch_normalization placed after conv', layer.name,
                            layer.inbound_nodes[0].inbound_layers.name)
        # try to fuse BN layer to convolutional
        if ('conv' in layer.name) and \
                ('batch_normalization' in layer.outbound_nodes[0].outbound_layer.name):
            fuse_bn_to_conv(layer)
        # generate weights and bias now
        weight_dec_shift = 0
        print('quantizing weights for layer', layer.name)
        layer_weights = layer.get_weights()
        for idx, var in enumerate(layer_weights):
            var_name = convert_tensor_name(layer.weights[idx])
            var_values = var
            if("kernel" not in var_name and 'bias' not in var_name): # ignore batchnormalisation's parameters
                continue
            # ---- choose the dec bits (per channel for conv types, else per tensor) ----
            if (per_channel_quant and type(layer) in [Conv2D, Conv1D, DepthwiseConv2D, Conv2DTranspose]):
                if(type(layer) in [DepthwiseConv2D] and "kernel" in var_name): #depthwise kernel quantised by
                    shape = var_values.shape[:2] + (-1,) # need to combine the mult and channel first
                    var = var_values.reshape(shape)
                    dec_bits = find_dec_bits_max_min_axis(var, axis=-1, bit_width=8)
                elif(type(layer) in [Conv2DTranspose]):
                    dec_bits = find_dec_bits_max_min_axis(var_values, axis=-2, bit_width=8)
                else:
                    dec_bits = find_dec_bits_max_min_axis(var_values, bit_width=8)
            else:
                dec_bits = find_dec_bits_max_min(var_values, bit_width=8)
            print(' ', var_name, "dec bit", dec_bits)
            # kernel dec, bias dec, bias shift, output shift
            if(is_shift_layer(layer) and not is_rnn_layer(layer)):
                inp = layer.input.name.replace(':','/').split('/')[0]
                layer_input_dec = layer_q_list[inp][0]
                layer_output_dec = layer_q_list[layer.name][0]
                if ("kernel" in var_name):
                    # remember the kernel's dec bits; the following bias pass uses it
                    weight_dec_shift = dec_bits
                else:
                    # channel wise
                    if hasattr(dec_bits, '__len__'):
                        bias_shift = np.full(len(dec_bits), layer_input_dec)+weight_dec_shift-dec_bits
                        layer_output_shift = np.full(len(weight_dec_shift), layer_input_dec) + weight_dec_shift \
                                             - np.full(len(weight_dec_shift), layer_output_dec)
                        # a negative bias shift cannot be represented: clamp the
                        # bias dec bits down to the kernel's on those channels
                        if (np.min(bias_shift) < 0):
                            for i, w_dec in enumerate(weight_dec_shift):
                                if (bias_shift[i] < 0):
                                    dec_bits[i] = w_dec
                                    bias_shift[i] = 0
                    # layer wise
                    else:
                        bias_shift = layer_input_dec + weight_dec_shift - dec_bits
                        layer_output_shift = layer_input_dec + weight_dec_shift - layer_output_dec
                        if (bias_shift < 0):
                            dec_bits = weight_dec_shift
                            bias_shift = 0
            # RNN layer's kernel dec, bias dec, bias shift, output shift
            if(is_rnn_layer(layer)):
                inp = layer.input.name.replace(':','/').split('/')[0]
                layer_input_dec = layer_q_list[inp][0]
                layer_output_dec = layer_q_list[layer.name][0]
                #if (type(layer.cell) is SimpleRNNCell):
                if ("kernel" in var_name and 'recurrent' not in var_name):
                    weight_dec_shift = dec_bits
                elif ('bias' in var_name):
                    bias_shift = layer_input_dec + weight_dec_shift - dec_bits
                    layer_output_shift = layer_input_dec + weight_dec_shift - layer_output_dec # this is not valid
                    if (bias_shift < 0):
                        dec_bits = weight_dec_shift
                        bias_shift = 0
            # now quantise them
            if(type(layer) in [Conv2D, Conv1D, DepthwiseConv2D, Conv2DTranspose]):
                if(type(layer) in [DepthwiseConv2D] and "kernel" in var_name):
                    old_shape = var_values.shape
                    var_values = quantize_data(var_values.reshape(var_values.shape[:2] + (-1,)),
                                               dec_bits, axis=-1, per_axis=per_channel_quant) # convert to [h, w, out x mult]
                    var_values = var_values.reshape(old_shape) # convert the shape back to [h, w, out, mult]
                elif(type(layer) in [Conv2DTranspose] and "kernel" in var_name):
                    var_values = quantize_data(var_values, dec_bits, axis=-2, per_axis=per_channel_quant) # [h, w, out, in]
                else:
                    var_values = quantize_data(var_values, dec_bits, per_axis=per_channel_quant) # [h, w, in, out]
            else:
                var_values = quantize_data(var_values, dec_bits, per_axis=False)
            # ---- reorder into the runtime's memory layout ----
            # CHW format
            if ('chw' in format):
                if (is_lstm_layer(layer) or is_gru_layer(layer)): # currently we use 16 bit intermediate use reorder optimation
                    transposed_wts = np.transpose(var_values)
                    if('kernel' in var_name):
                        transposed_wts = convert_q7_q15_weights(np.reshape(transposed_wts ,(transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # dense and rnn still working under HWC format
                elif ("dense" in var_name or is_rnn_layer(layer)) and "kernel" in var_name:
                    transposed_wts = np.transpose(var_values)
                    transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts, (transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                # all other kernels, bias stay the same
                else:
                    transposed_wts = var_values
            # HWC format (NNOM/CMSIS-NN use [out_ch, h, w, in_ch], in C order)
            else:
                if (len(var_values.shape) == 3): # 1D convolution layer weights
                    transposed_wts = np.transpose(var_values, (2, 0, 1))
                elif (len(var_values.shape) == 4): # 2D convolution layer weights
                    if(type(layer) == Conv2DTranspose): # test
                        transposed_wts = np.transpose(var_values, (2, 0, 1, 3))
                    elif type(layer) == DepthwiseConv2D:
                        transposed_wts = var_values#np.transpose(var_values, (0, 1, 3, 2)) # [h, w, out, mult] test for multiplier
                    else:
                        transposed_wts = np.transpose(var_values, (3, 0, 1, 2))
                elif(is_lstm_layer(layer) or is_gru_layer(layer)): # currently we use 16 bit intermediate, use reorder optimation
                    if('kernel' in var_name):
                        transposed_wts = np.transpose(var_values)
                        transposed_wts = convert_q7_q15_weights(np.reshape(transposed_wts ,(transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
                    else: # bias will not need to be transposed (for GRU which has 2d bias)
                        transposed_wts = var_values
                else: # fully connected layer weights or biases of any layer
                    # test, use opt weight reorder
                    transposed_wts = np.transpose(var_values)
                    if ("dense" in var_name or is_rnn_layer(layer)) and "kernel" in var_name: # and other RNN layers
                        transposed_wts = convert_to_x4_q7_weights(np.reshape(transposed_wts ,(transposed_wts.shape[0], transposed_wts.shape[1], 1, 1)))
            # ---- emit the C defines for this variable ----
            with open(name, 'a') as f:
                def write_weights(f, name, value):
                    # one '#define NAME {v, v, ...}' per variable
                    f.write('#define ' + name + ' {')
                    value.tofile(f, sep=", ", format="%d")
                    f.write('}\n\n')
                # weights or bias
                write_weights(f, var_name.upper(), transposed_wts)
                # dec bits
                write_weights(f, var_name.upper()+'_DEC_BITS' , np.array(dec_bits))
                # for test
                if( "bias" in var_name):
                    f.write('#define ' + layer.name.upper() + '_BIAS_LSHIFT '+to_cstyle(bias_shift) +'\n\n')
                    #f.write('#define ' + layer.name.upper() + '_OUTPUT_DEC '+ to_cstyle(layer_output_dec)+'\n\n') # not here
                    f.write('#define ' + layer.name.upper() + '_OUTPUT_RSHIFT ' + to_cstyle(layer_output_shift)+'\n\n')
def generate_model(model, x_test, per_channel_quant=False, name='weights.h', format='hwc', quantize_method='max_min'):
"""
:param model:
:param x_test:
:param name:
:param format:
:param quantize_method: "max_min" or "kld"
:return:
"""
# get the quantize output range/format
layer_q_list = quantize_output(model, x_test, layer_offset=False, quantize_method=quantize_method)
# quantize weights and output shift
quantize_weights(model, per_channel_quant=per_channel_quant, name=name, format=format, layer_q_list=layer_q_list)
# now generate the model
if (type(model.layers[0]) != InputLayer):
L = [model.input] + model.layers
else:
L = model.layers
with open(name, 'a') as fp:
# generate the list of output
fp.write('\n/* output q format for each layer */\n')
for layer in L:
if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
iname = layer.name.split(':')[0]
else:
                iname = layer.name
                fp.write('#define %s_OUTPUT_DEC %s\n' % (iname.upper(), layer_q_list[iname][0]))
                fp.write('#define %s_OUTPUT_OFFSET %s\n' % (iname.upper(), layer_q_list[iname][1]))
        fp.write('\n/* bias shift and output shift for none-weighted layer */\n')
        # generate output shift for the layers without weights (weighted layers were generated in quantize_weights)
        for layer in model.layers:
            if (is_shift_layer(layer)):
                iname = layer.name.upper()
                # add, sub
                if ('add' in layer.name or 'subtract' in layer.name):
                    # only consider the first, they have been set to same in out_put_range()
                    inp = layer.input[0].name.replace(':', '/').split('/')[0].upper()
                    fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_DEC-{0}_OUTPUT_DEC)\n'.format(
                        iname, inp))
                    fp.write(
                        '#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(
                            iname))
                # mult is different, Q3.4 * Q3.4 = Q6.8. if mult out is Q4.3, then shift (Q.4+q.4)-Q.3=5. Am I right?
                elif ('multiply' in layer.name):
                    inp = layer.input[0].name.replace(':', '/').split('/')[0].upper()
                    fp.write('#define {0}_OUTPUT_RSHIFT ({1}_OUTPUT_DEC*2-{0}_OUTPUT_DEC)\n'.format(
                        iname, inp))
                    fp.write(
                        '#if {0}_OUTPUT_RSHIFT < 0\n#error {0}_OUTPUT_RSHIFT must be bigger than 0\n#endif\n'.format(
                            iname))
        fp.write('\n/* tensors and configurations for each layer */\n')
        # LI maps a Keras layer name -> (index into the generated C `layer[]` array, layer object).
        # Skipped layers alias the index of their input, so hooks resolve through them.
        LI = {}
        ID = 0

        def is_skipable_layer(layer):
            # Layers with no inference-time effect get no C layer of their own.
            # FIXME: add more that could be skiped
            if ('lambda' in layer.name or
                'dropout' in layer.name or
                'gaussian_noise' in layer.name or
                'batch_normalization' in layer.name
                #or ('flatten' in layer.name and 'chw' not in format)
                ):  # flatten layer can be skipped in HWC but needed in CHW
                return True
            return False
        output_num = 0
        for id, layer in enumerate(L):
            if (is_skipable_layer(layer)):
                # reuse the C index of this layer's input
                inp = layer.input.name.replace(':', '/').split('/')[0]
                LI[layer.name] = (LI[inp][0], layer)
            else:
                # tensor-style inputs carry a ':0' suffix in their name; strip it for the key
                if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
                    LI[layer.name.split(':')[0]] = (ID, layer)
                else:
                    LI[layer.name] = (ID, layer)
                ID += 1

            def gen_weight_tensor(w, per_axis):
                # emit the C value array and the nnom tensor struct for one weight variable
                var_cname = convert_tensor_name(w) + '_data'
                dec_bits_name = convert_tensor_name(w).upper() + '_DEC_BITS'
                fp.write(gen_values(var_cname, convert_tensor_name(w).upper()))
                fp.write(gen_tensor(w, dec_bits=dec_bits_name, tensor_value=var_cname, per_axis=per_axis))

            # output the config of all layer
            if (type(layer) in [InputLayer] or 'input' in layer.name):
                if(type(layer) == tf.Tensor):
                    raise Exception('Not yet support tensor as input/or Sequential model. '
                                    'please use Input layer as your first layer in the model', layer.name, layer)
                # flat byte size of the input tensor (batch dim excluded; None dims count as 1)
                size = 1
                for s in layer.input.shape[1:]:
                    size *= s if s is not None else 1
                fp.write(gen_values('nnom_input_data', '{0}', size=str(size), dtype='static int8_t'))
                fp.write(gen_tensor(layer.input, layer_q_list[layer.name][0], tensor_value='nnom_input_data', is_io_tensor=True))
                fp.write(gen_io_config(layer, tensor_name=convert_tensor_name(layer.input)))
            elif (type(layer) in [Conv2D, Conv1D, DepthwiseConv2D]):
                for w in layer.weights:
                    gen_weight_tensor(w, per_axis=per_channel_quant)
                fp.write(gen_conv2d_config(layer, layer.name.upper() +'_OUTPUT_RSHIFT', layer.name.upper() +'_BIAS_LSHIFT'))
            elif (type(layer) in [Conv2DTranspose]):
                for w in layer.weights:
                    gen_weight_tensor(w, per_axis=per_channel_quant)
                fp.write(gen_conv2d_trans_config(layer, layer.name.upper() +'_OUTPUT_RSHIFT', layer.name.upper() +'_BIAS_LSHIFT'))
            elif (type(layer) in [Dense]):
                for w in layer.weights:
                    gen_weight_tensor(w, per_axis=False)
                fp.write(gen_dense_config(layer, layer.name.upper() +'_OUTPUT_RSHIFT', layer.name.upper() +'_BIAS_LSHIFT'))
            elif (type(layer) in [MaxPooling2D, AveragePooling2D, MaxPooling1D, AveragePooling1D]):
                fp.write(gen_pooling_config(layer))
            elif (type(layer) in [GlobalMaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling1D, GlobalAveragePooling1D]):
                fp.write(gen_gl_pooling_config(layer))
            elif (type(layer) in [Multiply, Add, Subtract]):
                fp.write(gen_matrix_config(layer, output_shift_name=layer.name.upper()+'_OUTPUT_RSHIFT'))
            elif (type(layer) in [ZeroPadding2D, ZeroPadding1D]):
                fp.write(gen_zero_padding_config(layer))
            elif (type(layer) in [Cropping2D, Cropping1D]):
                fp.write(gen_cropping_config(layer))
            elif (type(layer) in [Softmax]):
                fp.write(gen_softmax_config(layer))
            elif (type(layer) in [Flatten]):
                fp.write(gen_flatten_config(layer))
            elif (type(layer) in [Reshape]):
                fp.write(gen_reshape_config(layer))
            elif (type(layer) in [Concatenate]):
                fp.write(gen_concat_config(layer))
            elif (type(layer) in [Lambda]):
                fp.write(gen_lambda_config(layer))
            elif (type(layer) in [UpSampling2D, UpSampling1D]):
                fp.write(gen_upsampling_config(layer))
            elif(is_rnn_layer(layer)):
                # RNN layers carry their cell's quantisation under the 'intermediate_' key in layer_q_list
                if(type(layer.cell) is SimpleRNNCell):
                    for w in layer.weights:
                        gen_weight_tensor(w, per_axis=False)
                    fp.write(gen_simple_cell_config(layer, layer_q_list['intermediate_'+layer.name]))
                elif(type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
                    for w in layer.weights:
                        gen_weight_tensor(w, per_axis=False)
                    fp.write(gen_gru_cell_config(layer, layer_q_list['intermediate_'+layer.name]))
                elif(type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
                    for w in layer.weights:
                        gen_weight_tensor(w, per_axis=False)
                    fp.write(gen_lstm_cell_config(layer, layer_q_list['intermediate_'+layer.name]))
                fp.write(gen_rnn_config(layer))

            # test, multiple output layer
            # a layer with no outbound node is a model output; emit its static output buffer + config
            if(len(layer.outbound_nodes) == 0):
                size=1
                for s in layer.output.shape[1:]:
                    size *= s if s is not None else 1
                if(output_num == 0): # the first output or the only output
                    fp.write(gen_values('nnom_output_data', '{0}', size=str(size), dtype='static int8_t'))
                    fp.write(gen_output_config(layer, dec_bits=layer.name.upper() + '_OUTPUT_DEC', output_num=output_num, value_name='nnom_output_data'))
                    output_num += 1
                else:
                    output_value_names = 'nnom_output_data'+str(output_num)
                    fp.write(gen_values(output_value_names, '{0}', size=str(size), dtype='static int8_t'))
                    fp.write(gen_output_config(layer, dec_bits=layer.name.upper() + '_OUTPUT_DEC', output_num=output_num, value_name=output_value_names))
                    output_num += 1

            # # last layer, attach the additional nnom output layer
            # if(id == len(L)-1):
            #     size=1
            #     for s in layer.output.shape[1:]:
            #         size *= s if s is not None else 1
            #     fp.write(gen_values('nnom_output_data', '{0}', size=str(size), dtype='static int8_t'))
            #     fp.write(gen_output_config(layer, dec_bits=layer.name.upper()+'_OUTPUT_DEC', value_name='nnom_output_data'))

        # write version
        fp.write('/* model version */\n')
        fp.write('#define NNOM_MODEL_VERSION (10000*{0} + 100*{1} + {2})\n'.format(model_major_version, model_sub_version, model_reversion ))

        # model
        fp.write('\n/* nnom model */\n')
        fp.write('static nnom_model_t* nnom_model_create(void)\n{\n')
        fp.write('\tstatic nnom_model_t model;\n')
        # for large models, allocate the layer pointer table on the heap instead of the C stack
        if (ID > 32):
            fp.write('\tnnom_layer_t **layer = (nnom_layer_t**)malloc(sizeof(nnom_layer_t *)*%d);\n' % (ID + 1))
            fp.write('\tif(NULL == layer) return NULL;\n')
        else:
            fp.write('\tnnom_layer_t* layer[%d];\n' % (ID + 1))
        fp.write('\n\tcheck_model_version(NNOM_MODEL_VERSION);')
        fp.write('\n\tnew_model(&model);\n\n')

        # inverted order of output, very strange
        output_num = (len(model.output) -1) if type(model.output) is list else 0
        # second pass: emit the C construction calls that hook each layer to its input(s)
        for layer in L:
            if (is_skipable_layer(layer)):
                continue
            # FIXME: need a better solution to seperate the input 'tensor' from other layers
            if (type(model.input) == tf.Tensor and type(model.layers[0]) != InputLayer):
                id, _ = LI[layer.name.split(':')[0]]
            else:
                id, _ = LI[layer.name]

            if ('input' in layer.name):
                fp.write('\tlayer[%d] = input_s(&%s_config);\n' % (id, layer.name))
            # convlutional
            elif ('conv1d' in layer.name
                  or 'conv2d' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if('transpose' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(conv2d_trans_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                elif('depthwise' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(dw_conv2d_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(conv2d_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('activation' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                if (cfg['activation'] == 'relu'):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                elif (cfg['activation'] == 'tanh'):
                    fp.write('\tlayer[%s] = model.active(act_hard_tanh(%s_OUTPUT_DEC), layer[%s]);\n' % (
                        id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_DEC), layer[%s]);\n' % (
                        id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'hard_sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_hard_sigmoid(%s_OUTPUT_DEC), layer[%s]);\n' % (
                        id, inp.upper(), LI[inp][0]))
                elif (cfg['activation'] == 'softmax'):
                    fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n' % (id, LI[inp][0]))
            elif ('leaky_re_lu' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                fp.write('\tlayer[%s] = model.active(act_leaky_relu(%ff), layer[%s]);\n' % (id, cfg["alpha"],LI[inp][0]))
            elif ('re_lu' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                cfg = layer.get_config()
                # plain ReLU maps to act_relu(); any clamp/slope/threshold needs the advanced variant
                if(cfg['max_value'] is None and cfg['negative_slope'] == 0 and cfg['threshold'] == 0):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n' % (id, LI[inp][0]))
                else:
                    if(cfg['max_value'] is None):
                        max_v = 'INFINITY '
                    else:
                        max_v = str(cfg['max_value'])
                    fp.write('\tlayer[%s] = model.active(act_adv_relu(%f,%s,%f), layer[%s]);\n'
                             % (id, cfg['negative_slope'], max_v, cfg['threshold'], LI[inp][0]))
            # pooling
            elif ('max_pooling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if ('global' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(global_maxpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(maxpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('average_pooling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                if ('global' in layer.name):
                    fp.write('\tlayer[{0}] = model.hook(global_avgpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
                else:
                    fp.write('\tlayer[{0}] = model.hook(avgpool_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('up_sampling' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(upsample_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # zero padding
            elif ('zero_padding' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(zeropadding_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # Cropping
            elif ('cropping' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(cropping_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            # others
            elif ('flatten' in layer.name): # flatten is needed in CHW backend but not needed in HWC
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(flatten_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('reshape' in layer.name): # flatten is needed in CHW backend but not needed in HWC
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(reshape_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('concatenate' in layer.name):
                # merge layers take a variable argument list: count followed by each input layer
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(concat_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('add' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(add_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('subtract' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(sub_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('multiply' in layer.name):
                inps = [layer_name_from_tensor(input) for input in layer.input]
                inX = ''
                for inp in inps:
                    inX += ' ,layer[%d]' % (LI[inp][0])
                fp.write('\tlayer[%s] = model.mergex(mult_s(&%s_config), %s%s);\n' % (
                    id, layer.name, len(inps), inX))
            elif ('dense' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(dense_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif ('softmax' in layer.name):
                inp = layer_name_from_tensor(layer.input)
                fp.write('\tlayer[{0}] = model.hook(softmax_s(&{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0]))
            elif (is_rnn_layer(layer)):
                inp = layer_name_from_tensor(layer.input)
                # build the call with a placeholder, then substitute the matching cell constructor
                line = '\tlayer[{0}] = model.hook(rnn_s(<rnn_cell>, &{1}_config), layer[{2}]);\n'.format(id, layer.name, LI[inp][0])
                if (type(layer.cell) is SimpleRNNCell):
                    line = line.replace('<rnn_cell>', 'simple_cell_s(&%s_simple_cell_config)' %(layer.name))
                elif (type(layer.cell) is GRUCell or 'gru' in layer.cell.name):
                    line = line.replace('<rnn_cell>', 'gru_cell_s(&%s_gru_cell_config)' % (layer.name))
                elif (type(layer.cell) is LSTMCell or 'lstm' in layer.cell.name):
                    line = line.replace('<rnn_cell>', 'lstm_cell_s(&%s_lstm_cell_config)' % (layer.name))
                fp.write(line)
            else:
                raise Exception('unsupported layer', layer.name, layer)

            # test, multiple output layer (not yet working with multiple outputs)
            if(len(layer.outbound_nodes) == 0):
                fp.write('\tlayer[{0}] = model.hook(output_s(&{1}_config), layer[{2}]);\n'.format(id + 1, 'output'+str(output_num), LI[inp][0] + 1))
                output_num -=1 # the num is inverted in keras, not a good solution yet.

            """
            # temporary fixed for activations attached into layers in construction
            def is_activation_attached(layer):
                if(("Softmax" in layer.output.name and "softmax" not in layer.name)or
                ("Relu" in layer.output.name and "re_lu" not in layer.name) or
                ("Sigmoid" in layer.output.name and "sigmoid" not in layer.name) or
                ("Tanh" in layer.output.name and "tanh" not in layer.name)):
                    return True
                return False
            if "input" not in layer.name and is_activation_attached(layer):
                inp = layer.output.name.replace(':', '/').split('/')[0]
                cfg = layer.get_config()
                if(cfg['activation'] == 'relu'):
                    fp.write('\tlayer[%s] = model.active(act_relu(), layer[%s]);\n'%(id, LI[inp][0]))
                if(cfg['activation'] == 'tanh'):
                    fp.write('\tlayer[%s] = model.active(act_tanh(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
                if(cfg['activation'] == 'sigmoid'):
                    fp.write('\tlayer[%s] = model.active(act_sigmoid(%s_OUTPUT_SHIFT), layer[%s]);\n'%(id, inp.upper(), LI[inp][0]))
                elif(cfg['activation'] == 'softmax'):
                    fp.write('\tlayer[%s] = model.hook(Softmax(), layer[%s]);\n'%(id, LI[inp][0]))
            """

        # generate final output layer
        #fp.write('\tlayer[{0}] = model.hook(output_s(&{1}_config), layer[{2}]);\n'.format(id+1, 'output', LI[inp][0]+1))
        fp.write('\tmodel_compile(&model, layer[0], layer[%s]);\n' % (id + 1))
        if (ID > 32):
            fp.write('\tfree(layer);\n')
        fp.write('\treturn &model;\n}\n')
    # cache the quantisation list so other tools/runs can reload it
    with open('.layer_q_list', 'w') as fp:
        fp.write(str(layer_q_list))
def evaluate_model(model, x_test, y_test, running_time=False, to_file='evaluation.txt'):
    """Evaluate a trained Keras model and write a summary report to a file.

    Prints and records the test loss and top-1 accuracy. When *y_test* is
    one-hot encoded (2-D), a confusion matrix is computed and appended to the
    report. When *running_time* is True, the average prediction time per
    sample is measured over 10 full prediction passes (in microseconds).

    Args:
        model: compiled Keras model.
        x_test, y_test: evaluation data and labels.
        running_time: measure and report inference timing when True.
        to_file: path of the text report to write.

    Returns:
        The scores list from ``model.evaluate()`` (loss, accuracy).
    """
    # Score trained model.
    scores = model.evaluate(x_test, y_test, verbose=2)
    print('Test loss:', scores[0])
    print('Top 1:', scores[1])

    matrix = None  # only computed for one-hot labels; guards the file-writing below
    if (len(y_test.shape) > 1):
        bs = model.input.shape[0]
        predictions = model.predict(x_test, batch_size=bs)
        matrix = skmetrics.confusion_matrix(y_test.argmax(axis=1), predictions.argmax(axis=1))
        print(matrix)

    run_time = 0
    if running_time:
        # average wall-clock time of 10 full prediction passes, reported in us/sample
        T = time.time()
        bs = model.input.shape[0]
        for i in range(10):
            model.predict(x_test, batch_size=bs)
        T = time.time() - T
        run_time = round((T / 10 / x_test.shape[0] * 1000 * 1000), 2)
        print("Running time:", run_time, "us")

    # write the evaluation report
    with open(to_file, 'w') as f:
        f.write("Running time: " + str(run_time) + "us" + "\n")
        f.write('Test loss:' + str(scores[0]) + "\n")
        f.write('Top 1:' + str(scores[1]) + "\n")
        if matrix is not None:
            for row in matrix:
                row.tofile(f, sep=',')
                f.write("\n")
    return scores
def f2q(d, Q):
    """Quantize a floating point value into Qm.n fixed-point format.

    The value is scaled up by 2**Q and rounded to the nearest integer
    (numpy rounding rules).
    """
    scale = 2.0 ** Q
    return np.round(d * scale)
def q2f(d, Q):
    """Convert a Qm.n fixed-point value back to floating point.

    Treats *d* as a plain integer and scales it down by 2**Q.
    """
    scale = 2.0 ** (-Q)
    return d * scale
def show_weights(w, name):
    """Plot a weight tensor against its 8-bit (Q7-range) quantized version.

    Draws three panels: raw + sorted values, sorted original vs quantized
    values, and a histogram. The input array is NOT modified: a flattened
    copy is taken first, because the in-place sorts below would otherwise
    reorder the caller's weights through a reshape view.

    Args:
        w: numpy weight array of any shape.
        name: title for the first panel.
    """
    # flatten() always returns a copy (unlike reshape, which can be a view),
    # so the in-place aL.sort()/qL.sort() below cannot corrupt the model weights
    aL = w.flatten()
    MIN, MAX = aL.min(), aL.max()
    max_abs = max(abs(MIN), abs(MAX))
    if max_abs == 0:
        # all-zero tensor: log2(0) would raise; any shift represents it exactly
        Q = 7
    else:
        Q = 7 - int(np.ceil(np.log2(max_abs)))
    qL = f2q(aL, Q)
    qL = q2f(qL, Q)
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.title(name)
    plt.plot(aL)
    plt.grid()
    aL.sort()
    plt.plot(aL, 'r')
    plt.grid()
    plt.subplot(132)
    plt.title('Q%s' % (Q))
    qL.sort()
    plt.plot(aL, 'r')
    plt.plot(qL, 'g')
    plt.grid()
    plt.subplot(133)
    plt.hist(aL, 100)
    plt.title('hist')
    plt.grid()
    plt.show()
def compare(a, b, name):
    """Visually compare two equally-sized tensors (e.g. float reference vs
    fixed-point result).

    Three panels: the first tensor raw + sorted, the second tensor raw +
    sorted, and both sorted traces overlaid for direct comparison.
    """
    aL = a.reshape(-1)
    bL = b.reshape(-1)
    assert len(aL) == len(bL)
    # order both traces by ascending value of `a` (stable, like list.sort)
    aL1, bL1 = zip(*sorted(zip(aL, bL), key=lambda pair: pair[0]))
    plt.figure(figsize=(18, 3))
    plt.subplot(131)
    plt.plot(aL)
    plt.plot(aL1, 'r')
    plt.grid()
    plt.title('tf-%s' % (name))
    plt.subplot(133)
    plt.plot(bL1, 'g')
    plt.plot(aL1, 'r')
    plt.grid()
    plt.title('compare')
    plt.subplot(132)
    plt.plot(bL)
    plt.plot(sorted(bL1), 'g')
    plt.grid()
    plt.title('nn-%s' % (name))
    plt.show()