Add files via upload

This commit is contained in:
柯南道尔
2021-09-24 16:14:40 +08:00
committed by GitHub
parent 43829a3c6a
commit ecbdf032c7

View File

@@ -1,5 +1,5 @@
import pandas as pd
import numpy as np
import numpy as np
from tensorflow.keras import *
from tensorflow.keras.layers import *
@@ -12,6 +12,7 @@ from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
# dense特征取对数  sparse特征进行类别编码
def process_feat(data, dense_feats, sparse_feats):
    """Build the model input frame from raw dense and sparse columns.

    Dense columns have missing values filled with 0.0 and are then mapped to
    ``log(1 + x)``; values that are not greater than -1 are mapped to -1.
    Sparse columns have missing values filled with ``'-1'``, are label-encoded
    and then one-hot encoded into columns named ``<feature>_<i>``, where ``i``
    is the position of the one-hot column.

    Args:
        data: Source DataFrame; it is copied, not modified.
        dense_feats: Names of the numeric columns.
        sparse_feats: Names of the categorical columns.

    Returns:
        A DataFrame holding the transformed dense columns followed by the
        one-hot columns of every sparse feature, indexed like ``data``.
    """
    df = data.copy()

    # dense
    df_dense = df[dense_feats].fillna(0.0)
    for f in tqdm(dense_feats):
        df_dense[f] = df_dense[f].apply(lambda x: np.log(1 + x) if x > -1 else -1)

    # sparse
    df_sparse = df[sparse_feats].fillna('-1')
    for f in tqdm(sparse_feats):
        lbe = LabelEncoder()
        df_sparse[f] = lbe.fit_transform(df_sparse[f])

    df_sparse_arr = []
    for f in tqdm(sparse_feats):
        # dtype is pinned so the result stays numeric on pandas versions
        # whose get_dummies default is bool.
        data_new = pd.get_dummies(df_sparse[f].values, dtype=np.uint8)
        data_new.columns = [f + "_{}".format(i) for i in range(data_new.shape[1])]
        # get_dummies on a bare array yields a fresh RangeIndex; restore the
        # source index so the axis=1 concat below lines rows up correctly
        # even when `data` does not carry a default index.
        data_new.index = df_sparse.index
        df_sparse_arr.append(data_new)

    df_new = pd.concat([df_dense] + df_sparse_arr, axis=1)
    return df_new
# FM 特征组合层
class crossLayer(layers.Layer):
    """Second-order feature-interaction term of a factorization machine.

    Holds a trainable ``kernel`` of shape ``(input_dim, output_dim)`` and
    computes ``0.5 * mean((x . V)^2 - (x^2) . (V^2))`` over axis 1, keeping
    that axis so the result has one value per row.

    Args:
        input_dim: Size of the feature axis of the input (first kernel axis).
        output_dim: Number of latent factors (second kernel axis).
        **kwargs: Forwarded to the base layer constructor.
    """

    def __init__(self, input_dim, output_dim=10, **kwargs):
        super(crossLayer, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Weights of the crossed features (latent factor matrix V).
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.input_dim, self.output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)

    def call(self, x):
        # Optimized form of the pairwise term: square-of-sum minus
        # sum-of-squares, evaluated per latent factor.
        # assumes x is (batch, input_dim) -- TODO confirm against callers
        a = K.pow(K.dot(x, self.kernel), 2)
        b = K.dot(K.pow(x, 2), K.pow(self.kernel, 2))
        return 0.5 * K.mean(a - b, 1, keepdims=True)

    def get_config(self):
        # Expose the constructor arguments so the layer can be rebuilt from
        # its config when a model containing it is saved and reloaded.
        config = super(crossLayer, self).get_config()
        config.update({'input_dim': self.input_dim,
                       'output_dim': self.output_dim})
        return config
# 定义FM模型
def FM(feature_dim):
    """Build and compile the FM model for inputs with ``feature_dim`` columns.

    The first-order (linear) term and the second-order ``crossLayer`` term
    are added, then passed through a single-unit sigmoid ``Dense`` layer.
    The model summary is printed as a side effect.

    Args:
        feature_dim: Number of input features per sample.

    Returns:
        A Keras ``Model`` compiled with binary cross-entropy loss, the Adam
        optimizer and the ``binary_accuracy`` metric.
    """
    inputs = Input(shape=(feature_dim,))

    # First-order features
    linear = Dense(units=1,
                   kernel_regularizer=regularizers.l2(0.01),
                   bias_regularizer=regularizers.l2(0.01))(inputs)

    # Second-order features
    cross = crossLayer(feature_dim)(inputs)

    # Sum the first- and second-order terms to form the FM output.
    add = Add()([linear, cross])
    # Single-unit dense head: a learned scale and bias on the summed term,
    # followed by the sigmoid.
    pred = Dense(units=1, activation="sigmoid")(add)

    model = Model(inputs=inputs, outputs=pred)
    model.summary()
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.Adam(),
                  metrics=['binary_accuracy'])
    return model
# 读取数据