From ecbdf032c7abd7d8320c1c72c58d1bd8efae7579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9F=AF=E5=8D=97=E9=81=93=E5=B0=94?= <68650142+Evan-wyl@users.noreply.github.com> Date: Fri, 24 Sep 2021 16:14:40 +0800 Subject: [PATCH] Add files via upload --- codes/FM.py | 55 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/codes/FM.py b/codes/FM.py index 3ac1a730..9ec61f43 100644 --- a/codes/FM.py +++ b/codes/FM.py @@ -1,5 +1,5 @@ import pandas as pd -import numpy as np +import numpy as np from tensorflow.keras import * from tensorflow.keras.layers import * @@ -12,6 +12,7 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder from tqdm import tqdm + # dense特征取对数  sparse特征进行类别编码 def process_feat(data, dense_feats, sparse_feats): df = data.copy() @@ -19,56 +20,64 @@ def process_feat(data, dense_feats, sparse_feats): df_dense = df[dense_feats].fillna(0.0) for f in tqdm(dense_feats): df_dense[f] = df_dense[f].apply(lambda x: np.log(1 + x) if x > -1 else -1) - + # sparse df_sparse = df[sparse_feats].fillna('-1') for f in tqdm(sparse_feats): lbe = LabelEncoder() df_sparse[f] = lbe.fit_transform(df_sparse[f]) - - df_new = pd.concat([df_dense, df_sparse], axis=1) + + df_sparse_arr = [] + for f in tqdm(sparse_feats): + data_new = pd.get_dummies(df_sparse.loc[:, f].values) + data_new.columns = [f + "_{}".format(i) for i in range(data_new.shape[1])] + df_sparse_arr.append(data_new) + + df_new = pd.concat([df_dense] + df_sparse_arr, axis=1) return df_new + # FM 特征组合层 class crossLayer(layers.Layer): - def __init__(self,input_dim, output_dim=10, **kwargs): + def __init__(self, input_dim, output_dim=10, **kwargs): super(crossLayer, self).__init__(**kwargs) self.input_dim = input_dim self.output_dim = output_dim # 定义交叉特征的权重 - self.kernel = self.add_weight(name='kernel', - shape=(self.input_dim, self.output_dim), - initializer='glorot_uniform', - trainable=True) - - def call(self, x): # 对照上述公式中的二次项优化公式一起理解 + self.kernel = self.add_weight(name='kernel', + shape=(self.input_dim, self.output_dim), + initializer='glorot_uniform', + trainable=True) + + def call(self, x): # 对照上述公式中的二次项优化公式一起理解 a = K.pow(K.dot(x, self.kernel), 2) b = K.dot(K.pow(x, 2), K.pow(self.kernel, 2)) - return 0.5 * K.mean(a-b, 1, keepdims=True) + return 0.5 * K.mean(a - b, 1, keepdims=True) + # 定义FM模型 def FM(feature_dim): - inputs = Input(shape=(feature_dim, )) - + inputs = Input(shape=(feature_dim,)) + # 一阶特征 - linear = Dense(units=1, - kernel_regularizer=regularizers.l2(0.01), + linear = Dense(units=1, + kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01))(inputs) - + # 二阶特征 cross = crossLayer(feature_dim)(inputs) add = Add()([linear, cross]) # 将一阶特征与二阶特征相加构建FM模型 - - pred = Activation('sigmoid')(add) + + pred = Dense(units=1, activation="sigmoid")(add) model = Model(inputs=inputs, outputs=pred) - - model.summary() + + model.summary() model.compile(loss='binary_crossentropy', optimizer=optimizers.Adam(), metrics=['binary_accuracy']) - - return model + + return model # 读取数据