Merge pull request #56 from Lyons-T/master

add new recall files
This commit is contained in:
若如意
2022-03-28 19:48:16 +08:00
committed by GitHub
3 changed files with 2926 additions and 0 deletions

View File

@@ -0,0 +1,450 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b06bec9d",
"metadata": {},
"outputs": [],
"source": [
"import gc\n",
"import os\n",
"import math\n",
"import pickle\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm.auto import tqdm\n",
"from operator import itemgetter\n",
"\n",
"from sklearn.utils import shuffle\n",
"from collections import defaultdict\n",
"from metric import PrintMetric\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"raw_data_path = 'D:/news-rec/dataset/raw_data'\n",
"new_data_path = 'D:/news-rec/dataset/recall_data'\n",
"\n",
"os.makedirs(new_data_path, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4479018f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " user_id 设备名称 操作系统 所在省 所在市 \\\n0 1000372820 TAS-AN00 Android 广东 广州 \n10 1001440812 iPad IOS NaN NaN \n16 1001771644 V1901A Android 陕西 宝鸡 \n17 1001773994 STK-AL00 Android 广东 河源 \n142 1017050854 DUB-AL00 Android 湖北 武汉 \n\n 年龄 \\\n0 A_0_24:0.404616,A_25_29:0.059027,A_30_39:0.516... \n10 A_0_24:0.312738,A_25_29:0.261741,A_30_39:0.268... \n16 A_0_24:0.445645,A_25_29:0.330315,A_30_39:0.153... \n17 A_0_24:0.497841,A_25_29:0.245965,A_30_39:0.219... \n142 A_0_24:0.008895,A_25_29:0.067247,A_30_39:0.824... \n\n 性别 \n0 female:0.051339,male:0.948661 \n10 female:0.907997,male:0.092003 \n16 female:0.049787,male:0.950213 \n17 female:0.117317,male:0.882683 \n142 female:0.519291,male:0.480709 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>user_id</th>\n <th>设备名称</th>\n <th>操作系统</th>\n <th>所在省</th>\n <th>所在市</th>\n <th>年龄</th>\n <th>性别</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1000372820</td>\n <td>TAS-AN00</td>\n <td>Android</td>\n <td>广东</td>\n <td>广州</td>\n <td>A_0_24:0.404616,A_25_29:0.059027,A_30_39:0.516...</td>\n <td>female:0.051339,male:0.948661</td>\n </tr>\n <tr>\n <th>10</th>\n <td>1001440812</td>\n <td>iPad</td>\n <td>IOS</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>A_0_24:0.312738,A_25_29:0.261741,A_30_39:0.268...</td>\n <td>female:0.907997,male:0.092003</td>\n </tr>\n <tr>\n <th>16</th>\n <td>1001771644</td>\n <td>V1901A</td>\n <td>Android</td>\n <td>陕西</td>\n <td>宝鸡</td>\n <td>A_0_24:0.445645,A_25_29:0.330315,A_30_39:0.153...</td>\n <td>female:0.049787,male:0.950213</td>\n </tr>\n <tr>\n <th>17</th>\n <td>1001773994</td>\n <td>STK-AL00</td>\n <td>Android</td>\n <td>广东</td>\n <td>河源</td>\n <td>A_0_24:0.497841,A_25_29:0.245965,A_30_39:0.219...</td>\n <td>female:0.117317,male:0.882683</td>\n </tr>\n <tr>\n <th>142</th>\n <td>1017050854</td>\n <td>DUB-AL00</td>\n <td>Android</td>\n <td>湖北</td>\n <td>武汉</td>\n <td>A_0_24:0.008895,A_25_29:0.067247,A_30_39:0.824...</td>\n <td>female:0.519291,male:0.480709</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_info = pd.read_csv(raw_data_path + '/user_info_5w.csv', sep='\\t', index_col=0)\n",
"user_info.columns = [\"user_id\", \"设备名称\", \"操作系统\", \"所在省\", \"所在市\", \"年龄\",\"性别\"]\n",
"\n",
"user_info.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "22d466d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " item_id 标题 发文时间 图片数量 一级分类 \\\n0 361653323 疫情谣言粉碎机丨接种新冠疫苗后用麻药或致死?盘点最新疫情谣言,别被忽悠了 1624522285000 1 健康 \n1 426732705 实拍本田飞度:空间真大,8万出头工薪族可选,但内饰能忍? 1610808303000 9 汽车 \n2 430221183 搭载135kw电机比亚迪秦plus纯电动版外观更精致 1612581556000 2 汽车 \n3 441756326 【提车作业】不顾他人眼光帕萨特phev俘获30老男人浪子心 1618825835000 23 汽车 \n4 443485341 魏延有反骨之心都能重用,赵云忠心为什么却不被重用? 1619484501000 4 历史 \n\n 二级分类 关键词 \n0 健康/疾病防护治疗及西医用药 医生:14.760494,吸烟:16.474872,板蓝根:15.597788,板蓝根^^熏... \n1 汽车/买车 155n:8.979802,polo:7.951116,中控台:5.954278,中网:7.... \n2 汽车/买车 etc:12.055207,代表:8.878175,内饰:5.342025,刀片:9.453... \n3 汽车/买车 丰田凯美瑞:12.772149,充电器:8.394001,品牌:8.436843,城市:7.... \n4 历史/中国史 三国:8.979797,五虎将:13.072728,人才:7.532783,保镖:6.811... ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>item_id</th>\n <th>标题</th>\n <th>发文时间</th>\n <th>图片数量</th>\n <th>一级分类</th>\n <th>二级分类</th>\n <th>关键词</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>361653323</td>\n <td>疫情谣言粉碎机丨接种新冠疫苗后用麻药或致死?盘点最新疫情谣言,别被忽悠了</td>\n <td>1624522285000</td>\n <td>1</td>\n <td>健康</td>\n <td>健康/疾病防护治疗及西医用药</td>\n <td>医生:14.760494,吸烟:16.474872,板蓝根:15.597788,板蓝根^^熏...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>426732705</td>\n <td>实拍本田飞度:空间真大,8万出头工薪族可选,但内饰能忍?</td>\n <td>1610808303000</td>\n <td>9</td>\n <td>汽车</td>\n <td>汽车/买车</td>\n <td>155n:8.979802,polo:7.951116,中控台:5.954278,中网:7....</td>\n </tr>\n <tr>\n <th>2</th>\n <td>430221183</td>\n <td>搭载135kw电机比亚迪秦plus纯电动版外观更精致</td>\n <td>1612581556000</td>\n <td>2</td>\n <td>汽车</td>\n <td>汽车/买车</td>\n <td>etc:12.055207,代表:8.878175,内饰:5.342025,刀片:9.453...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>441756326</td>\n <td>【提车作业】不顾他人眼光帕萨特phev俘获30老男人浪子心</td>\n <td>1618825835000</td>\n <td>23</td>\n <td>汽车</td>\n <td>汽车/买车</td>\n <td>丰田凯美瑞:12.772149,充电器:8.394001,品牌:8.436843,城市:7....</td>\n </tr>\n <tr>\n <th>4</th>\n <td>443485341</td>\n <td>魏延有反骨之心都能重用,赵云忠心为什么却不被重用?</td>\n <td>1619484501000</td>\n <td>4</td>\n <td>历史</td>\n <td>历史/中国史</td>\n <td>三国:8.979797,五虎将:13.072728,人才:7.532783,保镖:6.811...</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_info = pd.read_table(raw_data_path + '/doc_info.txt', sep='\\t')\n",
"doc_info.columns = [\"item_id\", \"标题\", \"发文时间\", \"图片数量\", \"一级分类\", \"二级分类\", \"关键词\"]\n",
"\n",
"item2cate = dict(zip(doc_info['item_id'], doc_info['一级分类']))\n",
"doc_info.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7cf3ff94",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " user_id item_id 展现时间 网路环境 刷新次数 展现位置 是否点击 消费时长(秒)\n0 1000014754 463510256 1624843756147 5 0 16 0 0\n1 1000014754 463852707 1624843756147 5 0 13 1 80\n2 1000014754 464757134 1625052999841 5 0 13 1 1050\n3 1000014754 464617167 1625052999841 5 0 16 1 286\n4 1000014754 465426190 1625382421168 5 0 5 0 0",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>user_id</th>\n <th>item_id</th>\n <th>展现时间</th>\n <th>网路环境</th>\n <th>刷新次数</th>\n <th>展现位置</th>\n <th>是否点击</th>\n <th>消费时长(秒)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1000014754</td>\n <td>463510256</td>\n <td>1624843756147</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1000014754</td>\n <td>463852707</td>\n <td>1624843756147</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>80</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1000014754</td>\n <td>464757134</td>\n <td>1625052999841</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>1050</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1000014754</td>\n <td>464617167</td>\n <td>1625052999841</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>1</td>\n <td>286</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1000014754</td>\n <td>465426190</td>\n <td>1625382421168</td>\n <td>5</td>\n <td>0</td>\n <td>5</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_data = pd.read_csv(raw_data_path + '/train_data_5w.csv', sep='\\t', index_col=0)\n",
"all_data.columns = [\"user_id\", \"item_id\", \"展现时间\", \"网路环境\", \"刷新次数\", \"展现位置\", \"是否点击\", \"消费时长(秒)\"]\n",
"\n",
"all_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": " user_id item_id 展现时间 网路环境 刷新次数 展现位置 是否点击 消费时长(秒) \\\n0 1000014754 463510256 2021-06-28 01:29:16 5 0 16 0 0 \n1 1000014754 463852707 2021-06-28 01:29:16 5 0 13 1 80 \n2 1000014754 464757134 2021-06-30 11:36:39 5 0 13 1 1050 \n3 1000014754 464617167 2021-06-30 11:36:39 5 0 16 1 286 \n4 1000014754 465426190 2021-07-04 07:07:01 5 0 5 0 0 \n\n 展现时间_日期 \n0 28 \n1 28 \n2 30 \n3 30 \n4 4 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>user_id</th>\n <th>item_id</th>\n <th>展现时间</th>\n <th>网路环境</th>\n <th>刷新次数</th>\n <th>展现位置</th>\n <th>是否点击</th>\n <th>消费时长(秒)</th>\n <th>展现时间_日期</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1000014754</td>\n <td>463510256</td>\n <td>2021-06-28 01:29:16</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>0</td>\n <td>0</td>\n <td>28</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1000014754</td>\n <td>463852707</td>\n <td>2021-06-28 01:29:16</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>80</td>\n <td>28</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1000014754</td>\n <td>464757134</td>\n <td>2021-06-30 11:36:39</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>1050</td>\n <td>30</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1000014754</td>\n <td>464617167</td>\n <td>2021-06-30 11:36:39</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>1</td>\n <td>286</td>\n <td>30</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1000014754</td>\n <td>465426190</td>\n <td>2021-07-04 07:07:01</td>\n <td>5</td>\n <td>0</td>\n <td>5</td>\n <td>0</td>\n <td>0</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_data['展现时间'] = all_data['展现时间'].astype('str')\n",
"all_data['展现时间'] = all_data['展现时间'].apply(lambda x: int(x[:10]))\n",
"\n",
"all_data['展现时间'] = pd.to_datetime(all_data['展现时间'], unit='s', errors='coerce')\n",
"all_data['展现时间_日期'] = all_data['展现时间'].dt.day\n",
"\n",
"all_data.head()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"id": "73c9843e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "179"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mode = 'debug'\n",
"\n",
"if mode == 'debug':\n",
" all_data = shuffle(all_data)\n",
" all_data.reset_index(drop=True)\n",
"\n",
" train_data = all_data[(all_data['展现时间_日期'] >= 5) & (all_data['展现时间_日期'] < 6)]\n",
" test_data = all_data.loc[all_data['展现时间_日期'] == 6, :]\n",
"else:\n",
" train_data = all_data[(all_data['展现时间_日期'] >= 1) & (all_data['展现时间_日期'] < 6)]\n",
" test_data = all_data.loc[all_data['展现时间_日期'] == 6, :]\n",
"\n",
"# 训练集按照时间排序\n",
"train_data.sort_values('展现时间', inplace=True)\n",
"\n",
"del all_data, doc_info, user_info\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15655\n",
"33664\n"
]
}
],
"source": [
"print(train_data['user_id'].nunique())\n",
"print(train_data['item_id'].nunique())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [],
"source": [
"class ItemCF(object):\n",
" def __init__(self, his_data, item2cate):\n",
" self.user_set = set()\n",
" self.his_data = his_data\n",
" self.item2cate = item2cate\n",
"\n",
" self.item_sim_matrix = dict()\n",
" self.item_interacted_num = defaultdict(int)\n",
"\n",
" def calculate_similarity_matrix(self):\n",
" # his_data已经按照时间排序....\n",
" user2items = self.his_data.groupby('user_id')['item_id'].apply(list).reset_index()\n",
" # print(f'计算ItemCF第一阶段...')\n",
" pbar = tqdm(total=user2items.shape[0])\n",
" for idx, row in user2items.iterrows():\n",
" self.user_set.add(row['user_id'])\n",
" for idx1, item_1 in enumerate(row['item_id']):\n",
" self.item_interacted_num[item_1] += 1\n",
" self.item_sim_matrix.setdefault(item_1, {})\n",
" for idx2, item_2 in enumerate(row['item_id']):\n",
" if item_1 == item_2:\n",
" continue\n",
" self.item_sim_matrix[item_1].setdefault(item_2, 0)\n",
" # 新闻阅读可能具有连续性,后续阅读的新闻与前面阅读的新闻相似度更高\n",
" related_score = 1 if idx1 > idx2 else 0.8\n",
" # 如果二者类别相同,新闻之间的相似度更高\n",
" related_score *= 1 if item2cate.get(item_1, None) == item2cate.get(item_2, None) else 0.5\n",
"\n",
" # 活跃用户在计算物品之间相似度时,贡献小于非活跃用户\n",
" self.item_sim_matrix[item_1][item_2] += related_score / math.log(1 + len(row['item_id']))\n",
" pbar.update(1)\n",
" pbar.close()\n",
"\n",
" # 理论上,物品之间共现的用户越多,相似度越高\n",
" # 但是,热门物品与很多物品之间的相似度都很高\n",
" # print(f'计算ItemCF第二阶段...')\n",
" for item_1, related_items in tqdm(self.item_sim_matrix.items()):\n",
" for item_2, weight in related_items.items():\n",
" # 打压热门物品\n",
" self.item_sim_matrix[item_1][item_2] = \\\n",
" weight / math.sqrt(self.item_interacted_num[item_1] * self.item_interacted_num[item_2])\n",
"\n",
" def __call__(self, users, _n=50, _topk=20):\n",
" print(f'开始ItemCF召回: Recall@{topk}-Near@{_n}')\n",
" user2items = self.his_data.groupby('user_id')['item_id'].apply(list)\n",
" popular_items = [val[0] for val in sorted(\n",
" self.item_interacted_num.items(), key=lambda x: x[1], reverse=True)[:_topk]]\n",
"\n",
" user_rec = {}\n",
" for user_id in tqdm(users):\n",
" # 新用户,直接推荐热门物品\n",
" if user_id not in self.user_set:\n",
" user_rec[user_id] = popular_items\n",
" else:\n",
" rank = defaultdict(int)\n",
" his_items = user2items.loc[user_id]\n",
" # 遍历用户历史交互物品\n",
" for his_item in his_items:\n",
" # 选取与his_item相似度最高的_n个物品\n",
" for candidate_item, item_smi_score in sorted(self.item_sim_matrix[his_item].items(),\n",
" key=itemgetter(1), reverse=True)[:_n]:\n",
" # 如果推荐的物品已经被购买过了,是否纳入推荐(可跳过)\n",
" # if candidate_item in his_items:\n",
" # continue\n",
" rank[candidate_item] += item_smi_score\n",
" rec_items = [item[0] for item in sorted(rank.items(), key=itemgetter(1), reverse=True)[:_topk]]\n",
" # 如果推荐的物品不够,用热门物品进行填充\n",
" rec_items += popular_items[:topk-len(rec_items)]\n",
" user_rec[user_id] = rec_items\n",
"\n",
" return user_rec"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [],
"source": [
"icf_cls_path = os.path.join(new_data_path, 'item_cf')\n",
"os.makedirs(icf_cls_path, exist_ok=True)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"data": {
"text/plain": " 0%| | 0/15655 [00:00<?, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d5c3c1e99e764b40a4dac12d60bc4b77"
}
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": " 0%| | 0/33664 [00:00<?, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9011a3e044c948f9850a19db02e464bf"
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"demo_icf_path = os.path.join(icf_cls_path, mode+'_ifc.pkl')\n",
"if os.path.exists(demo_icf_path):\n",
" with open(demo_icf_path, 'rb') as file:\n",
" demo_icf = pickle.loads(file.read())\n",
" file.close()\n",
"else:\n",
" demo_icf = ItemCF(train_data, item2cate)\n",
" demo_icf.calculate_similarity_matrix()\n",
" demo_icf_pkl = pickle.dumps(demo_icf)\n",
"\n",
" output_icf = open(demo_icf_path, 'wb')\n",
" output_icf.write(demo_icf_pkl)\n",
" output_icf.close()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始ItemCF召回: Recall@100-Near@50\n"
]
},
{
"data": {
"text/plain": " 0%| | 0/13792 [00:00<?, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "afe3cbc4db0f423c9a62c7bede9befa2"
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"n, topk = 50, 100\n",
"\n",
"# 召回\n",
"test_users = test_data['user_id'].unique()\n",
"icf_rec_result = demo_icf(test_users, n, topk)\n",
"\n",
"test_user_group = test_data.groupby('user_id')['item_id'].agg(list).reset_index()\n",
"test_pred = [icf_rec_result[user_id] for user_id in test_user_group['user_id']]\n",
"test_true = test_user_group['item_id'].to_list()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 14,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MAP@100: 0.016906571748779006\n",
"Recall@100: 0.15798311228206416\n",
"Precision@100: 0.027745069605568447\n",
"F1@100: 0.03914852311427278\n"
]
}
],
"source": [
"PrintMetric(test_true, test_pred, topk)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,413 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b06bec9d",
"metadata": {},
"outputs": [],
"source": [
"import gc\n",
"import os\n",
"import math\n",
"import pickle\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm.auto import tqdm\n",
"from operator import itemgetter\n",
"\n",
"from sklearn.utils import shuffle\n",
"from collections import defaultdict\n",
"\n",
"from metric import PrintMetric\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"raw_data_path = 'D:/news-rec/dataset/raw_data'\n",
"new_data_path = 'D:/news-rec/dataset/recall_data'\n",
"\n",
"os.makedirs(new_data_path, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4479018f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " user_id 设备名称 操作系统 所在省 所在市 \\\n0 1000372820 TAS-AN00 Android 广东 广州 \n10 1001440812 iPad IOS NaN NaN \n16 1001771644 V1901A Android 陕西 宝鸡 \n17 1001773994 STK-AL00 Android 广东 河源 \n142 1017050854 DUB-AL00 Android 湖北 武汉 \n\n 年龄 \\\n0 A_0_24:0.404616,A_25_29:0.059027,A_30_39:0.516... \n10 A_0_24:0.312738,A_25_29:0.261741,A_30_39:0.268... \n16 A_0_24:0.445645,A_25_29:0.330315,A_30_39:0.153... \n17 A_0_24:0.497841,A_25_29:0.245965,A_30_39:0.219... \n142 A_0_24:0.008895,A_25_29:0.067247,A_30_39:0.824... \n\n 性别 \n0 female:0.051339,male:0.948661 \n10 female:0.907997,male:0.092003 \n16 female:0.049787,male:0.950213 \n17 female:0.117317,male:0.882683 \n142 female:0.519291,male:0.480709 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>user_id</th>\n <th>设备名称</th>\n <th>操作系统</th>\n <th>所在省</th>\n <th>所在市</th>\n <th>年龄</th>\n <th>性别</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1000372820</td>\n <td>TAS-AN00</td>\n <td>Android</td>\n <td>广东</td>\n <td>广州</td>\n <td>A_0_24:0.404616,A_25_29:0.059027,A_30_39:0.516...</td>\n <td>female:0.051339,male:0.948661</td>\n </tr>\n <tr>\n <th>10</th>\n <td>1001440812</td>\n <td>iPad</td>\n <td>IOS</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>A_0_24:0.312738,A_25_29:0.261741,A_30_39:0.268...</td>\n <td>female:0.907997,male:0.092003</td>\n </tr>\n <tr>\n <th>16</th>\n <td>1001771644</td>\n <td>V1901A</td>\n <td>Android</td>\n <td>陕西</td>\n <td>宝鸡</td>\n <td>A_0_24:0.445645,A_25_29:0.330315,A_30_39:0.153...</td>\n <td>female:0.049787,male:0.950213</td>\n </tr>\n <tr>\n <th>17</th>\n <td>1001773994</td>\n <td>STK-AL00</td>\n <td>Android</td>\n <td>广东</td>\n <td>河源</td>\n <td>A_0_24:0.497841,A_25_29:0.245965,A_30_39:0.219...</td>\n <td>female:0.117317,male:0.882683</td>\n </tr>\n <tr>\n <th>142</th>\n <td>1017050854</td>\n <td>DUB-AL00</td>\n <td>Android</td>\n <td>湖北</td>\n <td>武汉</td>\n <td>A_0_24:0.008895,A_25_29:0.067247,A_30_39:0.824...</td>\n <td>female:0.519291,male:0.480709</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_info = pd.read_csv(raw_data_path + '/user_info_5w.csv', sep='\\t', index_col=0)\n",
"user_info.columns = [\"user_id\", \"设备名称\", \"操作系统\", \"所在省\", \"所在市\", \"年龄\",\"性别\"]\n",
"\n",
"user_info.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "22d466d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " item_id 标题 发文时间 图片数量 一级分类 \\\n0 361653323 疫情谣言粉碎机丨接种新冠疫苗后用麻药或致死?盘点最新疫情谣言,别被忽悠了 1624522285000 1 健康 \n1 426732705 实拍本田飞度:空间真大,8万出头工薪族可选,但内饰能忍? 1610808303000 9 汽车 \n2 430221183 搭载135kw电机比亚迪秦plus纯电动版外观更精致 1612581556000 2 汽车 \n3 441756326 【提车作业】不顾他人眼光帕萨特phev俘获30老男人浪子心 1618825835000 23 汽车 \n4 443485341 魏延有反骨之心都能重用,赵云忠心为什么却不被重用? 1619484501000 4 历史 \n\n 二级分类 关键词 \n0 健康/疾病防护治疗及西医用药 医生:14.760494,吸烟:16.474872,板蓝根:15.597788,板蓝根^^熏... \n1 汽车/买车 155n:8.979802,polo:7.951116,中控台:5.954278,中网:7.... \n2 汽车/买车 etc:12.055207,代表:8.878175,内饰:5.342025,刀片:9.453... \n3 汽车/买车 丰田凯美瑞:12.772149,充电器:8.394001,品牌:8.436843,城市:7.... \n4 历史/中国史 三国:8.979797,五虎将:13.072728,人才:7.532783,保镖:6.811... ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>item_id</th>\n <th>标题</th>\n <th>发文时间</th>\n <th>图片数量</th>\n <th>一级分类</th>\n <th>二级分类</th>\n <th>关键词</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>361653323</td>\n <td>疫情谣言粉碎机丨接种新冠疫苗后用麻药或致死?盘点最新疫情谣言,别被忽悠了</td>\n <td>1624522285000</td>\n <td>1</td>\n <td>健康</td>\n <td>健康/疾病防护治疗及西医用药</td>\n <td>医生:14.760494,吸烟:16.474872,板蓝根:15.597788,板蓝根^^熏...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>426732705</td>\n <td>实拍本田飞度:空间真大,8万出头工薪族可选,但内饰能忍?</td>\n <td>1610808303000</td>\n <td>9</td>\n <td>汽车</td>\n <td>汽车/买车</td>\n <td>155n:8.979802,polo:7.951116,中控台:5.954278,中网:7....</td>\n </tr>\n <tr>\n <th>2</th>\n <td>430221183</td>\n <td>搭载135kw电机比亚迪秦plus纯电动版外观更精致</td>\n <td>1612581556000</td>\n <td>2</td>\n <td>汽车</td>\n <td>汽车/买车</td>\n <td>etc:12.055207,代表:8.878175,内饰:5.342025,刀片:9.453...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>441756326</td>\n <td>【提车作业】不顾他人眼光帕萨特phev俘获30老男人浪子心</td>\n <td>1618825835000</td>\n <td>23</td>\n <td>汽车</td>\n <td>汽车/买车</td>\n <td>丰田凯美瑞:12.772149,充电器:8.394001,品牌:8.436843,城市:7....</td>\n </tr>\n <tr>\n <th>4</th>\n <td>443485341</td>\n <td>魏延有反骨之心都能重用,赵云忠心为什么却不被重用?</td>\n <td>1619484501000</td>\n <td>4</td>\n <td>历史</td>\n <td>历史/中国史</td>\n <td>三国:8.979797,五虎将:13.072728,人才:7.532783,保镖:6.811...</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_info = pd.read_table(raw_data_path + '/doc_info.txt', sep='\\t')\n",
"doc_info.columns = [\"item_id\", \"标题\", \"发文时间\", \"图片数量\", \"一级分类\", \"二级分类\", \"关键词\"]\n",
"\n",
"doc_info.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7cf3ff94",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " user_id item_id 展现时间 网路环境 刷新次数 展现位置 是否点击 消费时长(秒)\n0 1000014754 463510256 1624843756147 5 0 16 0 0\n1 1000014754 463852707 1624843756147 5 0 13 1 80\n2 1000014754 464757134 1625052999841 5 0 13 1 1050\n3 1000014754 464617167 1625052999841 5 0 16 1 286\n4 1000014754 465426190 1625382421168 5 0 5 0 0",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>user_id</th>\n <th>item_id</th>\n <th>展现时间</th>\n <th>网路环境</th>\n <th>刷新次数</th>\n <th>展现位置</th>\n <th>是否点击</th>\n <th>消费时长(秒)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1000014754</td>\n <td>463510256</td>\n <td>1624843756147</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1000014754</td>\n <td>463852707</td>\n <td>1624843756147</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>80</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1000014754</td>\n <td>464757134</td>\n <td>1625052999841</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>1050</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1000014754</td>\n <td>464617167</td>\n <td>1625052999841</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>1</td>\n <td>286</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1000014754</td>\n <td>465426190</td>\n <td>1625382421168</td>\n <td>5</td>\n <td>0</td>\n <td>5</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_data = pd.read_csv(raw_data_path + '/train_data_5w.csv', sep='\\t', index_col=0)\n",
"all_data.columns = [\"user_id\", \"item_id\", \"展现时间\", \"网路环境\", \"刷新次数\", \"展现位置\", \"是否点击\", \"消费时长(秒)\"]\n",
"\n",
"all_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": " user_id item_id 展现时间 网路环境 刷新次数 展现位置 是否点击 消费时长(秒) \\\n0 1000014754 463510256 2021-06-28 01:29:16 5 0 16 0 0 \n1 1000014754 463852707 2021-06-28 01:29:16 5 0 13 1 80 \n2 1000014754 464757134 2021-06-30 11:36:39 5 0 13 1 1050 \n3 1000014754 464617167 2021-06-30 11:36:39 5 0 16 1 286 \n4 1000014754 465426190 2021-07-04 07:07:01 5 0 5 0 0 \n\n 展现时间_日期 \n0 28 \n1 28 \n2 30 \n3 30 \n4 4 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>user_id</th>\n <th>item_id</th>\n <th>展现时间</th>\n <th>网路环境</th>\n <th>刷新次数</th>\n <th>展现位置</th>\n <th>是否点击</th>\n <th>消费时长(秒)</th>\n <th>展现时间_日期</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1000014754</td>\n <td>463510256</td>\n <td>2021-06-28 01:29:16</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>0</td>\n <td>0</td>\n <td>28</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1000014754</td>\n <td>463852707</td>\n <td>2021-06-28 01:29:16</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>80</td>\n <td>28</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1000014754</td>\n <td>464757134</td>\n <td>2021-06-30 11:36:39</td>\n <td>5</td>\n <td>0</td>\n <td>13</td>\n <td>1</td>\n <td>1050</td>\n <td>30</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1000014754</td>\n <td>464617167</td>\n <td>2021-06-30 11:36:39</td>\n <td>5</td>\n <td>0</td>\n <td>16</td>\n <td>1</td>\n <td>286</td>\n <td>30</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1000014754</td>\n <td>465426190</td>\n <td>2021-07-04 07:07:01</td>\n <td>5</td>\n <td>0</td>\n <td>5</td>\n <td>0</td>\n <td>0</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_data['展现时间'] = all_data['展现时间'].astype('str')\n",
"all_data['展现时间'] = all_data['展现时间'].apply(lambda x: int(x[:10]))\n",
"\n",
"all_data['展现时间'] = pd.to_datetime(all_data['展现时间'], unit='s', errors='coerce')\n",
"all_data['展现时间_日期'] = all_data['展现时间'].dt.day\n",
"\n",
"all_data.head()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"id": "73c9843e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "75"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mode = 'debug'\n",
"\n",
"if mode == 'debug':\n",
" all_data = shuffle(all_data)\n",
" all_data.reset_index(drop=True)\n",
"\n",
" train_data = all_data[(all_data['展现时间_日期'] >= 5) & (all_data['展现时间_日期'] < 6)]\n",
" test_data = all_data.loc[all_data['展现时间_日期'] == 6, :]\n",
"else:\n",
" train_data = all_data[(all_data['展现时间_日期'] >= 1) & (all_data['展现时间_日期'] < 6)]\n",
" test_data = all_data.loc[all_data['展现时间_日期'] == 6, :]\n",
"\n",
"del all_data, doc_info, user_info\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15655\n",
"33664\n"
]
}
],
"source": [
"print(train_data['user_id'].nunique())\n",
"print(train_data['item_id'].nunique())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"id": "345cc0d2",
"metadata": {},
"outputs": [],
"source": [
"class UserCF(object):\n",
" def __init__(self, his_data):\n",
" self.user_set = set()\n",
" self.item_set = set()\n",
"\n",
" self.his_data = his_data\n",
" self.user_sim_matrix = dict()\n",
" self.user_interacted_num = defaultdict(int)\n",
" self.item_interacted_num = defaultdict(int) # 热门推荐时会用到\n",
"\n",
" def calculate_similarity_matrix(self):\n",
" item2users = self.his_data.groupby('item_id')['user_id'].apply(list).reset_index()\n",
"\n",
" # print(f'计算ItemCF第一阶段...')\n",
" pbar = tqdm(total=item2users.shape[0])\n",
" for idx, row in item2users.iterrows():\n",
" self.item_set.add(row['item_id'])\n",
" self.user_set.update(row['user_id'])\n",
" self.item_interacted_num[row['item_id']] += len(row['user_id'])\n",
" for idx1, user_1 in enumerate(row['user_id']):\n",
" self.user_interacted_num[user_1] += 1\n",
" self.user_sim_matrix.setdefault(user_1, {})\n",
" for idx2, user_2 in enumerate(row['user_id']):\n",
" if user_1 == user_2:\n",
" continue\n",
" self.user_sim_matrix[user_1].setdefault(user_2, 0)\n",
" # 热门物品用在计算用户之间相似度时,贡献小于非热门物品\n",
" self.user_sim_matrix[user_1][user_2] += 1 / math.log(1 + len(row['user_id']))\n",
" pbar.update(1)\n",
" pbar.close()\n",
"\n",
" # 理论上,用户之间共现的物品越多,相似度越高\n",
" # 但是,活跃用户与很多用户之间的相似度都很高\n",
" print(f'计算UserCF第二阶段...')\n",
" for user_1, related_users in tqdm(self.user_sim_matrix.items()):\n",
" for user_2, weight in related_users.items():\n",
" # 打压活跃用户\n",
" self.user_sim_matrix[user_1][user_2] =\\\n",
" weight / math.sqrt(self.user_interacted_num[user_1] * self.user_interacted_num[user_2])\n",
"\n",
" def __call__(self, users, _n=50, _topk=20):\n",
" print(f'开始ItemCF召回: Recall@{topk}-Near@{_n}')\n",
" user2items = self.his_data.groupby('user_id')['item_id'].apply(list)\n",
" popular_items = [val[0] for val in sorted(\n",
" self.item_interacted_num.items(), key=lambda x: x[1], reverse=True)[:_topk]]\n",
"\n",
" user_rec = {}\n",
" for user_id in tqdm(users):\n",
" # 新用户,直接推荐热门物品\n",
" if user_id not in self.user_set:\n",
" user_rec[user_id] = popular_items\n",
" else:\n",
" rank = defaultdict(int)\n",
" for relate_user, user_smi_score in sorted(self.user_sim_matrix[user_id].items(),\n",
" key=itemgetter(1), reverse=True)[:_n]:\n",
" for candidate_item in user2items.loc[relate_user]:\n",
" # if candidate_item in user2items.loc[user_id]:\n",
" # continue\n",
" rank[candidate_item] += user_smi_score\n",
" rec_items = [item[0] for item in sorted(rank.items(), key=itemgetter(1), reverse=True)[:_topk]]\n",
" # 如果推荐的物品不够,用热门物品进行填充\n",
" rec_items += popular_items[:topk-len(rec_items)]\n",
" user_rec[user_id] = rec_items\n",
"\n",
" return user_rec"
]
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [],
"source": [
"ucf_cls_path = os.path.join(new_data_path, 'user_cf')\n",
"os.makedirs(ucf_cls_path, exist_ok=True)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [],
"source": [
"demo_ucf_path = os.path.join(ucf_cls_path, mode+'_ufc.pkl')\n",
"\n",
"if os.path.exists(demo_ucf_path):\n",
" with open(demo_ucf_path, 'rb') as file:\n",
" demo_ucf = pickle.loads(file.read())\n",
" file.close()\n",
"else:\n",
" demo_ucf = UserCF(train_data)\n",
" demo_ucf.calculate_similarity_matrix()\n",
" demo_ucf_pkl = pickle.dumps(demo_ucf)\n",
"\n",
" output_ucf = open(demo_ucf_path, 'wb')\n",
" output_ucf.write(demo_ucf_pkl)\n",
" output_ucf.close()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始ItemCF召回: Recall@100-Near@50\n"
]
},
{
"data": {
"text/plain": " 0%| | 0/13792 [00:00<?, ?it/s]",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "e7e74728073f45ba9d21802109555731"
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"n, topk = 50, 100\n",
"\n",
"# 召回\n",
"test_users = test_data['user_id'].unique()\n",
"icf_rec_result = demo_ucf(test_users, n, topk)\n",
"\n",
"test_user_group = test_data.groupby('user_id')['item_id'].agg(list).reset_index()\n",
"test_pred = [icf_rec_result[user_id] for user_id in test_user_group['user_id']]\n",
"test_true = test_user_group['item_id'].to_list()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MAP@100: 0.012998163356723474\n",
"Recall@100: 0.14660816973054847\n",
"Precision@100: 0.02628625290023202\n",
"F1@100: 0.036618405618781665\n"
]
}
],
"source": [
"PrintMetric(test_true, test_pred, topk)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}