修改配置文件
This commit is contained in:
6
codes/news_recsys/news_rec_server/conf/proj_path.py
Normal file
6
codes/news_recsys/news_rec_server/conf/proj_path.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import os
|
||||
|
||||
# Project path configuration shared by the news_rec_server modules.
#
# home_path:       the current user's home directory.
# proj_path:       root directory of the deployed project; it always ends with
#                  a path separator so existing callers that build paths with
#                  `proj_path + "sub/dir"` keep working.
# stop_words_path: location of the stop-word list under the project's conf/.

# os.path.expanduser("~") honours $HOME when it is set, and otherwise falls
# back to the platform's user database instead of raising KeyError the way
# os.environ['HOME'] does (e.g. under a minimal cron/service environment).
home_path = os.path.expanduser("~")

# The trailing empty component makes os.path.join append the separator.
proj_path = os.path.join(home_path, "news_rec_server", "")

stop_words_path = os.path.join(proj_path, "conf", "stop_words.txt")
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,20 +0,0 @@
|
||||
2021-12-04-00-10-21
|
||||
run update_new_items success.
|
||||
update_dynamic_feature_protrail success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
process_material success.
|
||||
process_user.py success.
|
||||
news detail info are saved in redis db.
|
||||
update_redis success.
|
||||
|
||||
2021-12-05-00-14-24
|
||||
run update_new_items success.
|
||||
update_dynamic_feature_protrail success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
process_material success.
|
||||
process_user.py success.
|
||||
news detail info are saved in redis db.
|
||||
update_redis success.
|
||||
|
||||
@@ -1,151 +0,0 @@
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
c1ea3624-7e16-41e3-91ca-5f2237c90016
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
c1ea3624-7e16-41e3-91ca-5f2237c90016
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
c1ea3624-7e16-41e3-91ca-5f2237c90016
|
||||
d04ec960-fb54-44c4-93d8-27aee82a14a5
|
||||
9eb67338-c4dd-4fab-b9f6-dd3d1f635078
|
||||
8e744b2e-283e-4880-a35d-010e22f9b6d1
|
||||
64a9131f-7bef-4026-af19-a437258b698b
|
||||
5735d3ba-2ae7-44b0-87b1-a4212042dfd5
|
||||
2ec76526-1734-4631-85d5-38b53c289724
|
||||
2631c157-4bd1-469e-a69a-5cc40d56087e
|
||||
0ed4e74d-5133-42fe-b9e9-c2f4a217aae6
|
||||
e6feadd0-b0ca-4dad-9cf0-e51d59208741
|
||||
de8f34ed-894f-454a-8af5-498cb5bfa416
|
||||
d6bfbcb5-e2aa-43af-85c1-a53776ee55da
|
||||
c83f6e63-3614-46d4-9c3b-56acad2c6053
|
||||
ae39b902-7e4c-4392-972d-97d876e09802
|
||||
a2b457b0-232f-4175-a618-08bf257bff13
|
||||
8ce201a5-a59a-45e2-9c80-d1530213dd76
|
||||
5f52e821-b2f1-4328-85f0-62bc8e0b36e7
|
||||
328a2cc0-89bf-4626-b9ee-f3ee4c6873f8
|
||||
30fb8ac3-7cc8-4666-9aac-c66cd4cd8e20
|
||||
148e8b52-5407-4545-9c5a-1745236f8139
|
||||
06ab8ab1-0170-4fca-8f7a-900a82872378
|
||||
f9f4c879-005a-4d7a-827e-099666396bd4
|
||||
c1ea3624-7e16-41e3-91ca-5f2237c90016
|
||||
@@ -1,78 +0,0 @@
|
||||
2021-11-30-19-03-01
|
||||
scrapy crawl sina_spider --pages success.
|
||||
run python monitor_news.py success.
|
||||
run update_new_items success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
news detail info are saved in redis db.
|
||||
material to mongo and redis success.
|
||||
|
||||
2021-11-30-19-08-01
|
||||
scrapy crawl sina_spider --pages success.
|
||||
run python monitor_news.py success.
|
||||
run update_new_items success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
news detail info are saved in redis db.
|
||||
material to mongo and redis success.
|
||||
|
||||
material to mongo and redis fail.
|
||||
|
||||
material to mongo and redis fail.
|
||||
|
||||
2021-12-02-09-13-04
|
||||
scrapy crawl sina_spider --pages success.
|
||||
the news nums of news_20211202 collection is 251 and less then 1000.
|
||||
run python monitor_news.py success.
|
||||
material to mongo and redis fail.
|
||||
|
||||
material to mongo and redis fail.
|
||||
|
||||
update_dynamic_feature_protrail success.
|
||||
material to mongo and redis fail.
|
||||
|
||||
update_dynamic_feature_protrail success.
|
||||
run update_new_items success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
news detail info are saved in redis db.
|
||||
material to mongo and redis success.
|
||||
|
||||
2021-12-02-23-00-01
|
||||
scrapy crawl sina_spider --pages success.
|
||||
the news nums of news_20211202 collection is 644 and less then 1000.
|
||||
run python monitor_news.py success.
|
||||
material to mongo and redis fail.
|
||||
|
||||
material to mongo and redis fail.
|
||||
|
||||
material to mongo and redis fail.
|
||||
|
||||
update_dynamic_feature_protrail success.
|
||||
run update_new_items success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
news detail info are saved in redis db.
|
||||
material to mongo and redis success.
|
||||
|
||||
2021-12-03-09-38-39
|
||||
scrapy crawl sina_spider --pages success.
|
||||
the news nums of news_20211203 collection is 659 and less then 1000.
|
||||
run python monitor_news.py success.
|
||||
2021-12-03-09-50-04
|
||||
scrapy crawl sina_spider --pages success.
|
||||
run python monitor_news.py success.
|
||||
update_dynamic_feature_protrail success.
|
||||
run update_new_items success.
|
||||
delete RedisProtrail ...
|
||||
run update_redis_mongo_protrail_data success.
|
||||
news detail info are saved in redis db.
|
||||
material to mongo and redis success.
|
||||
|
||||
2021-12-04-00-00-01
|
||||
scrapy crawl sina_spider --pages success.
|
||||
run python monitor_news.py success.
|
||||
2021-12-05-00-00-01
|
||||
scrapy crawl sina_spider --pages success.
|
||||
the news nums of news_20211205 collection is 793 and less then 1000.
|
||||
run python monitor_news.py success.
|
||||
@@ -1,98 +0,0 @@
|
||||
2021-11-30-19-03-01
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-11-30-19-08-19
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-01-01-00-01
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-02-01-00-01
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-02-09-13-30
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-02-09-18-07
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-02-09-23-18
|
||||
a sorted news_ids are saved into redis.
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-01-00-02
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-09-32-44
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-09-33-10
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-09-33-54
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-05-18
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-13-03
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-14-12
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-18-59
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-22-57
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-27-21
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-28-49
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-03-10-45-22
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-04-00-11-59
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
2021-12-05-00-15-43
|
||||
a sorted news_ids are saved into redis.
|
||||
a hot rec list are saved into redis.....
|
||||
run /home/recsys/miniconda3/envs/news_rec_py3/bin/python /home/recsys/news_rec_server/recprocess/offline.py success.
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
sys.path.append("../../")
|
||||
import jieba
|
||||
import jieba.analyse
|
||||
|
||||
from conf.proj_path import stop_words_path
|
||||
|
||||
def get_key_words(words_str):
|
||||
"""提取中文中的关键词
|
||||
@@ -22,8 +22,7 @@ def get_key_words(words_str):
|
||||
|
||||
# 加载停用词
|
||||
stopword_set = set()
|
||||
# TODO 改成变量而不是写死
|
||||
with open('/home/recsys/news_rec_server/conf/stop_words.txt', encoding="utf-8") as f:
|
||||
with open(stop_words_path, encoding="utf-8") as f:
|
||||
line = f.readline().rstrip()
|
||||
stopword_set.add(line)
|
||||
|
||||
@@ -41,10 +40,10 @@ def get_key_words(words_str):
|
||||
key_words_list_tfidf = jieba.analyse.extract_tags(new_words_str, topK=10, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'))
|
||||
key_words_list_textrank = jieba.analyse.textrank(new_words_str, topK=10, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'))
|
||||
|
||||
# print("key_words_list_tfidf", key_words_list_tfidf)
|
||||
# print("key_words_list_textrank", key_words_list_textrank)
|
||||
|
||||
tfidf_textrank_list = list(set(key_words_list_tfidf) & set(key_words_list_textrank))[:3]
|
||||
|
||||
# print(tfidf_textrank_list)
|
||||
return tfidf_textrank_list
|
||||
|
||||
if __name__ == "__main__":
|
||||
key_words = get_key_words("本教程主要是针对具有机器学习基础并想找推荐算法岗位的同学,由推荐算法基础、推荐算法入门赛、新闻推荐项目及推荐算法面经组成,形成了一个完整的从基础到实战再到面试的闭环。主要分为三个阶段,分别是推荐系统基础、推荐系统进阶和推荐算法面经,每个阶段的具体内容如下")
|
||||
print(key_words)
|
||||
Binary file not shown.
@@ -5,7 +5,6 @@ from dao.mongo_server import MongoServer
|
||||
from dao.redis_server import RedisServer
|
||||
from recall.hot_recall import HotRecall
|
||||
from cold_start.cold_start import ColdStart
|
||||
from datetime import datetime
|
||||
|
||||
# 这个类是用来实现离线推荐流程的,给每个用户都存储一个倒排索引列表
|
||||
# 对于热门页的内容,初始化的时候每个用户都是一样的
|
||||
|
||||
@@ -61,12 +61,12 @@ class OnlineServer(object):
|
||||
user_exposure_key = user_exposure_prefix + str(userid)
|
||||
|
||||
# 一页默认10个item, 但这里候选20条,因为有可能有的在推荐页曝光过
|
||||
article_num = 50
|
||||
article_num = 200
|
||||
|
||||
# 返回的是一个news_id列表 zrevrange排序分值从大到小
|
||||
candiate_id_list = self.reclist_redis_db.zrevrange(cold_start_user_key, 0, article_num-1)
|
||||
|
||||
print("candiate_id_list", candiate_id_list)
|
||||
# print("candiate_id_list", candiate_id_list)
|
||||
|
||||
if len(candiate_id_list) > 0:
|
||||
# 根据news_id获取新闻的具体内容,并返回一个列表,列表中的元素是按照顺序展示的新闻信息字典
|
||||
@@ -190,13 +190,11 @@ class OnlineServer(object):
|
||||
self.reclist_redis_db.zunionstore(hot_list_user_key, ["hot_list"])
|
||||
|
||||
# 一页默认10个item, 但这里候选20条,因为有可能有的在推荐页曝光过
|
||||
article_num = 50
|
||||
article_num = 200
|
||||
|
||||
# 返回的是一个news_id列表 zrevrange排序分值从大到小
|
||||
candiate_id_list = self.reclist_redis_db.zrevrange(hot_list_user_key, 0, article_num-1)
|
||||
|
||||
print("candiate_id_list", candiate_id_list)
|
||||
|
||||
if len(candiate_id_list) > 0:
|
||||
# 根据news_id获取新闻的具体内容,并返回一个列表,列表中的元素是按照顺序展示的新闻信息字典
|
||||
news_info_list = []
|
||||
@@ -229,8 +227,6 @@ class OnlineServer(object):
|
||||
f.write(news_id + "\n")
|
||||
print("there are not news detail info for {}".format(news_id))
|
||||
continue
|
||||
# news_info_str = news_info_str.replace("'", '"' ) # 将单引号都替换成双引号
|
||||
# news_info_dict = json.loads(news_info_str)
|
||||
# 需要确认一下前端接收的json,key需要是单引号还是双引号
|
||||
news_info_list.append(news_info_dict)
|
||||
news_expose_list.add(news_id)
|
||||
@@ -247,8 +243,6 @@ class OnlineServer(object):
|
||||
|
||||
# 曝光重新落表
|
||||
self._save_user_exposure(user_id,news_expose_list)
|
||||
#print(news_expose_list, len(news_expose_list))
|
||||
# print(news_info_list)
|
||||
return news_info_list
|
||||
else:
|
||||
#TODO 临时这么做,这么做不太好
|
||||
@@ -262,22 +256,14 @@ class OnlineServer(object):
|
||||
def get_news_detail(self, news_id):
|
||||
"""获取新闻展示的详细信息
|
||||
"""
|
||||
# print(1111)
|
||||
news_info_str = self.static_news_info_redis_db.get("static_news_detail:" + news_id)
|
||||
# print(222)
|
||||
# print(news_info_str)
|
||||
news_info_str = news_info_str.replace('\'', '\"' ) # 将单引号都替换成双引号
|
||||
# print(333)
|
||||
# print(news_info_str)
|
||||
news_info_dit = json.loads(news_info_str)
|
||||
# print(444)
|
||||
|
||||
# print("news_info_dit:", news_info_dit)
|
||||
news_dynamic_info_str = self.dynamic_news_info_redis_db.get("dynamic_news_detail:" + news_id)
|
||||
news_dynamic_info_str = news_dynamic_info_str.replace("'", '"' ) # 将单引号都替换成双引号
|
||||
news_dynamic_info_dit = json.loads(news_dynamic_info_str)
|
||||
|
||||
# print("news_info_dit:", news_dynamic_info_dit)
|
||||
|
||||
for k in news_dynamic_info_dit.keys():
|
||||
news_info_dit[k] = news_dynamic_info_dit[k]
|
||||
@@ -298,9 +284,7 @@ class OnlineServer(object):
|
||||
news_dynamic_info_dict[action_type[0]] -=1
|
||||
else:
|
||||
news_dynamic_info_dict["read_num"] +=1
|
||||
# print("update",news_dynamic_info_dict)
|
||||
news_dynamic_info_str = json.dumps(news_dynamic_info_dict)
|
||||
# print("update",news_dynamic_info_str)
|
||||
news_dynamic_info_str = news_dynamic_info_str.replace('"', "'" )
|
||||
res = self.dynamic_news_info_redis_db.set("dynamic_news_detail:" + news_id, news_dynamic_info_str)
|
||||
return res
|
||||
|
||||
@@ -3,6 +3,7 @@ attrs==21.2.0
|
||||
Automat==20.2.0
|
||||
certifi==2021.10.8
|
||||
cffi==1.15.0
|
||||
charset-normalizer==2.0.8
|
||||
click==8.0.3
|
||||
constantly==15.1.0
|
||||
cryptography==35.0.0
|
||||
@@ -37,15 +38,19 @@ PyDispatcher==2.0.5
|
||||
pymongo==3.12.1
|
||||
PyMySQL==1.0.2
|
||||
pyOpenSSL==21.0.0
|
||||
pysnowflake==0.1.3
|
||||
queuelib==1.6.2
|
||||
redis==3.5.3
|
||||
requests==2.26.0
|
||||
Scrapy==2.5.1
|
||||
selenium==4.0.0
|
||||
service-identity==21.1.0
|
||||
six==1.16.0
|
||||
sniffio==1.2.0
|
||||
snowflake==0.0.3
|
||||
sortedcontainers==2.4.0
|
||||
SQLAlchemy==1.4.26
|
||||
tornado==6.1
|
||||
trio==0.19.0
|
||||
trio-websocket==0.9.2
|
||||
Twisted==21.7.0
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
# 这个脚本每天凌晨2点30会自动跑
|
||||
# 设置python环境
|
||||
# python 环境需要换成自己的虚拟环境中的Python
|
||||
python=/home/recsys/miniconda3/envs/news_rec_py3/bin/python
|
||||
news_recsys_path="/home/recsys/news_rec_server"
|
||||
home_path=$HOME
|
||||
|
||||
news_recsys_path=${home_path}"/news_rec_server"
|
||||
|
||||
# 得跳转到这个目录才能执行下面爬虫的命令
|
||||
cd ${news_recsys_path}/materials/news_scrapy
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# python 环境需要换成自己的虚拟环境中的Python
|
||||
python=/home/recsys/miniconda3/envs/news_rec_py3/bin/python
|
||||
news_recsys_path="/home/recsys/news_rec_server"
|
||||
home_path=$HOME
|
||||
news_recsys_path=${home_path}"/news_rec_server"
|
||||
|
||||
echo "$(date -d today +%Y-%m-%d-%H-%M-%S)"
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
# python 环境需要换成自己的虚拟环境中的Python
|
||||
python=/home/recsys/miniconda3/envs/news_rec_py3/bin/python
|
||||
news_recsys_path="/home/recsys/news_rec_server"
|
||||
home_path=$HOME
|
||||
news_recsys_path=${home_path}"/news_rec_server"
|
||||
|
||||
cd ${news_recsys_path}/recprocess
|
||||
|
||||
|
||||
Reference in New Issue
Block a user