From 4a40f08e32853f47ceaf1b6cd160fc769bf4609b Mon Sep 17 00:00:00 2001 From: RuyiLuo Date: Sun, 5 Dec 2021 15:22:11 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=85=8D=E7=BD=AE=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 24 +-- .../news_rec_server/conf/proj_path.py | 6 + .../__pycache__/log_controller.cpython-38.pyc | Bin 894 -> 0 bytes .../user_action_controller.cpython-38.pyc | Bin 3975 -> 0 bytes .../dao/__pycache__/Mongo.cpython-38.pyc | Bin 1465 -> 0 bytes .../dao/__pycache__/Mysql.cpython-38.pyc | Bin 999 -> 0 bytes .../dao/__pycache__/Redis.cpython-38.pyc | Bin 706 -> 0 bytes .../__pycache__/mongo_server.cpython-38.pyc | Bin 2516 -> 0 bytes .../__pycache__/mongo_server.cpython-39.pyc | Bin 2566 -> 0 bytes .../__pycache__/mysql_server.cpython-38.pyc | Bin 3685 -> 0 bytes .../__pycache__/redis_server.cpython-38.pyc | Bin 2040 -> 0 bytes .../entity/__pycache__/logitem.cpython-38.pyc | Bin 1455 -> 0 bytes .../__pycache__/register_user.cpython-38.pyc | Bin 1123 -> 0 bytes .../user_collections.cpython-38.pyc | Bin 1404 -> 0 bytes .../__pycache__/user_exposure.cpython-38.pyc | Bin 1397 -> 0 bytes .../__pycache__/user_likes.cpython-38.pyc | Bin 1359 -> 0 bytes .../__pycache__/user_read.cpython-38.pyc | Bin 1350 -> 0 bytes .../logs/material_and_user_process.log | 20 --- .../news_rec_server/logs/news_bad_cases.log | 151 ------------------ .../logs/offline_material_process.log | 78 --------- .../logs/offline_rec_list_to_redis.log | 98 ------------ .../materials/material_process/utils.py | 13 +- .../__pycache__/online.cpython-38.pyc | Bin 7861 -> 7823 bytes .../news_rec_server/recprocess/offline.py | 1 - .../news_rec_server/recprocess/online.py | 22 +-- .../news_rec_server/requirements.txt | 5 + .../news_rec_server/scheduler/crawl_news.sh | 8 +- .../offline_material_and_user_process.sh | 5 +- .../news_rec_server/scheduler/run_offline.sh | 4 +- 29 files changed, 42 insertions(+), 393 deletions(-) create mode 100644 codes/news_recsys/news_rec_server/conf/proj_path.py delete mode 100644 codes/news_recsys/news_rec_server/controller/__pycache__/log_controller.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/controller/__pycache__/user_action_controller.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/Mongo.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/Mysql.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/Redis.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/mongo_server.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/mongo_server.cpython-39.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/mysql_server.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/__pycache__/redis_server.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/entity/__pycache__/logitem.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/entity/__pycache__/register_user.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_collections.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_exposure.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_likes.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_read.cpython-38.pyc delete mode 100644 codes/news_recsys/news_rec_server/logs/material_and_user_process.log delete mode 100644 codes/news_recsys/news_rec_server/logs/news_bad_cases.log delete mode 100644 codes/news_recsys/news_rec_server/logs/offline_material_process.log delete mode 100644 codes/news_recsys/news_rec_server/logs/offline_rec_list_to_redis.log diff --git a/.gitignore b/.gitignore index 4d591874..13bcf024 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,12 @@ -/codes/news_recsys/news_rec_web/Vue-newsinfo/node_modules -/codes/news_recsys/news_rec_server/conf/__pycache__ -/codes/news_recsys/news_rec_server/controller/__pycache__ -/codes/news_recsys/news_rec_server/dao/__pycache__ -/codes/news_recsys/news_rec_server/dao/entity/__pycache__ -/codes/news_recsys/news_rec_server/logs -/codes/news_recsys/news_rec_server/materials/material_process/__pycache__ -/codes/news_recsys/news_rec_server/materials/news_scrapy/sinanews/__pycache__ -/codes/news_recsys/news_rec_server/materials/news_scrapy/sinanews/spiders/__pycache__ -/codes/news_recsys/news_rec_server/materials/user_process/__pycache__ -/codes/news_recsys/news_rec_server/recprocess/__pycache__ -/codes/news_recsys/news_rec_server/recprocess/recall/__pycache__ \ No newline at end of file +/codes/news_recsys/news_rec_web/Vue-newsinfo/node_modules/ +/codes/news_recsys/news_rec_server/conf/__pycache__/ +/codes/news_recsys/news_rec_server/controller/__pycache__/ +/codes/news_recsys/news_rec_server/dao/__pycache__/ +/codes/news_recsys/news_rec_server/dao/entity/__pycache__/ +/codes/news_recsys/news_rec_server/logs/ +/codes/news_recsys/news_rec_server/materials/material_process/__pycache__/ +/codes/news_recsys/news_rec_server/materials/news_scrapy/sinanews/__pycache__/ +/codes/news_recsys/news_rec_server/materials/news_scrapy/sinanews/spiders/__pycache__/ +/codes/news_recsys/news_rec_server/materials/user_process/__pycache__/ +/codes/news_recsys/news_rec_server/recprocess/__pycache__/ +/codes/news_recsys/news_rec_server/recprocess/recall/__pycache__/ \ No newline at end of file diff --git a/codes/news_recsys/news_rec_server/conf/proj_path.py b/codes/news_recsys/news_rec_server/conf/proj_path.py new file mode 100644 index 00000000..292fd1f2 --- /dev/null +++ b/codes/news_recsys/news_rec_server/conf/proj_path.py @@ -0,0 +1,6 @@ +import os + +home_path = os.environ['HOME'] +proj_path = home_path + "/news_rec_server/" + +stop_words_path = proj_path + "conf/stop_words.txt" \ No newline at end of file diff --git a/codes/news_recsys/news_rec_server/controller/__pycache__/log_controller.cpython-38.pyc b/codes/news_recsys/news_rec_server/controller/__pycache__/log_controller.cpython-38.pyc deleted file mode 100644 index 6025bfcc657211550e9ed723c506ab0bd232bac1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 894 zcmY*Xy>1gh5T4n+yEus*Sp-EWC7l~vLNrJSMT!6+ftv&cu31^Pn{aSH_U<}h%g{KF zK|v^xcpK_%sc3irDrU|kI3w++nc3O-`F37*I$J>Ee}5bOK!BfYtcxV$fR4LIph05+ zC8mh1i$at#m5nZoD2<+ij&ythI$nx2*768?$uD{bJq+6~W^Pg(8GCLlnH$gRvuECv z!;J6O<+|cs@)}4{Ln<_C@fo$$(M6O>9qZ%*(nup!*a#iUby!#4)*2)>@^12GQ$)Q%BNpa<+ zVv#e&>36W3^zzSx!JE1?18XujbA!sfcZzh1=D7#ib(aH9_TTJ}XQ8Did+wFuM2`Ca zobTP3t$!7gHP;+M!i@1RHj^EbGT&doTbgZwm-JJOE-nG#1giTHc=>M24iP>Ig!!J2 ze7q1#TvFr-yoRq0tD0^=oZ08n5Xcd$!%2j}}YhhKlviY6r6$u+vAVI_l+*4qFsW<6P1uNln{*1=sN4$eBA!uV;w4%Ri^40_Z diff --git a/codes/news_recsys/news_rec_server/controller/__pycache__/user_action_controller.cpython-38.pyc b/codes/news_recsys/news_rec_server/controller/__pycache__/user_action_controller.cpython-38.pyc deleted file mode 100644 index 3145700998904d0b55b28590e66fb8cbcbb26a6e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3975 zcmb_fTW{P%6rLGh*6ZD*X`7TZ;SvfI7D2+LT!a7>aT5|mf>57As#^VVN-Z$9-t2(QC0jBzd~j;Wx(Jd{IvoindBjBagTH4MJQT^XAp0MX3O#{;?2-*mAn%1 zR_L_KUYU41bXyg#!dXySV_r3=5H*OFk-pAkNjhhkbk>A7BFhV`=3a;AvKmh+Z>{Q; za3N490tLhBAswh!Pc64~M+G>3Cpg~JaiDM>#$~*}*<1=V3>+N16VxT}JCJ|59fm<8 zZnh)cK>fulx7dT(fO%XpPe?AsSuPD}o-sW`TGBqlJTpD5XUVd3VQkBatirt{N8~8n z9l1k}!M!YZ%3W}G^&aDeg}@=2N{mO4zvbPyuCxy zE8$=%-K8?H)9u-=Wbe>)Vb?_dtPp&XE1RI_q*Ddy>0G*zJ1XBZO?!$9g`JzvQQ+=; zPx_p6YEv38H+2vm+hA$}0Qu{sxsTedU`_=My{hM;;H36}2PGJDbB%TsD?E;xqZ8E0 zasKA??DA^j_Ud?{p-G_32&kp1o7;b|Lc?zIfU}mwy6D^MkdRBJdR7_>5FOje!|s99QE4uK`~r&}{zuu* zQ6tJq3&Pg!ybT9)XR_<@nQQCkul{i9%8%b)UH|FN-@bbyo49=8)2sbYu7CaQwF?(u z`mt>K>euJ5eD>=f7r(gt<2RSjp3n6pR@gq?j1&g^bs`v}^R<#1fqP=DbON=iOyuoj z%@7hjanKD>y%i+(a$W0_GO-%%P88P+HI8y7u6t#2R=m0#kdsrg2iHevrKutOp>j;Z zl%4{@Oo#8`?hTuZ8>TS%K5hzIAni^b+H~NS%?&*4;6g8==gEuj0@IyJG4AnoD4fKL zb;j9Y79B8IY%IBIp0keyXWc!q8CyNE#@9gdFnf=kGqG4mSUoL{z&HYmzSuD4UwgM^ zr|1}UDHFTVZnc_mQhxPRBUr}TmRQTG8O4dAW0irTX5c&tzX^-7uAc}fn;!-NJqmLDap3JOuOJFi7e>3_KDW568%5nTnsgy8#mhS1#wgtpmq>rB5N6b+Et z%MFsGh;fi( zgl}>Gmh!!&ZB0Q7#vqURno#dk0~38CZ(yQt7DChlkdEXH%#KU{)WD)kq#NGy)kV@> zF6b`br2Ciur28o8cGtw0TN1n8vif)vH7ZfSmyM_Mcr$R?zpMC@ggKVN6O}R8RG^%E6 zyVCwWRfvNjg|#Zaw_;=S{lxYCR$F#LRkZhF=N_os>)uFJb;D$6o73J@wvG-8Y_aTx9}C{8%Fi9x0bSGc-FxN6Zw)lfLSPcqsWM37tobyO5sjKgrpKv~3~ zoL~l5mWZKpm1{6-JjUZN$9RJ4FvocpPr{tw-Mj~8ou^=jUO#cqDLr(iCJF$xC1~R& zlQ7&gs2Z&iMrsOZ*bEYBE~EJRBggeh=A7kgWk+}!)z@6ho)MTXkmfECahU%0XS1X5 z=cY3%x6792eVwslspv6=fi5aS_RAS9`(tw|{=D1P5PFkJf^X&DO)@wT5IgKT)2S8Ai9n zGHDmBlGi4cQAG?&`w5{bYTq;MlfR@QO~U&5w$1$j@k!guqrc z`r2_JBZcQfB9fA1lWfQILQaEFl2-ehgxUcG3K2~~8;9hC1Z8V0l_4-sUK|9!o}R27 z28*xHHl778ken%FLgzZBT( zP&8Qm7<_(Nf4_hB;qU^;A_cMMPjA32mP}T%p12XDgAt^|O{ARyF9q=?Zfo(Pd;IMS zyn5MQ!OrsW;m@$iJDzVxuvcUmCM!hXjzr*&G~sp*&1Jgqe&g+6%uh0?#Va=2J&ZlB zn0Dw9cr|1@iXODEu|yv#{N_Z?u+@eFUo74b7ZY@j-R$-{C0330gpcID~8=oX3bwUTD`-nT74iTqDuyI>_MzkGd_WF4~Du7(8qnmP@e$ez5_`5 z6wv9rfRrOB2CoIK_B%?*EJ}s;+L2Pvx%N#CyF=T#EOd#lGqVglO_fL*at6Xn!?2t+ zP%%&i_=My5&-2nNtJtkPpLF zwE|87V(md?EvTynRkfg|7F5*oKDeA1!FHShE=qmO-5oq2zyjGu4PvD0EJA>j-LK-^X9v>c+FHNntkQyPbw7Yg*sn@p#D_etA zi{SU%PSIGr{U{*X7g?P1X7)6sBHqXY9Mbh>#sgm(RkYX0Wf~R#rB0ek14qum6nc}U z(xqujSym@-4t|#x01V&iA^@TurfB;8CWIjJ%X7<4uR7s%8Y6I}=|O1}kR=0#SK9#0 zs>_%zGnVGOlL+uDjBR$J#OThLJZ~;JE93aMT$09F3%sxl>_thu pA=0kpA)NcWQ3FP7+1kt3TOuy(t$!uZ{+-FY?wa1PSt_{V`~i3N>Pr9s diff --git a/codes/news_recsys/news_rec_server/dao/__pycache__/Redis.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/__pycache__/Redis.cpython-38.pyc deleted file mode 100644 index db3b03be01602c61dca7873ad98e87009da3a5e1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 706 zcmZ8fy>8nu5I%~OqNX8Qf+A=FWV3;!U9=Fkj)g7Ql1s`79H>*X<`uH% zmAGc=D|E^oT?ax=;K$$l?!FKDF&rKP#q{iXHWdJ`yy=Ic%>@njg#ZD?3JSOej8HxT zN+bTrB0_NsnfgmFU{W&nBcja(jlf`_5D1)s(IXTRB`TT~#>9~LqZ@-uclE90vWMU` zlU&kpCj_17KtmT1icS(n2$U`+ne^$RacwaYhtdDu z$A}3lT$*4#~MG`8-jy$Q~qGK=R5%;9=M#Yj3k ztF=GYdfni5#p{9AzqYg0{>FYH8arfQV|&2WEW5xpLZMWoaX87O9nhgqnt5f)Hk|b` S1JS* zAm|5bXrxvuj?}7DqNoQ{Xw%UDW3FVK=ES{1g7;<}$9A2xV(oeJ`F7s#{oZ@_$LVx} zz%$|9%HN3-@)u5e9|I<*q4{5eV1&^!aiX3&l#`Oy(wVkFX|!fI2At8$X2o)>O3aC= zbpz(MV^`u%T+L0GC!9nj=_J+Mf_chGsc{U(X(vs|hlJTIzCu`>k1XrXD5GadHn9s< zkhvv2mphr&^LkALu$%I|YTjk@uG*VcvqG(0=7qqkRn@9n z7rf|w4u><9e87d5FLxDYi##7R1cx)VK;*r$JD{AUa$0ca`EGRW)7!5!d@c^uFCdoQ z$A-yiX#N}sP8`aJqj8-Z+~gLIahu0^qG&V9v=!44KZgvc@XDL#(6&mu(5 zxQFl|5T|_AJ(Lq^M`W^ju(v~N+2hq98<&R9%SC6jv%D8{PO`IrAVnJP5F;o?QDjhz zp?CqrZ6aQV?#UEHFML_Pb~3sGq6A3E)feG<*Q!#qKk4nU}Xx~R$8t-8;x2I5` zc)0T*otU|b(s39Bqy?mwfYOq_p?yJ{m`$|tE^Ix1biIAyrZS}cmtxM{D7=>)pk~Z0E@uGFD$y8cq~dXOxdb5?&|J@~kFag3hrD(FVd+*u?~nUIZ}% z%|{!Fo}mMf)T`NiA0&j=jZ$sWLROO<9Qs%GVZna9Fs#4u^cShUiNtdf9k}ULPXVx1cGK-iHAqVswHq ztwjSZ&`UgUHi)2-Sy30eNBFJ6hL+)ugfhgY{ZZ-!f!!yC7EuCBD#@9nPt7XGjmUOL}(nWU~HPv|?@xQc6O zlxkj8>V6=k&7fum9u!^!M~)fPSbkBOe!c7kk)tsm_JU!2SOp&R{3Lo%9jx-t{GO8g zCcMeV3Q2UEPX23XOUDKsKTNI9<#{3C=F%tuulimpa9sy_cDGPY%#cFZ=_scZ%kV9^&x z;h;3?`QV(i^7T5eGBE{9&S;@lE#{!UL;O~Y-oo=5Ev?#o3I3j_ONQAf?GGZioWf96 RzYjLGX$o3KeNr@~{Rg%-x>NuF diff --git a/codes/news_recsys/news_rec_server/dao/__pycache__/mongo_server.cpython-39.pyc b/codes/news_recsys/news_rec_server/dao/__pycache__/mongo_server.cpython-39.pyc deleted file mode 100644 index ce3cae9352b4269b2c16b405efa31aa8abd8e6cd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2566 zcmbVO&2Jk;6rb5IufGxp0!f+xDJli45KF)jRh4?+)LeSo%WC79Bx~7TGrI;783~jK zf}kI$p^;jtBvPvqiJ~4*p`>a5kGYa{niKa53ErEv^Wmmx#n|&^-pst6_j_NvUOt~8 z(1!fm-kmfde<8EK8Ni%@9)1ag5k{-TNk-~WPAb}}&a^d3lQqLJ;EY~1YnEfxQcg;( z8^GI+T}wM@#hbuqoJ=k2WEF1#pL23*o`QMa$y4$%VKz%I5|-wD3%b+K=y_7i?0^1r?R}1Phfjb+7QQfO?nMwFYAR^e!g?`<0*_5mHMio~Ms#RW&{GhH@-G<=P z{^xMGQ1c=#e6PAIFh0$_s3|y{2_oV7RkugDNad*D%n#k<+NZZ)Yld7rQ@)H?_BR`t zGtk3HAe=aq5l7=XH@L|yp5iu7^USo(DAN|HlVv(H7KxK%CbMA7vlO#o>|<$`fw7-u zSq{bm%Y)_mWcC~y>pT~qa#03X?>3xk(9c2-PXlSu7I8^S14>&uP_1PE)mtV|qh$d# zTPdJcgw$@4)$|&HS%wj~icS`@GQH;&(?y5KynDbOV+q@1w2njMfcum`j=Rruh#Yf| z@P|O0@^$w}pGZ4#CYu6%J8rG|ydD+P(g=BV+Uf5scZ1H!b`}t%$ip3?4@p0g0+InF zF9BI6;#C-)oGZ-+HC__D9L|NMI{z#Ly~{4d3b>}!1s;~VnwEJOmLTHc&%B_NsFj?a zXv~4iu3z^f*UiDhL%fKX7CrFO<_GtAf1)c;rg*s1Fa$8$RTvDyBqFO22o(r}ioT|O zL0g!CwD~@4J$`hfeesr3r2XUF_Jv>LKd=1#;QRRgy~mH1jJH@=g@Vk6iswfTB3sTjTs&t z6{2s#p%Aqp`uwT=-gu7O0YoToL03-t5GII-(ILXLRT^oLUNKh9HL^fgEvB#0HP`{i zkqYE##Rk-X9Gqx09T1H#{uJN-u6_0H&dSDqR}|L&7jUkJ``&MXO0;K z%$t*D*r@tZVra~vZZHfUR<(OAKa3Vs2URZZ>H0Q2Da5Kubej(UYiRQ)dJTV=*a3^b zSPY$lJ5=r2`r{5*MFnGg^P8=W&BT63#gwWK$M69MLtY+3T!&Y*2{_&jrjxyx4j1_! zs7}Bw+si+9o&G`wh5T3(~U>r9NmlG9%f y>eCZY-QgpnKJCxEsLIj`rYi73qb@m2Lur4M809nuvU)q%)TTM;1@+6(oc142~ z?BHOMK)XC~-#(HeO4u2qTO-#3Y;)=z_wOMM`l_b=0C}YI3YOv7&D3 zavXEwMZ+|T2{S>-2ZZS?K1W!bCoik!0HbF}+E{^8$?2*(Ihje*U@$AV?Q@Hlrt&2Y z6Df~-UfwMg?ekoum0;L$;Z4r9*g30Y7kRLu;&EX`r?PlrwCV8U6m;(!Ae@+#5mRB5 zDNJP=SGktcS&Zp()Qqt>Gv=4B-_9?0v}+bYz%mc zZDN~&53((6EASz|PcPe^caa4KUL+Ue5itZ4{~XCoyG5Q6JnPMPnG(O~SunQXHUOAmwwoy;;O4|+c_uI{uwCA_ zEIi-CQ;Dk6)>9peC}^^^G>gz%4TUtIOI4hJfe$Xa0O}}!G76xI0w|&otI@?+Xz*+5 ztm5l6g~hJXSrt^KUM4vORHIi8!}gV%e>AS&Ykc>xaqU6bS&ns5~S2$xP-EVyLxOrk*UrUOVOFGVyDiH%%vth2&@#l7oGbtlqUQJZSc)O&Z_- zSikc1nyew?LTZ6 zCH_RO_?^7+5&`t2l@|uEy$v9eAHv;dyI>sX3uD!$wYx_jYlP9*&XF$+reh5mV&T3| zyI}OUMr8TA0PO2=r*=|&VbNuXcIUTnuiYj7su1nB<#c)!t5Ph6Eh|V`R?%e@2gfPP zx=^v5@J*-q6(ev-Sl4XF(M}}0kc=b2YDG*Sc?AiA75Ett2ap^@f)#uuux0*{MI8Qf z!Q$4#&^M?-p>TVaOd2Ue@An8?pH2lTRNsMCw*Bcqx65T-VrCM`ZrjOD^WuywIL)EX z#3YoS0*gFzIO~>jlTfEyILJ@QzW@?xpu6V^@Y$4C^piC(-VPP@E|!lT{!=h$NElFESK_H>Ravd<=K%zyW zB1i=(1qo<{P(^bhy+Ci3;K28oE7&Aofdk^b**J0>8?_y4-oBlA@9q2j=FRg5jYdRZ z{PgnA?RG%OpZGF8Ja~Bvrv4CwAcFP?Ba(FKeMdO^l$L8Qb79Zzd8yC*G+;qmVU;vw zVOnKXYvaLQ#3D-GA;K5IE)jvOeeSZlpz9>A96@xlw&OONt=Kn9N{Wu=Tba)L?Npkm zd>>>gHWP zhe{sDNidV?0j!h^ybFpXj0(aW=}J%fGLRJ+ZU%x1XP2_7SQeh}L5oBnD!YW$L@288 ztcyt0;Mov$(SYZYSOV1NOzk}cx-Qj6QYDqCucGe2d>f{I6+}S`!b#zPq=gI8DLjyF z;e+&w0HiMn1f^?nV8RpXJS0uwDJmZYqEQ$cb!Gg}=aDbMUXlJm6^-wM}2iqp(aP*y= z^D6jj3_{$1PJTNpQ?xcGriy}9f}~D{3kvdKNxlL~4s~6SE|A=H3yP<@=Y8rF4$$hZ zy&rqVLy{EU7~_Zyc23A76B01%LDtt&gNbi*WwrHOb&}jBqA*MA6}zeiIt$7gWC3_W z-B}t+(ZJZ%lS+LCUNxQ`p%>}S>M30IL*Shtu*pCjaal}AOhN9$?9zR_XLR@)EFIo{ zH2V6^(Y+tWpWPhad3bd9_rLzwAK(0S{Qbl6-Y=uyzKvcy9Hn&UQsx9H}&b8^(7rQ(qmeE%SkB!A%1LSoU$#a<2*~+$1u1-ODWfsZ;pC`f+ z!w3HU?qGcD(fIbk;odi+TXzrl5A3%WAB(-r^PrqZD5q}XOH%-zp9OHr@@dol@!sf* zFCQyyY(HvV+7~eGlYVT$Uz^JNh1tAMo36#w-tl-D`vd-QaBd>b(@YF|sMk5aF>LqB z9fgId@T$z~j%o=vZ0)b0wu<5+3M}fW%E2q9@Y1Rf1Wt_(Av7b0IMn$!iXt!c@n0JN+3{8-2bY+s;sPhss;Tt9LH?b#Z OY+50$(~w@Jb@xBLuNf8q diff --git a/codes/news_recsys/news_rec_server/dao/entity/__pycache__/logitem.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/entity/__pycache__/logitem.cpython-38.pyc deleted file mode 100644 index e7c42f9912b2e6790e04ab09f4b27d8c6ac6b6fc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1455 zcmZ8hOK;mo5Z)yzlA@k=T<6h83ba1xA^|xEMbH!|0<^w3J#;UNHFqU5<&w&Bl~{@m z6qT<&7P(4?w#cREKj_~m_FB|{|3Xilq3l-ZO8fES&MZ0K%q+j@bP@vNkMDmGKL>>T zi8tE=*2oi>)eiuYNSYHy6nQ~cp7hp~Zq|I}gU!za-CzwJvQS4X(lLu`dyqGE!jhUd z@|JG1cFn`Qqr0qI^C<7>KI`ipwnK?ZmW1toNn|XW7eqGI?$T#>Bt0U7X%)Jg)f; z@Llk4UQ%^uwYMfRft~NkmTX@Twl6!f3)j1{2l?u|=&N$_%ql(GgP1_d)!U!``u*+I zkH1}A{+fIYV-X*G%MUa^kc-BfbN=T1IVNRlGA+!Uf2ZcxAHmLRiGa@lYiuE{i?YBp zcB?vbT&l5{k$3cP&@eIhxZR1e z{5I*TfNCfgd=5o(u~t?{VTDUlqoBN8%h0RaAgf99wIUL@a)7pe)7|2m+ zt#Tj889gJMoOyus%m?%&IU;5kOatGAGYBD*F2s=Fnuye9Wnt%2H3-aIa55hte29Q? z)N(d}Idnd3`~*l90uUO)Z_&Dq5NwBodng2(mvfhJu1h(C&gHJn`OBHeZ!8h#vP@w) zz;e2{vSwVfW?rTuN6MmUSy^ax7miD#g*EuOa2?L;!?1afLGcE&;2K#W)v3Xn|6c^2 z4HKc0UzD%^RShpO@1yA<0v2uyP_AhK^k7!ld@&8YzTc)@+NN!|?t5{g_89CrAL{Ia zsjw$56w|3Hr1=!KxCD;3$kP+0=bKRL@GxG3LAeM2*@x=X4!6D)22;$sTMj@sD*Dht zLyez~o0i%%Z!ay2@ephrNj5P}d=F;LVElEgTMBEzbyefXn=*Zd2dKhci9usb@1at! JP5XZ6{{y?lcWwXx diff --git a/codes/news_recsys/news_rec_server/dao/entity/__pycache__/register_user.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/entity/__pycache__/register_user.cpython-38.pyc deleted file mode 100644 index 1c51d454401b3240ba5b852a827960d0383e783d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1123 zcmY*YJ#Q015Z%3RpY0?dLZTFi){Wg#AcUYm6r>=e*@D&Pc5)otot?ciU<8#BqJR#1 zQXqu}AyM#OwnZ4Zr=nu!oWL>Gj^55b@6MZ<+wb>dhW32%!QQ?DVdn8P-Z3{!EAX6x!Ety*wRO|q>d)I z`)dv&Y%aTrtJ*nZkM z+Mbk0QvZHR7ziK?n6 z+6E~g*H~AjSfB)@7)rQN-*TH2>!711RaBaYdydD~c+7ieKc`{r4%Tdp%s%I0ZR&v&pad|kz!b<{Kx~jwm28IQFB&V%u iM!~GptCV$13FU4`j*YIas(g)RuIiz?KuR21f%6BPEGu9D diff --git a/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_collections.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_collections.cpython-38.pyc deleted file mode 100644 index 9a5e91c9bdbd202eabac3e2c2ce2e84e4e5f3136..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1404 zcmZ8hJChtW5SHe(v$Ib>u+14T=_b2B0GG>UQ$;!8lCu*fs_{s>9*}0v8jbC}T8dyD zkX(@%oZdy?gmMG{|3oQO3-=d@=+@rBa+*?4kJQ!rzOP&JWH87Gj7Q)8l#_^%Kk#FB zz#6#^voT-<5mXUP8Px#`bjU&-v54Y&pkke{#QUM@=#-`Z?nSDrGnV6JWMk1iAd~D2&Lxw`B@b)W>dGYt)|A!4bskk#E~Ihk z=ed=SOD!kVjfE^!ZgN|`ko;-h$VuR`2j${C*2I{Ly4!6d4KHe?WMRv?YPijxD#@$d zXX$;pY@VwFX7VzEqQXAPpj6CQ^wh41c~ ztSQUD>xpPh!S8!N_Wl6;Aw(y~RE|!@ry#{~z^;f?bYBy8Rb-+E?`xtDk__DF3E&4k zF5tHx1BO4}{Ji<@x6PYh|9boV=EwJEZ-3wX@P2drOZF*@#~?)S(3CniOa8T7x863I!d)L7)N`a-O%!78F7*MgUF-^C8S;4~!*iazanZ zH`E4Sg(uO9o>DCI0eMIFC$Z^5jN4mC%P;BAWwj_P>7oZvGM8#;bCKKJWdIL~%X6gw zf7D1dcgZ&P9Wft5y1zf0J*%~x8Cf*TW>(3U4F{iZVb4Tf&wyTOm$OSpHq*nUOF1vA z(sKR*QovRxK}L^mU6OZt5mxvR41<9_O5o|=0B1#4gp*YOmaamu0a7W7 zx)v>THg|o_pSQU>-|28J>Vk8VV)g;wUV!bUK~@Ioxwx!^d}TVg-~nR2cTnObF!%!j zfb~n)DO!WgV{YU69W+>^9fKT94-C!*rAcWNM089u+K1;xkj8$0e{#iTa0_!)Jd=94 z#pF*%VE!Vzz5n*7@|B(LTz-Q(v70*q;7(!M<)LgmAvY@OYCeS)fj>btFBe-a#;`Ee v_yag^BHy-b`2SR%w7HH@kjV~1PuSNiy}K3v9$uPe2<8?r90$m=A0)v)3R7+( diff --git a/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_exposure.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_exposure.cpython-38.pyc deleted file mode 100644 index 437b21872f95b1fd0068a8eaa72abec4c3146c52..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1397 zcmY*Zzi%Wp6t?I0&dw&2kmHU(aTF=G*_Eg$5IWs~5Cv&+CxIJlZv1v{U_7(u@rGrioEh6at1uC9BIoNpRZ$H~_{oeQ9^FA7lQi9`~^)JONAmmTG z*c`Y=?&0i?5d;ymCXzBLJ?2TD`PA-tbs$3)I_}qzj9Dxbmbi6L4`j+x$HRIkGnP3X z)g!sZwj7V^ZMF^Jny~y6A`&rpPQ(E6wa<1$d_>0SEA&mqzKI^R^-?w_I?}3Y<|h8M z(J%)!rY09q)`co`bqxG*(ZSd=>HTVcZDEp+3Jnh{323_yCyTaQD&V?!Tmx?k34Ebo zR-NGK@UvC-v_1lL>@E&w%ceXmZ3LSm#Kk?FU4sCGQ9+m|DDxq>2!)3jU6MzUi45~GGTEaJ z)W_EbpO#e){>ZZ@<6#@%O72zomC^oF)ff@B_&Y#A*2KoIg8%h&!;8Ac_<|1V znTdMamqfjZm;Lkp^hqmWs-Wyv-L!$PJC2;=?O3O&DB7tFU$3S&98MN1lW<-&mFE0y z8@+94;-&QT)@BYTuWd?qZf$T_ZlycfHi|QPMmRb15b2qZ=vm2X8;@*zTGy2@Ax4K^ zak*65c3hzL&_KOq!6r}HK%A-`Z2gRnKe~&gix3En@N=5q2oVhq$G5B;*Rb5AoXb`$ zv4gqEIDfh<>g$u3bJ3Q#9oPU&(rGnwY*4plQ9Byr8_?MDCYrTM7Fs3f*Nk}oDEf<( zRj+IsOjtDnPLz$VS7Q5NL(Uz$e8+-K)CO2}YKSnx+3g}EH1Kji^KzQfjAniky7Td_ zN%7?sb@>G3s(0<2IRE?qA5GvyPd47WvX7D7-Y`Hq!KqgZ==zXjG%?#{+sr1|E8LQ5 z-b>DLV* H7kU2z`e|#< diff --git a/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_likes.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_likes.cpython-38.pyc deleted file mode 100644 index 3caaac25e3e04259fc836f04b4b2b08bfa3346b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1359 zcmY*Zy^kC<6u0MVXJ9h^Yh*xjWPo5%Wr-z{tOBE z3n$wxg2`Ahb+_)izu!KD%J^0JPuV)r!4h2QhlAV%;Q)M^pFib zPSi+`*_aZUtO(orl!%^4pA(VFomI#tqJKd0>>Of}JaWn1Ms>P&$$>RhJ$Jp&YAff` zxb&04%6pZTIdu~uOI4V{R!<~^Sq5kz zq7?-^^f>l-1b7VI$qAJ^r;{_#e-f}OA{G7Tgzbt<4B-2g7(yOKu74kxe~!ev_aXW} z-~F=w_V>SE{J8%0r}c|h=Rf?h{{GeaZSYli z8ZGG=#d|#B*=4YHdbDsU=T%)<&aWbE ztYi{o^mz9&nbX$}rQbvcXoyj;7kdrBk}e4+%K$80hF}8}YIb2O4_$&mE8$Z7VnMkW z7*-;W%ysZE@1en4GnjcVXk##sE(Bi$Z32eS1p4vl4MEu5&-YLXp3J$+IMlUaUdfh0_GSPEM~g{GX%s|sLNhvqel192zrUYfaT&a`LX{7tUTiq|=t#_9 zV0Yuq{!~7*)2%mcFhlJ6mIJ^k47)gztrz5W%BDV?LP2l@)qInS2`o%CKJkl^@#V?J m|3Au$HhV|~m24^Wg8gl!H#f<@g^!~hgLxY@2B>owB*A~18(sbY diff --git a/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_read.cpython-38.pyc b/codes/news_recsys/news_rec_server/dao/entity/__pycache__/user_read.cpython-38.pyc deleted file mode 100644 index ecfc921bd99daea75c5b78d22a112502576bf369..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1350 zcmY*ZJ98U15C-nWQd~1`PrqAo`NSos7fTriQ8L z%%o9gJ1M13Y7%!T{7=wkMv4AHs_cT2%-{yg#qDzU`*s)icsR@mtkZ|5;w&WOFFb6n z2o85)w+4hHl2$}hMs>gf9kNhIETZ^6sA8S4#OqiAq+4Z9O2eC(~C%rfO>yva#&%kx6y|dC4Sl$^EA4bnTKoYsz}=dJk)>=E}JA z8)4O>QmYAdW2uTt7-7rjia!ypnglMpSI#eeO^mUq+v!?mxKToKE1pz}*TVmO@a>{~ zR_!VCTp4hS_Pe?$F#qPl7%= zkkN{Qo`XIFkNAkH(ec&^k;zd2d_8-%E&DQiMc7Aj0Q_^8J_fMg0oLwwNci{b)AL{c zy!h?sfBtxX{`TF)ukX*_ygNVoJ-Y+z5zrkRm{JR~;6JK`8;Z_0WnCDhRc$XM1OQ&C zg=!r(HCAi{!GSH8YF~7fEhxBLtpF65334BHdmDr$D{@Rv$WPP;KZM88lAciPD=%`iIzDw4=XT;osK>vO@ zd)jCgu^TK?A25h{! zg95v6#k| g1$k`n^K^ZO(y!JP{~B*cI|6YF7>yLa#JAA9dE$+}reY^BEjOf@#PCZ<2q7GhIdD_euQT{>~uY`b>v zUKPV!Ls@-N+QK{)O4;}*zLdoWUj+LQ>VweYqL2r{Kf!`fpY)tf33y>XoEc`$o%8*k z`F`fsjQ5V~S`5FVweM;N=I(p{WWke$-W(G~7r4^Q+=lqKI!jF)7FSX`F?m+r(aV(w zf~{CRTV&TEbrL6Wxz3F*m~gsE7gD#CO`I-E-NZ`_%GHIF?EQ_@6Nd1LEcJ!o^SQYW zT*Fx)0^v5Z@zeG|`bKd#2bPAwR%ertTios{x1LWU5%nSwaA$v5x%$cjA<`{ZZ#AK_+jrPkt&TC!Yi@aok@Cz05I6>G) zI7yf%oFbe?$U(j+pX5&j=V;|E!g<04IbXO0OY&Y}gqkHZK+&)(AOr_F3ePF>&q4&y z|GQ9!5+!DU4Hr_N2Dr&mhE)5XjwImHBmHp3)KY+al zH^i6$HIv6nlQ1ch(tLinx0f7%Q6o6;;cf9NU@vcqlx>1yZP)s4<&_G&9X+s;Rnnq0tBrQ-Abxh>Aag+6y zLzz_$xuPc0TsT!5PE|z@mBj&0z^PI><%E3jfeUve4&3_QCUU5y{r1hC6$BdhD z+Z-zWA;_wg+QQhSNj}cU4re?ke=|#)S?w0RK)!sMxPl3{OcGCcsmHBd%5_-z!WT}< zfW5Vi{ykTa6f&?+VUe_NY9ubSEO@p~NfJI^fARf*3n+gz90`g+uC$369Sqb*O{BXmrDZuj-53MKM#{{D$M2ym0m7XYGK)K@Hu)M#GS z)S_a^ol==v-O#x~>7;DCv&JQOGn(!G>VBtyvMYtv>ndl$%$%8Ur(7L4W9W^mi506n z2Q9Kx5jso?+Bsa|`w_ZiX%3%)VJo(|#cgQ>!<$bNN0`C^XRdHlhv&hdx$jftUohwj z532nGgWkYkAu|ZZxSJUaM35GQ$GroCD)&&A*ynuv{cT{6rQB)H2Kt3{QsXR89HO#KT9`h z1vfV+oWcMCqWNlGwKSjRsik%2kHpkD2kD;9@2;(PqG`EzbcGJfGv$h^SW0)P{4=#* zhZiE)0fr}!S8DXUoU2@1$g-=LZ35;Ns?L6^IFgx-_SZyV-2{>c{C&cu5|QSNS~7R 0: # 根据news_id获取新闻的具体内容,并返回一个列表,列表中的元素是按照顺序展示的新闻信息字典 @@ -190,13 +190,11 @@ class OnlineServer(object): self.reclist_redis_db.zunionstore(hot_list_user_key, ["hot_list"]) # 一页默认10个item, 但这里候选20条,因为有可能有的在推荐页曝光过 - article_num = 50 + article_num = 200 # 返回的是一个news_id列表 zrevrange排序分值从大到小 candiate_id_list = self.reclist_redis_db.zrevrange(hot_list_user_key, 0, article_num-1) - print("candiate_id_list", candiate_id_list) - if len(candiate_id_list) > 0: # 根据news_id获取新闻的具体内容,并返回一个列表,列表中的元素是按照顺序展示的新闻信息字典 news_info_list = [] @@ -229,8 +227,6 @@ class OnlineServer(object): f.write(news_id + "\n") print("there are not news detail info for {}".format(news_id)) continue - # news_info_str = news_info_str.replace("'", '"' ) # 将单引号都替换成双引号 - # news_info_dict = json.loads(news_info_str) # 需要确认一下前端接收的json,key需要是单引号还是双引号 news_info_list.append(news_info_dict) news_expose_list.add(news_id) @@ -247,8 +243,6 @@ class OnlineServer(object): # 曝光重新落表 self._save_user_exposure(user_id,news_expose_list) - #print(news_expose_list, len(news_expose_list)) - # print(news_info_list) return news_info_list else: #TODO 临时这么做,这么做不太好 @@ -262,22 +256,14 @@ class OnlineServer(object): def get_news_detail(self, news_id): """获取新闻展示的详细信息 """ - # print(1111) news_info_str = self.static_news_info_redis_db.get("static_news_detail:" + news_id) - # print(222) - # print(news_info_str) news_info_str = news_info_str.replace('\'', '\"' ) # 将单引号都替换成双引号 - # print(333) - # print(news_info_str) news_info_dit = json.loads(news_info_str) - # print(444) - # print("news_info_dit:", news_info_dit) news_dynamic_info_str = self.dynamic_news_info_redis_db.get("dynamic_news_detail:" + news_id) news_dynamic_info_str = news_dynamic_info_str.replace("'", '"' ) # 将单引号都替换成双引号 news_dynamic_info_dit = json.loads(news_dynamic_info_str) - # print("news_info_dit:", news_dynamic_info_dit) for k in news_dynamic_info_dit.keys(): news_info_dit[k] = news_dynamic_info_dit[k] @@ -298,9 +284,7 @@ class OnlineServer(object): news_dynamic_info_dict[action_type[0]] -=1 else: news_dynamic_info_dict["read_num"] +=1 - # print("update",news_dynamic_info_dict) news_dynamic_info_str = json.dumps(news_dynamic_info_dict) - # print("update",news_dynamic_info_str) news_dynamic_info_str = news_dynamic_info_str.replace('"', "'" ) res = self.dynamic_news_info_redis_db.set("dynamic_news_detail:" + news_id, news_dynamic_info_str) return res diff --git a/codes/news_recsys/news_rec_server/requirements.txt b/codes/news_recsys/news_rec_server/requirements.txt index cd178d30..38b789c6 100644 --- a/codes/news_recsys/news_rec_server/requirements.txt +++ b/codes/news_recsys/news_rec_server/requirements.txt @@ -3,6 +3,7 @@ attrs==21.2.0 Automat==20.2.0 certifi==2021.10.8 cffi==1.15.0 +charset-normalizer==2.0.8 click==8.0.3 constantly==15.1.0 cryptography==35.0.0 @@ -37,15 +38,19 @@ PyDispatcher==2.0.5 pymongo==3.12.1 PyMySQL==1.0.2 pyOpenSSL==21.0.0 +pysnowflake==0.1.3 queuelib==1.6.2 redis==3.5.3 +requests==2.26.0 Scrapy==2.5.1 selenium==4.0.0 service-identity==21.1.0 six==1.16.0 sniffio==1.2.0 +snowflake==0.0.3 sortedcontainers==2.4.0 SQLAlchemy==1.4.26 +tornado==6.1 trio==0.19.0 trio-websocket==0.9.2 Twisted==21.7.0 diff --git a/codes/news_recsys/news_rec_server/scheduler/crawl_news.sh b/codes/news_recsys/news_rec_server/scheduler/crawl_news.sh index 0da80e1f..3eb94c0a 100755 --- a/codes/news_recsys/news_rec_server/scheduler/crawl_news.sh +++ b/codes/news_recsys/news_rec_server/scheduler/crawl_news.sh @@ -1,9 +1,9 @@ #!/bin/bash - -# 这个脚本每天凌晨2点30会自动跑 -# 设置python环境 +# python 环境需要换成自己的虚拟环境中的Python python=/home/recsys/miniconda3/envs/news_rec_py3/bin/python -news_recsys_path="/home/recsys/news_rec_server" +home_path=$HOME + +news_recsys_path=${home_path}"/news_rec_server" # 得跳转到这个目录才能执行下面爬虫的命令 cd ${news_recsys_path}/materials/news_scrapy diff --git a/codes/news_recsys/news_rec_server/scheduler/offline_material_and_user_process.sh b/codes/news_recsys/news_rec_server/scheduler/offline_material_and_user_process.sh index d1cda05c..43620f95 100755 --- a/codes/news_recsys/news_rec_server/scheduler/offline_material_and_user_process.sh +++ b/codes/news_recsys/news_rec_server/scheduler/offline_material_and_user_process.sh @@ -1,7 +1,8 @@ #!/bin/bash - +# python 环境需要换成自己的虚拟环境中的Python python=/home/recsys/miniconda3/envs/news_rec_py3/bin/python -news_recsys_path="/home/recsys/news_rec_server" +home_path=$HOME +news_recsys_path=${home_path}"/news_rec_server" echo "$(date -d today +%Y-%m-%d-%H-%M-%S)" diff --git a/codes/news_recsys/news_rec_server/scheduler/run_offline.sh b/codes/news_recsys/news_rec_server/scheduler/run_offline.sh index 0793542e..a55d0443 100755 --- a/codes/news_recsys/news_rec_server/scheduler/run_offline.sh +++ b/codes/news_recsys/news_rec_server/scheduler/run_offline.sh @@ -1,7 +1,9 @@ #!/bin/bash +# python 环境需要换成自己的虚拟环境中的Python python=/home/recsys/miniconda3/envs/news_rec_py3/bin/python -news_recsys_path="/home/recsys/news_rec_server" +home_path=$HOME +news_recsys_path=${home_path}"/news_rec_server" cd ${news_recsys_path}/recprocess