From 8fd57280b741642aa6246e5102c08d4ea9050edc Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 20 Nov 2023 15:18:46 +0100 Subject: [PATCH] Testing - Improve PDF text change detection tests (#1992) --- changedetectionio/tests/test2.pdf | Bin 0 -> 7328 bytes changedetectionio/tests/test_pdf.py | 47 ++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 changedetectionio/tests/test2.pdf diff --git a/changedetectionio/tests/test2.pdf b/changedetectionio/tests/test2.pdf new file mode 100644 index 0000000000000000000000000000000000000000..083ad8ad81e3e6097156f63bdeba4d7e58961901 GIT binary patch literal 7328 zcmaiZ2{@GP`*zu4s0byLhq7gv%`gbr$(mh+iNV;0nIT(AvP4OwWM9gbEwY7d31!J1 z$}W{%3fcb8P;d42``&NHVaD~G*L`j0eIJhV=G9eF5rK<}L3q2SKaE1*01Uuibby>b z4OJvyF(fjQj@lF}4^2?uU~g1gcBG z+qhd}!KnI^c!D#==@$s~3#5W`B7vzvRh+;al(5!#8!QB> ziFL6f*#mG%l&ma3B!NwHhIqzb*6x7R!5DnojVDYCG~$e;*(m$87UgvXSNEn;%Ga)K zTlLP#UoPN&ckKQ|HpBf--0H18!OKRE_3l4v{BkT2_Wh-|g2x0aQkqlLHFi`{(`W$A zgZR`^8tHs?qO4Xk=y>RU>+NfN3QrodPkCSlL-qT#IFe1wx^FH*r2I6}RL5aMDLjR@ zp5C{Z@=!@Nag8n~MwRBkhmr@WKaS}{Zr!)}#4mVZS+aqqBkaZ?-I1;w`wVL1@eRB0 z@}Kv!sHxkMd-S84AKgOyYJ`E8D;9w2V(h^B^{_;|JHZ-D1VGM!{c&l|)HOiUVjdxTqvtQW6a=sR(*vT}VWLJT^TDRG&m}w)71fmh7|5gl1i)ow!Ae1-fD6b#*v?A~ zCip8Ya-jdpN(tvd&IkQZK09^j{kms76K;F@S z#6NL%I`CH*B>J~7@Si7;*=|z6&)j}Y3jRN6NFJ^M0Sl5*+ua!)6Aai{<&R1Iw%0Xr z&Nz@EX1}!$OyQ4i!Ue%BwK2{htN#t#>DnJ)nBb2j|FFUZcL)AEUC3nq(ZgLr!FMSC z8-(1e|IuY~b$WO_*j$aDW7{P%`GGdL%QUcFWSagm0y~WY*`=;T#sKgi8|mlS2mkF{ z+vTDnhJoC*pJ;XAw4beGoWK$d@s=|fxS(t~`i=f|yOD}$H>wW~=jcg1pB?$5Kz z5@HoZ0$X#4HH7#NNWT^?WYUA8c}bt#;yzQK+<6P1q$((EPeCpF8G$lse)yU{l577S zE1f9Pz`Mx)NA{7TOvBn6spSVnE|tnS7ElOC?|JwzwR4RZ`EXwwbAraK&xdvkBM8l@ zJ+aT-w^9w26X@q|?p@fXsaZ8xtn3fucDs-tRrR1-_F)XwOB&qa#rlxyv@)BudS(@l z;k`8hC(WMPH^1w@qc zGDeN^QQ#`~O`Ah(@}K9@?TgFo)Qs3@4_;Luu$G+TSXhrahHiS8 zV)@?AO3IMIrCRNx0MqE6r5ha66$zD}W=+Ga>2}E+*bf(6<5zvVAt+3M%Erit4yO?C5 z1EZWJ?iS=jt|rXO4nI!*nyma`M!81)j)V17^6H)Q?_nGtYAH;pG^~#1oP2oZerw>` zNizlwlrq}BHHrhYu9ium2`f7H1 zE-_Llc(KxZDC4-yOYeT>%cF9Rm5f{xxU+eKjn@hhKCQN_Hjyy?`)apNz?+K@U&V9# zyysFZJ>;u=T%N4ceZBEka-S+BIkKJjb|X1Zx6*QRzA!}z)lpGSU{QCp3f{8ae`UYWM`_}r zpzx0>YGyI`GDd&!@}ikF}+hfRVQq2 z^-`4FL%2O)Xwi&4;`vxCB4;j*_9|?!)@;pa|G{jtwM*R;jCrdp8Qig{Z(Ijk4;0FG zUT}>Zf(H1w@yUivnLOeY$$uf9d1nv3eSG*AqXs%ukbUO4(6^_`&p-!S1fz^zQO_9K zpNJzasE(yI6&>?_v1k2Z)`sYnBMs(Cg~z@ZrB7vLx-#T5&&Bc9@->aSiWT2&j=~7d zJe07eJ9?Ej=4=(zAMyN2M$Kb`u{D((-gPN4!W<%0m{sV)w21uzpO}zr5CM z{&r{BN+1|pX}Es(SVS+ZZz$u-T8wN9b@A0@b4mp9kY^Lcaz=oxyIPXxEu*g)a(i#J zq<3BM*_KVby$F*N;BPxik3i6Cv7Fq4^5MJiS}h=`CAlj!uipK9?_{RN$qRLrUKD{f zd4w4jPsNe1HXB(?e!`88#*f!f{(Z5NkBygBdmJjCsXXsws?VPoWzB!b6d>oD)Z|wd zKFmK>4cX8hsXjT5<-cMUHal^#qQ=8=g$`f3v9joyl^bE1-B6;ip)%6vM?Y>!zhTC< zWjNBXZ$Zf45BGxJGH8LmZ}BzbY0EH6+70s&yD9Sxr2mw;XUSnXnenkC$mybvy7saK zlH`@h@Z1+q)5it<`Q^mFL{Z3zkJI^c(VgD9p1?I9u+16LNOe57;o#%b8;bs1w7%7x z!_2FBHGYRYZ7R6%Or9nq-2U^lPJBMC+P;IKT6okFpFjRs%M>0pX685%aO?OL7}r7X zNtJ`o1sBeUe0O-cut~v@=};E#$7i-q_556j5B(ySv&npo9G&k4&x(cH6f3m8t^WBC z-$^WoAMa7il2I;?!8NY$qb}0Vp9&>b8-!PkEv`^pg7}`ZdQZ`&;YH!k`t)$$B0XufD+!{N9(nQ<~yWS=p{L(_kmqlED zrSgbAg#>eTIg^A}K~PQR(EuI2r#ez@Y1EI^buWY%Qk{?9msBuL<3CNE$GqzKC_zrN zpfh|MaLuh59zd?*i3@Ol$Kx+URbP7r)~LRIa+-pR$;7t4hr$@*t2PokY4p5~%XpQ| zv;EcU+_Fkjiw`we3$Lu2ebo0L=7s2BXjVf5@8;?lr|_rXFe=%IoAlJ8YA?s8(bY_0 zRBg;A0^on?8#sqE09m|eS4Dg zl&e~0)Ip{gDY||9flIYfu)u5<|K^7=!mf;{gOG-Yx|%0X!P%HK{ZFSbjgM1!Rx*!Y zj_h@gcbIb9%a&1YCUlTJ5$QqfH z-`T%WO|0F1O1a%CWG=nN8g{-sW6e!CM^McG0_>uwgXx&IBL zQ>KTKVfpZZ{O4)SPek6{rhd&WUUadQ^g7hBdrxs%)uSRA$^OK9;vwHF-gfkNOs>>-9z0M*c~{qWJJ!8X*<>=;S^cWmpz&BCOi7%6@u!rpA91pW`+A@fKI!lg82( zhs1N;F=p;IHaE@AdKHi?J~d0M>&$sOT^ik-pK&_PvbCDc9dP-{6a5T6d`DYvu%O;S zWp;`iL%NR9mPHUl?pFVvOryA1S2O#o^^jL(y2WX{ruB0?fhVLy9~+DEdo>{?-WrDR z;LYhuS4DjIWaSJ=p~RN@37?YT=qldr+j#Lg!M9R;&Ld~UZ%Sq{a4@d4#VkYRq^@9A z1@V1 z8OuDp{f7He>Pf14`x+zk(eI};-Wsi91|J>#;Cy6jT1e<|_9>yhaXQ?L&MXIU&bt0V zSbzVU_3M*SMM((`GMB!k%avab3)^U3dU*zzjmXyIh_0bYH&}miNgjE zeJbL8W6HPhy$WgZy_7x5&L4ZiUyChGJQT`4!qulWp%-~9C{aF|C|&Kn{`K95Na|p_ z5RNfnqTfn0Dyh=z^5k*DSk#67?V*CFn^YF;^j1b6SS>mG*Unazv_qMhZB+5;+6ym( zOit8Y)cTkSp%pV!r7~JSF^RjrJ$faW=0(uei^uZOoadMYw9LHE89*(ZKB=E8{#y8< zJUruo`R&rZ+BXit#8KA@U7FNB9gYal(|7&;tg%328&hu{3AuJWL@QDIS zdM@9bS5q9zOND(Fci5lto!#FqTfSYK=N?+*Xk7b_e&A!LplRE~EzOaAbVH2RlqO~^ z?X}#>4iRpJfr7pWO$3M2=PafpgVL(@2_^iNGbiIC*+kg$G7Y>7S&YRfJz}FtqPDza z6k*G26W7{a#ImHuJl{ZeR&oR72d4vNt+M9zANw5X`5r@3abvGA3GVAF>Xht0W7~&2qlPo zbqub^G7g}w4R0Ta)eL5^Ropod= zq|zzpbFq%Upi`(+!(5Y{%oaDQkK-l#f}&huQHtHcp{L-USaL)D0_2Qhb}9Ce+CI}463 z+gVtC`@;Ts?*!pgwdU*Y+8cTlX}77AuQkr)y%vPVmHUka-J0c^5)+R1TNvuOZ3(xn zTIlXSm_C26>l^RMcTq^(#it@9)wQ{oA3vsdcb9uP(x{g-iLgSq1hc2Awb!nj?=iAn zu{Aw;w}lTm8`yHVs0GVBRd~n-#_k!>a!VzE<$7eaK|oC`=EEWObN-5l(|Rt7ANASd zE@lH3?IZ3@oRCs^sw?d6x?vXJFjIw!t=1vUw+^>YSM&|H0K;a^@aB7JAvU9W0^jc7 z8toy-;PX7WaK`%Y8t}^M&S6I9n?>pBik`8NwrcS30^6fSTI0gZxm`EQ)7OQ)dU!}VDs-KGt=Q?XRj|m^rKeF@EvXdxLJh$V9`pzA(kz|p6p*buT^ny_As{l+@BxuU zVcg8Re=R;HtL z$|^p9YVLoCnDD;@7P8i;ge6)NaIT=*y{kQvl^*~qzw;7O!a87#-1RXoL@m4v9t1dH z>_B}H0+lCPgTfOai9m}Y5D0N`2$Z}I*(8EMqD0Y1NwgSQ6~Q_i0Vw1S1_go?F|KM@ zoSi)hK%&s1WDtl_2elWRwY-a+6BYnfN^(*PJ5r|z3Iw4fkrF6C1SugVih{!>5P$?+ zLR3NmDJBLgPhfZ?0)~WxiWS)nR8z1BU`K5sYpFkFv%lm!GIrPFSM|V7dqB}h0Yk)+ z0sqe?=(}Ge?TEmjVo3&)B_sq`8>lwnRn?WWFs`7^YXkNINAlV+>w6KwM*LKg^ssg~ zB8lJy2*`sx_B}F^ZR`j}JMnb%*h=G@%OPOe-xgm*7l%AGLm z3&XqAFE{8ky0IH`U`>-96-fNql#``rl$m@l1=r7`Wh;W7%-nf;guvr$r2hn{KdGCj zxA-7blC}Af+Dd)Xm5TOz>rD4da=uCiHqz~N?hv@uxc;EoDSLLEdYkLSX&4TK)@ioS~&SPAdW&IQGhM*kBuyN?mTh%$A&_I#r(^LK!F1DpEhv` z*x$YgG?>MoIGBXQU+cle5TL~SCk_t&Tk>xkOcD*=CH#qlp%H(@fWyVXhWv>GFTwuu z1#iXviUCKW|Mo>8{Ef&3qlsPSF^ literal 0 HcmV?d00001 diff --git a/changedetectionio/tests/test_pdf.py b/changedetectionio/tests/test_pdf.py index fcbce8d5..d8694a57 100644 --- a/changedetectionio/tests/test_pdf.py +++ b/changedetectionio/tests/test_pdf.py @@ -2,9 +2,8 @@ import time from flask import url_for -from .util import set_original_response, set_modified_response, live_server_setup +from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks -sleep_time_for_fetch_thread = 3 # `subtractive_selectors` should still work in `source:` type requests def test_fetch_pdf(client, live_server): @@ -22,7 +21,9 @@ def test_fetch_pdf(client, live_server): assert b"1 Imported" in res.data - time.sleep(sleep_time_for_fetch_thread) + + wait_for_all_checks(client) + res = client.get( url_for("preview_page", uuid="first"), follow_redirects=True @@ -33,8 +34,42 @@ def test_fetch_pdf(client, live_server): # So we know if the file changes in other ways import hashlib - md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() + original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() # We should have one - assert len(md5) >0 + assert len(original_md5) >0 # And it's going to be in the document - assert b'Document checksum - '+bytes(str(md5).encode('utf-8')) in res.data \ No newline at end of file + assert b'Document checksum - '+bytes(str(original_md5).encode('utf-8')) in res.data + + + shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf") + changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() + res = client.get(url_for("form_watch_checknow"), follow_redirects=True) + assert b'1 watches queued for rechecking.' in res.data + + wait_for_all_checks(client) + + # Now something should be ready, indicated by having a 'unviewed' class + res = client.get(url_for("index")) + assert b'unviewed' in res.data + + # The original checksum should be not be here anymore (cdio adds it to the bottom of the text) + + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert original_md5.encode('utf-8') not in res.data + assert changed_md5.encode('utf-8') in res.data + + + res = client.get( + url_for("diff_history_page", uuid="first"), + follow_redirects=True + ) + + assert original_md5.encode('utf-8') in res.data + assert changed_md5.encode('utf-8') in res.data + + assert b'here is a change' in res.data + \ No newline at end of file