From 9f1584c26678e44b9d3fb8a4352fe60da655f6b2 Mon Sep 17 00:00:00 2001 From: Reyaansh Sinha Date: Sat, 30 May 2026 15:02:22 -0400 Subject: [PATCH 1/2] Added stuff to README --- ...t_core_pipeline.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 42764 bytes .../test_core_pipeline.cpython-312.pyc | Bin 0 -> 3458 bytes .../test_halgorithem.cpython-312.pyc | Bin 0 -> 11890 bytes .../test_voting.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 15844 bytes tests/__pycache__/test_voting.cpython-312.pyc | Bin 0 -> 3337 bytes 5 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__pycache__/test_core_pipeline.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_core_pipeline.cpython-312.pyc create mode 100644 tests/__pycache__/test_halgorithem.cpython-312.pyc create mode 100644 tests/__pycache__/test_voting.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_voting.cpython-312.pyc diff --git a/tests/__pycache__/test_core_pipeline.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/test_core_pipeline.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38ac16bff29c13757a99883b3f42539ea0fe0c0f GIT binary patch literal 42764 zcmeHw3v?XUdEU)rzgARnoLF$rIab zd)n{6cjn&N#ejffDaYs7!{v{A@7#Om&YhY6egFIKD=RBpI6mLFJIgXyThzZ zO;z?+byu;pH&xwV(_PbF+g;1@zEoX*eRn-e`;l(wZeZy^s;}y_Ds{sb&4kyO;N`=w8tu>5lZT>|WWws(V%c>h9J3Yr5C4 zI+dxl{p-5dv2;~xegB5;4J=)qiuP~p-iUO~%dYNCS}ouMS{>kKtsbydYXEH18UeRx zO@LdqMS$D1X29**V!$0*3t+pp1aPOe6mXZe3~;x$9I!)M0k}tt06wU#1l+5w0^Fyq z2HdZ$0X(3s1$;)6pJ2DL=07|@4P ziP%6YSqvMAfplV^hviVa)=2gzQ*k|+z7#VOalNN6>Mm9f4C?*yRB|K{8y-lejp9=J z6VnqzdV<==)5*bsSbx&!kEhW`u_1l1Ct(=LfnF-YpM;T) zX@fn({pbKiHD(3$Y77ky7zrvnX%4Ql|8zpr5<13Gep#A)*Ld2VO_3RbCiuQp;>2& zwa5AR6Pl=0c~VT(ktE$h*ytJ56ES0OSnr|XdxzpWyN|l3`}9OyJ3cs=>b#if8OD%% ztdEBN2Jqj5|995;>BC9m^Wlf4+;D%*%#FG-uCr9D?9e=-hLZMLCo_sDb!+OVC*#g| z&TVkHM%9dJ{CyhN;m29JcG;nMGwK;d^HG~K?ysr*wB}ETtcqvDR!aW;nwdQBG8WtV zuaY&aRNCy(<+8@8+gsFF_2l2#Ed~_LuLTx7qQ?7=NY0jSujw!nSra1&*m`xUE1umv z^~4j@f^VoJK0GC_R-uJ5-lW2EzI44c7q*@ntTOpGAi^VD1DEUOFbFR;LRL93ot{f3u4r;t{o)n-EGQwuNnsfDectiR?{ z^IK02e*^emfmydd!4~<1tkkGmt1J83$87WG{A}oJuP^=DZEgNBo{jth8s6}HB42=* z%d)RvsWo%*?}ERA#(&UP(Dbvf;H(qw`UqdaC0BG&S5YM{=DJd>=qIKoHW2Sm+^ad3 z?2kv1MkF~9IiApmTR$N3d0n~Z-nZ|%GO}guhM`N_qejI`W+JjLLa8lLV*~x&8a1L7 zqmg~QbZ68kdQypjqK`8c*VUp2bqQ=Gu${opdsT76Na$(S(QaxobZNIv%y3anVKh-? zeWHsU3!$R zixIRF*i}?9)GZ^`wh?aKh9Oe%wxT~Z*b`4DG>mYriJ@!ji4^1TN+#yIM_y0g$ZmEB z^Np_JKlaTI&UdfXdNp0q55|%>Kz$A6DjpiTlqRk+t`}=#J@HhEai~3m{ounA#V}6C zP7fzj>EwV>^wF)SjAC=l=o=hPX{>6jH#K-V&flbBFlNNhB#6f?Rz1ufFqqs z_`Z1C)A51cV~p@naww5X4kTKKE*1B(C&R=J_AO!R zOy)f%gbRb7g`SSFw~{0ZYXPR8gt#MS$*w%p=RgU(f2BwrYo1{Dwk(h zY|U408$0}dsA~ND7aq*2ORla27=Q5U%3Ns4v`W7zp63|>bLx^;w)0$GCdKJVG&(O% zTgd`-#Y<7)>Pk^vB+%DPsP@Xr%N<#@b-ZIbv@{o5iexslbUL&u7g~ihU_P`4iEBP~ zX3aHUF4Q`$(r=2YqJZb}tPgf#lJ_Cgt9TVwzDA_2mb^f~oZ2dewULIkEEigqRX0v7 z2FQk%O)SoZHcqSbn=%V{F0WE`c4Cs95b24B$=P|bnnadEH%P_UE&1L1s zHkU$Xh7>5Ds?+?4nwZPVdFuY-%w?51m-L=4WG<`OT-N+-E@!`&;4W*s^c8r!?=802 zNt06|w(_y~P<)`ZSaI;luBT5teCX)Gr}dSndtDi+Gg(QCe*`aA^q)`YTCyj7-L0=j zEykafaExI6^j4hd{THv(@xcLNR`>4%BPF`q&)HGxuA*mf;7n2@ja~0{cKq!DsG5|$ zs9$ScelKH1Q9T;eNh751B(RIXZUP+y_7HfGz+M9T0HQwq6s5Zf90ag)5nFkNGY%y@ z$xZ_*3zKcoyQtZIN$sIz1UE1MVo$SRk@)z4#+avM5A=wF-dh?(J%&z5Ff$_H-a+qM zR%KVWXLp^*x17wWi^d)u?|!cd1a#H-t}7`J(P@=_Q#{Wz0_Lcc=khXHP*+_^i8>;I z$}@{WrI(KHxmpi;J+0DjisyMoz#NtGTwW#%>e8$AqK-(Q@=UYYv0B!#Dr)h>+#Q$K zMEyy1seT+Y)r0@yAc{ix>(1{38stkNX9%V@xaLa-w<}^`E+a+5Jkr3QZO?j-HH{(Mw;(cSSMW_s7?ul|H6@ zL@OJXU%2+suEBHAK1w<`udYwfR?;@wM(srBB$oe4*GdXq%RF}^C^xi{T)*XKT1mgj z0|XYV-x3-%>s_*GB|Xq<`9{5$@J=cCKdN4EMZ;Y>ef|0}fT){cV*Lb5-)ni~;gd%X zMlQsSNDp-C5MBu-Z|&H*qg9Vn8$!jvg(nFYI*8X0e!1+Agp~bTVaOo#6gy1@3<%fM zn((I-c}cMMGDQRF^^-aY+=S3c%-+VZI>X}n11P&!B5y-D*SFB9LOL;s#mbV475!#* z!ALr&pGG&#f}n$Qa_ODat4f2i{ z8d!_WlaR{{{Vcj#Lw$Y^0QfoFFySkN*N=6+7wDJ?9Qx4fsq~F?epulO*G`9)=R(V~ z>QjJMhx4JeNPeq59|d@8$Fzm(fnB2Jk?RK2XMxV-}P88LjnpWxe4hmTb z&kC4RpBiuHxx7pksP=MEi6_wS)nQT9O0ueJdGRE>a#=HEaq%BbMxQc>ie7K(Y*0(^ ztdJbe1KXD3S?f&~a)X<+iW%4Wg>h%#;D7^FY9Vo;jn5>4TXtk52J)uqS+nD;gUm38 zbm7G33Jh4n5j$O7)C8{dYCa(~z)JHmN)2wNy$zBLL#tqx47U~L!h_7|;+&_)8z&Q1L zJD%=KM4o`HHDbj3hf;|M@tbidp7cl`=tMly4;5Qv@JwXu795dYJ>GA$-V1Hr5;>Pl z4N?|LWOs7ly2)}w=Sf7xqyfbWp`g;h#0w>>{w!WBj`N9oDh8ki>rD>CQ`ED#PKjth z9h<7<9Z|#Vj0nh%5{Uq6XQolT9@L4Gvk?Zyu9va!f_q2x7il2=o<3;j5{7<+8XqL^ z0)bD{Nt?!TyPctL*AL^g6OGu;XaqC(TN7d0`-xV9J|tcoPQua;Q#}-R7oZEor9X?# zNT@LunGAe1yYVymmN;?mk6rP<7i!L`>&Ew8eJmSVH?7idisyMIPZ^#R5Dl(ACQe&P zaT*DATF0^f0rQeiUz;k()Sy%i z6QL&zla4GUOga?rIT!CuL@p$fy?sVxTV!Z(ctA5+M{2jUc5KL6I17#xts~3h3^}3~Ho`sKxlr4*O1~+yfamfo$J3K4>hq)>7Sy);V2P!(xK63BCT}6l!*~Pm zk*01Cw2SEm=i`vPj6(vCVb`28(wJ*rVveW@iNMI< z%RLwMG+J|n*pC#ns`N^ivvGoNi~5QIq4{JyTZXDL#%+{Rj58x?gG=vRbl)Q};v9aM zu`{W7uM>Nw{{s3VeXmi$xwGm`Gl2s$f&F*9o*LiS5t7>*t{h>8!L|t?OrWu46A+ZEz>IfrpyAK%d;F$Pf~rJ zv_n*(9*=!|Y+ga#HXGFv+5wVv-T5je$}Zqb)TAkzn-r-xyq2S+?VLyR;k@%XFfv+D ztB~z_g<_77H&m^PscMB16G~deNzn==YYmjFP~vJ8y+C^qYNeZJ%BJ88;4aDzI2e-S zSD4z^1Yu%A+2N!*kV*zy-%iL`+>2Ie3c_BkN~^ZHG?J?UJ8V5UscVHuU54gOmqwvD zRG>Jl=^CjRh#T?N({Ur&GZGlQFaQPN=}SeGN{gP;aqYUQ@5g2JhX_0jP*ll3nb*}J zB^Fh(`9O*S*OS)Kl-!7lYVY8nHs`k=4d^x0UmbzZ6Oa~{G82oRdwzJC7X%m4oIZDY z3Z}A-fA8(=d)LYNIR@Wqh`H(5891{fbVyy~8e~9Wmm=#-LW2iPI(Y_;F@}Bx{SZzu zeiH!vP-V;e;YAbSnuC<%;p-o>oe6@YN^VP?`@YuxRthy^3+H})*?IKTIyO;~X+Csl6cIw(ip3Acw zPfx15tYTikVL{#Xqsh=Frfd77`h(#)bZzg?l8rRC{gb;P{5D1@}_jHL{}X z;gb(XQt{ydXj-%gysRRG1|nb=3@GAT?=9_1r-zI^ZEY7WTxeyOuXpf#TjCR=Or2LO7{u;Z^`x-KgF z(Ovtz-NjxU#)6XcC3L%=Y1+_hv_hx%p8NI%X<-SOxefhuR5%wxEmoN~W_F0h9|3Dw ziA@8B7mXWA7&pbNBE~EnyNcmhb8U*AfD@Yr^9Q?qYrOjF=;&$uH(Vy~w0tHIooU#1 zt8VShrb6BByMC4NP+{;;+`H;(JQs>&)uY)^WID7y7g|s0O-EVDdL(k8qthz=rl<~; z@NAwpVW%e9DPB%#s}3*W5Y538m0Yu(U9%0GTUKqq3EdS6+HS7Sgr8&u=>h4CgPJUXR1+%ret=`C`@S&!^i^MsNAL-l_GhS zpLjPaAruQYDi3p`@+7$%mAYUzDnZoBxu8)6sG_1v{}sHGBlRcAfijZThYhlf4nd96 z+WX?Zis@e;`nv<1d!cu?=w&OF^kKZtU(9#dvmsX}}F5_gp`ma)TCZ{}4DWPR% zB%EzPJ$(?2Jb)e1dZY zjD(Oef>u97P@Nc!QEvw0u^cgsqI~zxIuL)Yg1KQ}5mLtf#n~3MI63i`m=*<_;k48U z%ZY#?=zwWa)}%ME?2t8KS`5!qS15(xmM0cX{g@*aU-7&n6*b8%5GFvrl%?DRxWu|a z+r2Ec%H-e7TqZy-OnvIv<+7%g(rOty8_WdHMyxXVcN7b?%;z7rN-T+*r(T8i?0*8DQoZ?VHrO{1c|3vDu$NkOdU_+vkETofT*@k4SBu`{fR?z( z1kPe5v`>Sn^AP?>oW)85wqzE(49tvQLU&B;#G;L_P@0LTUq$M=yBOd#6ME5`9vq6D zvsgm5B4iGCnZ$YSf+(3n*fWb>FsWE>OL{Oi5?pOPT}>l!fxtxq8wo&&BR8GnDZDn@b zscfRZP&Y94`1|2ySI=GB{bt|W@k01G$aARX)%}#Hi3&FvLep5WpGXmyN z3K~rn@-kVVx}pwGpuelniJDf@UI`t{;M#j&{f%FSig7b6$ktK=GQ(gj_QeG#;L8GN zaRC?!v53H2P$1R}&;kWn)k=!fNW7zNp3OYaJH}LpugqeE?LL^2aR7;TL3cqP29CKD zewyj;fpxl08Vip`^0X(T4l6|N=as8{@Uet%Bek$0d8rlnSo9E*k#S;6Bze~YCMx!k zVl?A3QL$eV6I)LXBr79U440u|(p8Y3A$(DRjzeAguVIE6GG;!5QH6iV94FoLC5_0y zAQtrBSBw?-73hgKu*BFh;TUW+!$qX;2xhT z!5MA7_^LdTLd-zG0dnLsQBzEeUH?OL@e=+UmjS?OS6k=-scdBLTNew}$H$JmS0+~O zWMbt;tQ5+sJ8xnE6ACusZ)oSVO1~+yfamg*gIGytCfOO0zKQipW*d=sN8LI5YZ9#Z zKfyzK&EuHJ^>8$G7bC?Tq@chQ^=qW&G#xf{X?- zfqB`wP$s~X{%9k=_>Ykd6Abvg%(AFS%(DE(o#GEjP2@cF1O^bycaZtt1x;L1v0%1N zu&8CY#0NYQ81(~>)OPjoH-keHal7uULH-xVb4R0oY4pSBeMd*Tvyl%8t~cw8dqSv zHJ^|OpOD`{=}7k{F(I6%{YCW8ct*@dT_n4*J74#6+3?S?nIPM8jOl!1v0_Cm1_ySE z{1YpNV@%sZYvyAy_RMq|XR*?Js67cRpN@et{u}C@Ua5yTiK#Ao-SltZ91$xvgde`_ zy6aYgTmRmt1fN&f5cEICNgKueGG!wKCJ6infqzMW9-98o0YG^lAO-Rq!Uxk`3B55b zpCVSfMA#s~K)*qNNdwiK0yEy9?o!GHOcdaHEJ=`eQ?1qY!Ffk@fN}ABQ z5b$+sLy~}v`DZ*nlK?(VCw?Em2mx4x8-Mx4a)bVC?tTHdK>rpx{2cXs4`2+B!0UZp zDYR_7>!QTnDp%v$oBl$>?y)C0+h4bOrmA|TvT>$q`Apr4>AH2fx^)v5^L1Om_1F8x z9={uQ)kG}GU>TDP)=#&z=2}{>UCg)aoPLk`Tb93pM9}oxj_UY<;op-@#Uw5!OV|b6JyQ z>as)D1U;{wr>;=`%~MbC2wII$rqx0zCH?6aS;MgPM3}zpm{Xaif}XSWN74E@==oA~ zC8>GBLyV_2f!*pQH|Y2y9J^cmHLZCRpDoNt{X@f0$Hz#0FpF$k5yqU!_Ixz!kYEgO zO^aYbc!}u41Cd#{ByyoIF%Y3)L=X`pLcWu!WJ1&5!X1K&5&2@M-XvUKLVo1fkAG{e z)*zV^f=R00CsNg$uj|W(``CBeg#}gr4p-Fwl)yIuiUBY{;62ipnA~*T#py{GowCS~ zHGTSDqLgTfL0JA}*G#bX`yfO~<&MWyQOA?Kc@F0}{W(FY-y}e5egE5k_)9we#{opj ztFlH6o`*DSH>nb{cF@aUu1hNh={d6{Vfyb;@ks(J0YJdo?nA)L6~QHXElx51f_jj2 ztCFj$=oQyV+Gp#>_!8ZPQmdfO?w3&}Hb|nCa@m+(Mwx;*D<~7EOVI_t0QwYK|2qB~ z#1o9Ue&F^Xg4y?71n*XuilZm8FI+6tT^f7*R(SEX+HCuyZySa1$!zH9u|w~No3AeU z%3yxou5a$nKKnx9;HR^RWFdT(6OnzS)Y)+L7yuRy!1HUvv`W7zp63|>b0`HBp$d7K zEKpruI!S$r>LP&}v#AJSYN82uTGUb&(rMkSg&Gt)Wf6vfQRh--3>}6e7fhjlF6oMs zjz>cs2o%s3*%kAeDrHh9$1Q7A4C4!l*NLK(tdEs7NrzE($eO_TFm`=xBM`F%nsfgV zh?*FITy^H5*nk;nWUaLZZtKm7)(NU41q0~QY&r%fLlR#nZOt!f-JHx_kko^wq`twF z)Zev@hQ98y0~#=r)I%iQ+bDZ@!Lz*B*1v-uPfCK;-69I(eP?@i3(R`>{%xctWuAJ1 z($7;*xC72pPig`x&DG06=719{z|uju`AI++-5L8H=d&P$zJd;a)mOm@Al3nHM2p z7R@0$^~7PL0Z}_M$Rmig5;GtPQ3tL{CdBJ}ZL`gCLq;1~sn$`twi*6GESA{TXqYuD zhWHhFQivUE({bKcq(V>&F)T(&u~PcmblXJ&&(oFJvNdiW+0IQVjBp*I3Xc;=QY{wZ zi`Uw^r6jk_S>|ftv<)j>F2z`|)>zU&C9=FLh?N<&9>4YboWgAfml8w~_qK}mnr;$Jh zw?Z{mI3+j}Ey+{(ruA^pwxUX{pNO-9RTJ<#T0gDQZ;EQ5fae6vsq0?>k)#@VnJiFc z#LJ@MN#3U zaXATFLTI$^#9EqDLQGraomi<+5~LMt&S;L$631P3KuivrVluD;ydiwXySBS=@{F*& zSxUv^@W=M-#0afBp7+_!YQc!9oL7hE{n4MDZC0H66AaHh^#pN-H@_7MguMBk#Bg~B zPH)CsJ}ttf*b>KAyS73Yy?~$~pK>YqcSU*SKV9^E%5_13m;VPqBapI+q5k;A*o8s; z93m7l()$6i>LsEy)!dH%cb)ib6y z%pyM~A%v|Cu_qC19Ucp@Gz%VX(;SEn{sjFRs{wxa-$@sqt=j$d9oG(y8?RoxeDUgz z3FY-&U*7xr-l^*KU*GW~gC^qV*F}|k5AcAKj3aU29IQ~N;2;qGr078$O`SeSE;B!e z6gZntjuSCsU0Ixn{-5d6?-KYgbmU2@VioL^r$0WVe@KOr(_kX<9x8i~ zz+M9T2)svt3B$XPidMiC#Ei;b?yqr(YCcYY_zV5J1inY$?*I(K*Fsn})#DF*lt-%H zMGyaqx;g?L15dDo$p|jFrc4DlUhBJ6U4P}sYsY^1*l#puqn+6ehu^NvZFnr-@OZxZ z*tq%wC^$CV@wuuyzP=-0wL7cseq}q}gnWpSxzO%uRsN>vBrnLzWI^4H2E2F@>C!eR zN8wLC*YSfO<{=l{c#pAqUtM#g-t%_7_sBAT2|6G_h>7xDfW(RPx;`IdR38>P@<~36 zCsBT+p*-dXgm1-C4@1Bg+)1yEAEDJ?B#cVavE)_Y{R`TKRbRra+)HXl<`mw^d&7gFzX&l=&%FFF}nrmaHH0=;1M<2?$}{O za<-t%khIZZB(f$(&~*R0(rIR%dXjQ4GPf4+Yt7nX8|7YR&AhEA2M%fh3iOt)l245Y zPCfqLNdu7W`Lg2laY-!_J}CM$*?|vHd|Xx;7Lu@3TN0&*u`45_K#dC2hSx2V0=%HJi+if-%5pVGeX;^ zP*5yZ2V0{U(({f-3zw??S>3&j3mZ`Y;QX0D<*VV#;VVxU0xh?y>#pp0ZO(}P$*XHZj=c_kl16(Ot42l62y^oL=DV8bnC}>5MVxZltW2GWA8kOmn=`MZ8FQxki z2dG-d)_e6lXRFt-m0MyXsC;iH@5L#|TEuKQ>s-gyRxGXj3YtsrQs#3iS0RBvD$x1A zCa{vgY65Er{2v1A2y7zo06??`F|`xeTqcbTUaXlX4^xBNZwb9H1$nW?I!l|n&~xF6 za@O%e73CFRCI<)3B`yt7SR4KSQs-9*e4W5S0*o8zq|{*oj}oA9J2C>?S+jg#_kCgX zO3923+2nX>}38SiDg=@ltk2XF)v-AFY|X23pVQzZre& zaQ3N0A$*3`a|Uj9y!A}>L~kM7hfr)a6?eTp=Db-29XvDqcFr_)Eu>vAz^uUyE^i%T zWFk7|#Dg~j-#n6i?$d=su_^U46m5I>3wwldcTcHt7g@XVg6l7c0WpocR#H@g0%Uwx zoVJo=``yD!=d=A{-oz0?itx#uBOtHXjtXYGg%Nm-yv7K7;h2-igTRxPS^8-8DzNoB z5%Lg7dDzARIJK;_t_LqOfddyntgCKRTlfxS+ib}efCxg!?}MPZJb*OR4mM$EsO!o! z)MNKwLIGk<8tR5p4YjRJlf1T-l2JS6sVDWkL_^rQGSPp#3k5C^p;ZBa(*c}bLG zhwX|Eyp0`xT+gxfTrYR@u;T#1fTIF<6B+5vEVN<-UQ}4O zjdh%M!>vIJxImF2kQ~|GSP@Q5^tW&U^1X`dz1Y6s+y(k;_cJO6&%~1{ELYUiBM&=M zE9O*2A~SYCNGIZ2Z14=LFDaGi;(c7raw@7?nqBtkd{rzPh%wb31;)@}&^`%r*Q%#Q7`XZ%lGX3TZlt$079eCV?igNrv8ztx#l7ZudUI6=5o zvt;7Yo5{C**;C0v&Dm`Axw|~B$K8O1|D!Wa+Z?Ky&OcSI%9iQy#$0&gwFln}{N<7C zXU-Ik5i#M0*u9tcUR5q1z>Wn_c3WE+O!ToG3|39_mF{4G0>1YE?O*`iIBjjfb}E1( z*-TqCcmap+sI9ZL$U=7eCd_5d&OCNE9O!K^d;EZWCeW&bBqh(yC!YzX)RfO;%h^(& zJm=J_T|Of>C3ZMD1+f)=3J|^7AJDtvL>flDQi_vk7HB>7ko6SAs}*e$W1hwX}lZOe{@5Ig6$Oj|_sl*7@G z?Trgr&vG=ZHNzD>oC&bCrj?mW<6E}TIItdg-zxFGEv7a$Q}<8Y`)0cfhj%n1W?-Ir zVxy~h>Iq%B)?&);RoW75X{IX4SC3aO*qL20iPkeR?J*%}+r#)@i7oxG>!|apQkx|# zlwN7QNb+t+!ad{cCsl8q*Q$D%N=C#SgPu;Oks5&-{Mm=cPqV7PDr- zGcR;$)}2^0KN^Os{mN2i(o!?tA@)?d9FpXM#RDpq4=8Xr^hCY)xaD9LAHl+xLla?( z)h&6uM@X=Jav*gH+rXA6YUr{|Q9}|nX+$$@^*u5;F1hp{;14{(Nz~QUNU~@mmcF>fS{x}MT{F8?HMG!+?Mzw+d@Q@Q%>+3@xsP1bK`WBY2bRmBd$9}q^K<$TH4 z1kj-MEP>+$x&Vq{`-`M6#;p7Z#Rk#zmx)6SF1_n>2YGl~eFaV0oU(z3&Z-2kE3{oy z@KKhD3QRy)iN0ZVdYt@U9D@2=CaXEb1*YJu7%dIv&(%(+sbkaG1(rRR9wpF9fK*U= zg1{C6^pSi(;JUeLd0x|*73vv_lA#$}Pn~E4E)Zw~C|SBp2CrKvEcr6&z2A=iclH1X z(ixlEUu%VwZ#v>7Pr<;AmR&ZBsL{}%lfeOMHT{g3TJq)u|i2;Z#jqhS|el%$By z1|cxhCfWIo@U=7J=U@Ho<H4#xZRGpM=aYWI{*)(Ms$IT zZy|aa*cZ2k%A}~ao3b4QXnxAgd~2!fLjZKkUPF4=`(2GFebN(SR3EB3;@B<)}Mb zJPzO`B7|}0IlM@vs;F5ZD)G+ogKIIdPDO;T#Ew8VVL4+nayoeGAH9laX4_ZncPTqg;3PmVHGK51{^r0)liVeYRHC8-rRafG z25~c#&QQ~N|PAsKP7@g6aVgp-c7&9@rNhrz9 zV+Hq)2+vIt;e%ZJ(q9&R6*;__6^>3xy~yG-4GGZE2rGw=YLR` zP@bW}gc74d`h&vC&$zh?kp&28&o0A`KkeiD@z)-ajunU~b3BnlJ$S0p4^ir@WPw`q zQi@l%%&fsnI7F4%n35)zVahdtc1-Eyj4GWxc&K04hD`j_D^x-DRUHfH!|T`wcIAY_ zBlcWlY7@SPBaM_DPKrT_+X1ynplol^pmbMt!ml7IK@yDD?@vcC@D4%ML z)w}#Fojjq!sG6s)pb($rTtZP%-BnZ`)}z21Tmtr#VRdu!%5NR$<*Vp!*`p02Zc=Yw zS+GH~!8Z@QG?)?3fonUb$&Gf}9C z6Js3LGb4Prr~o+h1rK$hHpVLqlVS zZdELru6Q6<@j#*Vz*{}plh0?9sX}XicJn~KVsPx>d-aQ8qZ!|EWyj^cu;b%5#j`vk zAPoHD*ieQloyqq=~y1hhQ(i3-La~WF$Gqg#;gy;LzD;P5vqBb^dbNJ)> z)GrIY;(VXFtcg$E<|5#THYJlcS48!NqRqLOJe8Pgt%m9Mc_cx^1COiEVmb;xI_7%@*Mn4|9~-Jc3K&Mj8Tj3k7hMl`le{Piyi0BV8ZQJXrnhrWv3SjkzXzW}`Pkb!^atSqQnE zu{rnpg+ST6sQT^i{V22#wS5EK&&Nr)v?X$@qgncgcPhymNiMk6pM~ zs7Yy{niG4$T|k(EkTinaFOY77X~S)L!H2j2#}%u#0ezHDTR)1?&|*Y`6r%JUTke62 z?e2lWiH>Un1$E!8%BJbcRk_MlGlA-v#-&$|UaNc4{noKu@TqLz#J6js-)>(2?d7{> zY8&rX`e}QI59?e_%dQ|S`7*>-Mo{u)(<=R@c%Ekj%uy-N7*mgVV5q*t5OcnOCEwS#{vn00Xo z0q?glQ_h|u(lgV~~Yr1U3DM2+bHW6-)3Yp^!64Zh`)|Lo-&D+C$CBcim~iG)jADlB}U*6Xn5I zb~8GGVEqYrvz<#0k@<6Q0K2!u&gh8*qm7a(n$fEcQtc9=CFpxIyO!2JC;d+V>`1v7hVqBP$kt-DuJoBIdcH1C(kH~*eFvHMcN<9i-kO*WU zbv{vi$B6sBd-@4X&m;J6{1%#%fk*W|fv6}1S)47z+5D4Zf7(@ep&x?gi1ILL%08_; zLVs!wx@Sk<>|)V3Tj6JyRlBm=x(Fh@tt%JmnpWvIWtH%3p0coC(5XpwN~X7&O?U~1 z@2FiIrDD+H5aQowK#~{gP3y9-QnA z*hiZd9kIAtQY69V5er|qO+E?ia1zbr@-SW)Euw9W*cG)rK5+W-;uZ+c2F^BLqA9CJ;U0?>% z!>QqZ2Lz4C5E0InZYIjW1RIW|%h>y31@nVu5g|;MjWV{@BsAh`G4yS+yiQa{cK0M9 zf+|c56iVb*{bauCcs6jHeQ$he>z8rT!iT5k2>3BabdRz@BE$!^hogec^@02hNT*r8 zur|zDW;#{$zsPP~*9mRe5a(`cH-!{w%Ial+5@u^vxZmpDJ;buAXjg%Qd&%T=eEfm{;EqwM>V20OM2HO+cZi zD46l4u6*c8q_Zba5k&eV%1=$J^qaCucviridg>K8HdB?nOctm%wv46XNu)Oc&syow zlQMmh3TOXt`Pp2pK*0qtD};jWW(R4AMWAEo&d#{Oi$a+#j0xY8ob4QFnT!e8XT-kg zE5b?8tuT{a{sTR50`XSV>7RBdR}JoN-qH%~pO zV22*k+=3ble{9BzYU{>qJvpiMg)Cv+zv&5xXuzZ9mbLUmAtxx(EKNba#G)b(f>HS( z5BLx$%%AZi=7pR;)f$ETODGyolq3kBjqHU!wkIkGn{`JZdvZh$PbWRA1)gZ={%tVI3LzjpcxAIsaa43Mhc>%r-iDFO`kskvqV=O|k?npec~Ie=VH=R@2}fC4~04S+jvs z%*T=VW1ST8#TNSo%61zhez#a{uP~4r;*r+sV5;mH)^!A(Z9Oxb9)^6(BBJO|QqM60 zX9;|kz!-sFAaI4i>jd5)@CO9`kifSHOc8jWz+D2gAepT}V_u5PcZ8WmnJSVAwv6#& z-(R^7A+VPR8{edyu%6DDs-*vUV z>)P@zojURZw@Z17yhs4yL<0D(Ydfp9^|mjdwBBy=D$8!y_>~jN?FO&Xe!E6hw%o1? zDBEv01(n^mn*++u+f5#2@$GOuYCoVVyKYx4SC-stEveqr z&#S7Q_r3SM_wRH%DL}j79$WfdLJ+=XlWGf928SjL{w81ni>6?SbE3$`Av0u!=fajW zC-HgMj9AgRC?88^%!<#&t;Ae{&m(5iLURbl(Z2|DDI5ct#&M7>I03R1CqcGh1oDm5 zR3`m`J^h8~$4SA|727m2A-~mjh^3lFQCAAK;Znbs{U}7w6P@a|tGb3`E0#em)zua= zqTg!RIi0!+cC>;83UEiqO?_TBFPo}iy?4(gs^&VxPh%Yy^17n2WeNw;l z-BfJV(qFY*H!M{)sBGBs4V@ISFPNDK^DSR8Vb715j;6Xg_N6@RXJRDI_F{T~j~_O5 z+mF%0{F1J@e#CTEbOL^%n;dUH>hKM~Hc>eNh(-e_DSJhhU-h`)26Ti;MRSO*e)wcY@UAb&K~M5q24{zUqb!6 z=$8+pWt8=#tcS8Sg_9+8a=WXHhCFG=LqoOoo)YTWL#IpV^e!6MUM{1vo^-Z^&K|yDqGYYXT0}u?1h0?%u9?CLkS|C9QBvv8xJg1waPz zjG9;Nte?F6``KGJr>;z2zLopO-&uQciLmzpr`WPiumM;mHOF2sux@L51u4=CD*nYf zA`tn2m<^H2PE=t~Z44;oO+$nDuVGS+d@=x+bU>$LAOMr@es5sXJAZSx`@@oSeB=H5 zbI(!xrddY)p49K5{(ba|t&TG4^&|!a>4Q^EwqxJT`r*eQnr&B&-KynqXiZqI$8ea% zFqXKCG+8v*iSnILeFv-?n|(ELYf+a!PzjZ(YUNE`W+qUxNEeK`;z5-q6Rp-6{iMW;6C2o!FWw zp+Qd?^w8iwz%lmV!sZe<^7frF8uz4e4~+@YX+$9 z+>DlS1(3SIVT}`Q6LAVS#({aA;EI#~KQJ{YXqKFT&+?AvUQM~Hclo6*Kgvf~c+!Z6M)pz1Cg62y`!{7Y>Pe#>8f8n@8vrYz@t-2Y zQV$QEFJaJ#jLTMe!y*phFqR%i9x)uDHSC3P6!wqai{Us<;AH(=uAX24>np-%q87>t zTIfy-&>E!FrON%mD}nqx^w%~Cbk;?j%!zfsScNr_udfIhG+RvCDpj-dDmAoX+*z@8 zEYGj{5?l4d^D53rq!Vr=CzxQ)<4Z0il0cV0tojmTONcP10CWaNSu*scoa5k3n6Q*i zn6bzN2nfd>c10veV?h=RfS(MwkySB@OAH_fS6MJjS0@mAEJIQSYNQ3Ilyt19#5ES6 z#-Zc_Tz3{aT>t@?r@OyP9p6sv6#q5oeRTUt>dsS`Mt8cuzTP2w%EJqx_NqGWMpM!CCgnV z=PZxt_c)CcR$!4d2*`pcpaWhc5yq69vU3Z(eSawvAmhOyZ=F0#evRtPa^yOGw^BZknl5RAVI=b+HNNZyAtjFjm(h?ike z6rTyh-v~FK3E6LiYtMwCgJ@jL9&}3L$%B@NIC;<(7vDPQOo(qEo`{R*_?`a+Tz7QE literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_halgorithem.cpython-312.pyc b/tests/__pycache__/test_halgorithem.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae99ee00dfc57d1b612edf85bab6611db288726c GIT binary patch literal 11890 zcmd5?Yiu0Hec!#?JKk}56d$4_N))vsWl5B&lSn-*MURLkDT|g&$$D5eD}Z0g8NMr~qk& ze(3K%doP|yIS-@lO84jN?9A-W|NWRBheGuNuC24JH*f9`grCt(>GBjCk4}n$@Q$Df znwS*E+`r2 zxVfeDqRQX*icPKVt3s(g7W%}|fvVAsTJWpF*aod0ut}2vo3#eO7A*wWsx<;`)HVRN zX-$Bev}VA~S_|M7trf6c+X%Q-YXjV-Z367jHUnQQ4REK{0obK& z2kh250e5LT0H4tmz}?eg&(2@azDDnhPW?GG8OyMQ8Bg_yPOFhkCd`OxW>Sf0B$`wc zDFfXt6N$7IiDuF!Q?*3YOk~pN4PD4Y)#Pw$T-P+6p+jbBdNLA?=hBl#w1PuHkZ|M0 z?@pqpRNi2ELVkDfO5bW}GYS)I8{J58m|NamK# zl4_RG+V_P=wC3Y>Q#VZG0KrJSv^PB!&Q3cLziq)N89!qKz>M&7xovLe`|5&x#*)sk zCiJcS>GT9VhY!}gn)I$WCgO8_ql}iy2Pf31X@t$&X8ytR16M`{l_}LwqD)s!T~iWi zrSJIB!^-%y@?s{QR?h1wJw2_crZS{5GnL7iab+-*OzJT`tW>6Jx{-*bF@GY%6vNDE zz=aW3uEur6P*d5YuEg;YRY|Fap=2hMzJtnSBAJOXHHBdVys!UY|KVQc63g7wqo#6B zPbM;klE5qw@s=L$@j7)AiKHG$XiktfvZ@((BvZd_GQy7wXz#2Uz>%~})L@&@q&}k) zU`ALD2wU2h+IKBW-sWS!3(D(It84;O(rsH$Dn7%+! z6n7U56A1?Pa2H_P=@EVg^bK~iByU)hci8fdS!q@;$h~tf+wy^j0b4$9NyndL5L%di zkIn+D=I!^28@nr*Wdz(2O1+wAaxY%$c~SU^sCn-Qw~Fm)l7jq+3Ln(6k(vfenViGc zW)Vgz^zqhpR|xlbm=8l7Z&D{g_PZ!D=yQu0+Q;RG_eY2qh+i`42v-lrYW={vFl`rp z#(sbq;lq$(?YvP4MP|-^C^x=&>rYPH+c__9v!rd7ylqL|Gk4UM`z)ys(8Xpo`RlL* z$L9CZSdTeYYmK{N1-qIA#u08xm|SzA*I(0xTl5B=?0w*SkI;kbu1E9V@o06N0RtQc z30Bdnel$NYsG5diBy~LtIfTBdmf|O5MqC$Hz!* zf=XPmIsqe^0k2P_oef3GMAA7ZVP-gOPzaH<9#c87QVAoan$dW^d0aIT(Q1|CN|2AQ z$TsnpIQ9H4!bP8u;)IM`Hk)CP6M7*7vkI(-#umkEHx2TW&et`Ah*p-b9h`fZZnA`a zOHD>-j1d_pmF&JUjM4k>GbR8aa-rsLM&61n1{#{*joTjy-bVk-*cXqdqtV8mQ%C(l^ zSBHo0i(G}=_f)iwLGLMPn~fYBMP%x*qhwauA&fYVpRpAHQX_A^C*9MmU1tjNzz3h3 zmoHh;C0oAq>3w#-uiE%%`Yk<~$-=t0YGRc!DjKXXJBV*%eFR8ZuiBO3?n+OoX%oHz z^skzYGu6-|#htPvm~x7yZ3C#<;q`+2s`YwoUXEE(%$8%H;ud?{&BXO2?QC3+CDLgK zhLY4(mR7Qi8^=^49Sb`R*M|qrM~XyTXOGQ%k5jrjS>Dt!>}6n$O16KyZPUzggg$vgPM@9#(j$9mFXB#bi8*qk_xo9HIl>$TBWhA)qt9T|B3!f<40cywgo!a5t0 z`G(*RCDbH4ja`haL6MOmGG83L5Jf8?Fk9YQyzG?s{Mn zyxbT_6%&p=AMf*5`k*fAM)PaXj|S!xmy2jcJYImtDSZ>gy}EgXPg1xdUr9j(+=&N-DT|^DujkV zQ0>sUnX^lB^V@rEdE31&&C6j+3R`lxw#$~o|7Qs8ewquM&E(RWQH5DXdu1fE`OvjA z9H&*tWY6QlTG-2BXbtwr!@EMm&j8e5@1;U$^q=0aLoa^{*rQMSec2JL=NrvG^At=S zJa|xXV}%MAx(c6|Yz@40aIPxh1)-P0T|(DMVCOLDHT(>+?KQ}`SO{IRUcPLHu2f;? zi}Uh6OWJ44`&>q!6ox=(O3@^*ygKab@6=;S4^>@eNN8w#zYGg{BhT;QX3+fBP?rwdJe7$$Fg`&C=+oPBm) z-fv0!EqVVEd>q74J7-~E4qMV;OFmp4wP#-Lv7{bb?s*C~jN!W|249cPzrU{TA%W3Y zE~YSD%SGWtq~HOoNQz8T_}+g|K^`KCK-?uEBIW6U00vERnXsj=8Wzi6UXdaGh-U`c8?B$xks_4i+n%@|SBvL$% zi0s{yG4}L;&HG$by@=Q4TXX3$?<&T0YCMxPxVr8U*?9~o`PGAHui=4m24*_tH;f3D zQYYYn7FQv=o~CKX0g5`Q+YWVGyFOD0ov6}9Q}gm}OWJM8yO(5T_Uzm-d)Hx`^C;j` z?A)iPsSgn2MRSaSw~*|i%m(Fj*3I0w5nSCA{93rP`Hfc-DSbZ@V@84!JBk})tiy$E zoC{5zsDkLMxCSHbZ4$N)RIpWY4qhvS#;ni1ZihbaVhw`YVJS!F<)fB_c>d^9aP-?c zS8=3nzb8WddMj}~l0ikaK8ZcKITNT@$&JN0E>2U@!;O@EZoo<=-{rGc(Q*R%tpr@g zsE$ZClb$HHL*SoioT4&{uARoxtt*61a}{$UG-gPL7FDs=(9NRq<%uRn{ON@B+gWfT z)|=F)jT()+jpMy0S%v62jv$IfF`e|TL19yk{j74$Ry=Z>f99p-EkZ;4yMwd-@0_2P z_gK;%Ti){|4ayPjA7f3(HKN))@|EPku4yLW);=g3N|CZ7q!nr|lUC(dg(|UXQ_S69 zqTiF|6Eq8qKCUlXM5If3EdAJ*Gbi>RLnbc(`eZaPbd5`bJZ1e9lGI4?A?M*VRpF4b zmWJasOja_50W+0FJc>M~-+d7$O${wv8K_-{k+%3ce#T`0qC(rf20OG1OfS_hN^Q2( zW^L+!|GFi$El9&$4R_h0uI~@d`M-C5_LS8>S_oaVF1>7rE>~%nTGd;OCO8hu39r9L zZ*>ylX7wmO6w)ab8|5V85kVA8T7zHkbHoQ_pNLmDPzfn{J{Vc{jee)qynQ|LZ#a-(RQy&N}@syZ!KZ zU9oKdGTpj}j9AN{bPH8R0t2pD3bQs0aJ=KH_Bc3Da*rLT2FIVBCLKvx#e@N|u75>M z=Ja94GK>-LJI!Yy?xPv=EJb;4|4n+^Bmr`fp`Ll}KZ*7#Z(%!EzAT4qRZ2%?h*A0= z9gFC+xWT3{{!eMnF9DFO3U2u3_P4fwEC17A*N1_Y**D(r`)T0Ruf0Nj_s>7^(Zc4v zKMnRQ1sWCu+wH*ica7O&b1A#~L}ABCtMkxnmH6lmJJ50O^6b`yK<^W; zstowz+_2TzZ*4#HaBLxPh7bI>T(gkpE*Ip351%hv=3lqro2_8i)I?NAQB%2gWhhKeD(`>3 z*x!GI_vg2j9t`qIr90_RAAeY7;6H=!cEl+A0{-#@*BZHpqa4FUZ4xonanvl~DI+Y4 zOrdsNtbjxk2GxA4(wuj&=3nDirohP>3Je!QXFovRW@H@-G`_vtYVWmM_S-y)1#~HZ zHOrSDEza<7qET(bDML|lV&5xHK*77{R^1ge=`Ji9*j~6L{ZJ;BN6Na;~Qk>?+H9gr(xlx)bPvPJ+nS;dll{wFUbv~(gCHx)4XKrWUuOo+?= zz9W#DO_rI|DP&2lwPvp} zlGYOH9z)@!S7MYDr8UzD170=B>XZtdoK%y_s2(S2PN|x%sLF(j`Z$VfSzV9D6;ARb zqVfUdDx1dF67iKGn2bbvI{f$>zwJvj6vPpFW5GDo0%EVs6cyZ`T)XrE#_FA~vsXf1 zl};hoPH=NNAdtX_;gQ58J;}&-xO{Wu46~#UvtGf8Y zr6H-`#=~S}0;h%MD_& zS&y=QV@oN|Fy||T!gCOmzKWn&JBsBsOFmi7GZfWVIX}FnWuRoCtEw~)Xa4F@ct+Q% zBLHrCk7A0Vbto}E7#%r=cmAJpB>NIJQl;u<=oS&Iz`g=d6OtU^I?h<5qNZhV00u>q z2-DxlC6F#kqnmuxP1koInFxe;q!f&7Ptnm%1PYoI*Gibf^!eq44lwNNnJ*uz+&kPbPbNc zP>}e~H2o3)m#BB|e&Q1qTW5?X&G}?%R_0L2j&; zZd*nH=ES^w!jewd@`)!S`5@lH3Gg-~syyvw&Ft(E$2w$@C; zt+ug*$1j&mZQE!*K%BE5Ngv0@GZIY4(7bdoeCVJf_B&!HJ~N+h#5iQL_n3Perl}=cY_@nr`7maH+y4$%joEr_pd{{X3TDYHCJ! zsByM*ys9y|>RX5}=}5$*GXQ_?c}IB5yHBh{9tw=8nfb=LNsd)W?hu{-LK%f$O&X|9&akcw4QoL zLduG{!D#}kIeus(J$=imL-J5HP3Cw(52p_KNF$R(9RUtADB?6m=`>&zq)BIUCbC;G z*!3}#Cz#x5#63>}4?n$?nh4rLSyaYvAQ4BUWm($^mG44j0~%q8{H>>*~5V`dZp zmc61V{+kf`nNa_c(DjjU@+0BcN5a03g#F8Y!P7Le>1&-|?ktGSzx0UWfcVHOcv_Zu z6K>?H#|@&t_!O;`PEq{KO28}juQW7>r|9-&aU~#%aSo##UgIzV=oJsFG;rV%pIxc< zi~CkuB(xg6;*pgGkNDh5eWSQ{rHl8;xb2X{Lo1Dv7+z@%VXjh-cWjl!laCrVif&=& Fe*lkm_U-@x literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_voting.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/test_voting.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60151b393331f5b5ce3a9fa40a7209e3eef0d51e GIT binary patch literal 15844 zcmeG@ZEPDyc3g7#AucKE(~@jS{)p|EIoq;qS&CiTisCpICyH`%>|Ck~vZc8cMVlYG z*;NwxNWf@Qz&{Gq|4u0k)Q1LW3=4(&FF(^3D12y9kR}zm3j^0fKiu5)4o2+UkDx#L z-ptPIQldmAC#P353-axonK!e;$C>x$ee6Gk!$B8Ze{+26rMJT_*S}(cUv3L|bQO>v zyEK<3rCn1Jy}L7>xLa~%&EQ;|%|R14J0@4Kd2H7~$6%?Geu^8@VA0suR;CV*SCAi%D9DXM&i)0v3MrZQ&a zGO76KnMC}u*=iNiu~g<k{@7(myurX)dTEm8 zUi93ym}z)Ly{vhm#dq7HH2l29XyTaW*JLfQ*u-g)5|n~`EMEH-j9qXQBuy!}^RB%6 z@;)eg@*e#I1LsGtOCgTgcP*6nOiNlA`%JrU%dACfHX@wnQVYlI`)vz-$E9~V#&5GJ zIF@(aw)jDulpVa?zT1>p37&7yx)!aQkE%s(xUcx&x5`?J)|!`75-oX+EA5))Puy~wUbf!?iFZ<3A{$SbzB3c&$Igx?xBu}kCqM50 z)#xpc>3=6dv{c+MgYjH;n$ZzlmZmh8jK1}!fBVJ0|3W%%93=p0hZcgX28Q?1DxKhR zHk)3*@V)Q7cJBO{7j8*kx=)_GC0%LTvu}3(P*mS>^5hOgk3@CaAG&&=0l|?kn`643 zAO|od*$Ps4-%; zs_|Glt&+qniRUu2F_1zbXjU)IrP4+!tDC-b4k&c9Th(WBb7_r|s>yWjVl1s@W0{26 zr0TKh1dd=fkI^}&a@jM4PUlD_W~hnxX4A23%z&0#KATsjg3n?<^suuq$O^3Y zkh3tX<4@W_uP;vxTmT;F10*pYi)Hm0J(Zn_4V;fbk#m_x(WE zg7(pymB8@*K+8h+N7~1Lzj!~;Ugc z_uACMpsQx>R^*-6FAExh0P($^zM|ZB?f61R zN$FdW@w3dztRP?sYgwsmqZPStp+nFJV-A6Sqdl#8hz2f zu9^v~Q8PkeU81K6J~?*Rny6?Du2M6K0p|^NU25i-eXmk8_V_gvlC3;IG}@FKDkKn% zN*!8TG=_wl32V&`HPgvw>lkSZue0wPsTo74rAu4w9vmz9G^yZsyYjxgpNs!O08~tX ztN81Z9&|i?eeOrFmqRhNY#R54x~a>3K?&Rkh28KBdK|y7#{X~gkn@N*d(>D5YkR&Q1*$C#ke==uusk@%e-W8f&`gRj33 z02SOhk^Id*)R+G`N^s{9+;nE}^lKC6&yBrs_VoGWpM8MWKaUdp`2<%c!9_{7A?QOu zwQE0OI}z*x5S6ZsuhOH>M)ly2unv$NOdk&MDnP8N!%_WUY<8C9-U}A?A@y)nj|K}= zd7D0_8lzsa9iD+o_b`GX1j7iPy%KRK=0VWnraU_j_vV@k7f!@35=HO~g6|;MkKh1; zg9ru?90y?f*tDXGts%)EwjM@s1i=u}Ike^xXW^)`Fyhpj$Cy@U`gCwm$Ad1@p%jv% zIBcI&8)_*%9wV9##z*Y3noXz3i$K!~U;Pjqu4^d4-lg52bXNi|gA!ageDi!I@XR-) z_3rm<|MR{NK=bXtX#n`3@1{}i*$>#Y_O6stl^7%#%BcLSOCX(kq?g#73sU*lT*fbCR?hDi& zzj9~?GxXBf`1pm>XD7zMtlC&jklio_*^7YMMWNqHOdo=A90mxJn}?ioa|X<*xp)d} zorJD(%h5p0AThKj!2}|&z%Z!X^nVBNwM6NjqP!anh-^vOy&~ghnUz^Vz!KK7QrSi; z^6u+dK_d_#eqsj1TKR`6h#C-2%!3usn!sm;&wvX&SWSz{x~4^FjcI}NqgPE2R+#SQ z=2fOeWYfGZLf+N&x(G^;j1h<530M27D{t@S7GYYnYHbeFVlzBg?UrPGp+OH;2jykw zD#^HI(|GBWjP}1;i$8)Ac)8_wftMf|yC#wyfA!s;U+MU@P^BY6q_Uv_536Fb4f zS@MU}-38JT4MKXBHbWOC-Z*pa?2Bi|o*zF`M_N7wy`qvSor2?aD?eVJ5?(4~8hIK) zO-XpnDG9Y4NWg5)P}2z}|G)#2(C{d!=D@c`z@?_Xo64%ObS|49ufd>u;H!TDU}KSZ zzhlowr$6q#qg6UyFSeh**S6)c*V7by0OcWv&_n@ zAYch=S*dKJ6}fjo7Bm96EBCGyk!USZ5%~*nqOX5M)I5ta7jq6bzgP2to8K>5gqxpw zSpw^c!u3K4AU-$iDei(t3l?M$o-*|(c=KKe8EkxfQu1C18HBKp!D%a(aXO0c*84HN4uJ_;dLkS9`$=UP)Q&DFp2x zXI{<+7q@a*R#1!{keT$#E4*(ZWC$PUrC#1{-wR=FTcH`eo#A}5=o86@@)0VXTJkO6 z?Yv!QEhzaGkWanX%dr;v8vZAop2HgE-CED#*YY*?Y5kivBP+1%67sO_jMnskwbJk4 z>xL4v4xR8t)}a%tq7Ge%uLN!Trg2QHLOym~j;-kd9NS)J=3zd19Xher>(E8`c-oE| zLFzAV)plyT@~zgcZfkf~xABqq-O^z)_=M=XD&cAY22dz4~Mfb$<}vql)Q>#ikOlaF~}UXNUcN2UED>%{YleIdzY|Bh<5(R#=& zf#?@}cs9zQ7z!~y7IbH^wL!pmB|M=zls3plBY0Y7xhELdiUvhAZIJB1rd%F}Qh0>IqZE!*huYvb)9PanpfkO!z~N6vqmkdrt4n8#6D7|9h_;j05q|^0c?91_ zux=oZ1=XxT94k}>;;i%)7Bw7phUlJUQ90H!c(|%GR8<jMu%uI&dfkT<#jo^C6$w#>@T z02|;K#v5gI7{;?ujMIUqdv#0>i0^HS7Uk%*6APzGN_0iW&oV2sf`BEgWu>x>R^;fy zDM2F;Aik&c-Heo!ffX4)%kn@GD_BXu60K&%vI7?ccjW=z3EDX-ngM|!?j0E^%JDlM z05{a7p*x;R-#FkErMK8OURGX3eDeD%lW&(M-v+#>UId7^da*ni#~PrSjF*)7ij1F6 zc?~NHSfXRHV%dQUg1a~Y(azC|9A6q@wLoykBT$%~jVrG*TtI8qbVm3z_yrV);fj-Q z+a1-5Ha=Q{^)fHv(8pskTTn3`4c^P`3lUu;00UWZD&6f zeBhz+QCr@h_k%6}BZq4r4;rg3P09PIuMv7VZ254*0f`>R_%&==L*@_UXFNDZZy1xZu!z8?6Z z?d>i23_KUp%l?NUvA?|#guN5wdq@9`WB%*!985+JI(AOu$>{a|{WboOx;tl05BT>x z|L^P^2fu3tEQQw#JgzcbFADXzDwc01)bJ|_!r#SriRGK2>!lC`tE^{&*kMMNviBJa zGn&%#G$3h8r)k9TGkp8vGnD$&+yd)m-m*ZL31M3h+DKlZ(o$Cv*~OtE@!GXC$YPwAoJM zr$3DTB5JjeMl%iRBg2HiaLlIB5KACPA(#eGWo;ZGaZzQN97n4RkP-4c640zfMleEN zfEpd65ZwHrcV>eeq&A52%_kKZs;iWxMvgI5wJdL)T!CTE;CMe}iHD)Ce+%{9JiXNY zNxTwz`98&a?>torO)#9*KlIBjLCCd_v_B5}!@jF$9=Ewd-3#8b(qEK^KivD52Yv#k z#qg4ZV2ol*V}I=h%VWcVrw6S}YTFU*w{h=-3BqoW7` zkB*iUFp}`I%xhRtz>+*#QTkc2?7#)VU3qk2PPB8hLWg73Kmkt@9Rvb2YY&~cQ&DI< z`fqR>u6O8M6d^B>>){_0qzhRH9Cgm8jFcDp99hWZf`i{Y0JBlt9lDb)wy7 zfWIV1bNkI6mAyM~koof<&+{NjybG_N!7Fm2%Jf@xOkW=dewl5K_tXw%a#|v-JJh@v zNT@5dnvT7+Mh_6|B!DZI`CW$*dlA7a2r#pXdhe(rrT^6}7nQzD9S2UpGw0HYQ{;W1 z#G{*z2atyzNs@l%a=q{RqwoXQn`PIVKXa*Nm-@NSFZF-k;g=2|04yl=eZI9t(xlJ# PD$;Y(qh7z%M~D9(GPpZP literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_voting.cpython-312.pyc b/tests/__pycache__/test_voting.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9b311b452eb2a355ba0d97f9949676fd4009cb4 GIT binary patch literal 3337 zcmc&$U2M}<6t)v5cH*>!Hqb&7prZxC3T3cgX%Ka#K%q)mX;GC`v$9;*HFfN8?EsxX zsx~I=ZAg1qnWhbCduT^B@z_VSy-XUD+z@G<*GZcujcxo0QP=}dJJ*gISN+*EG3`n@ zzJEUV#@~0obH1sn3RBSDHlnA_LGKqbaGS5(neT(n2Na?Ztx#$5^eJ@Cm-glSX@8DM zGr2%IK$CgD63nq_7W&LZD$St)*boYW4I>sTk2tUq6arf{N+-gp@J?7XEa09v*kP64<4)^ ze)AuL`G7KLdlrmAX6=S(Kax7afxMsn7$!dXona`xdWxTHn5 zONqhvzI?Rf4%rvTlJAMvzzSXq2HXejDIG`1?$C+;aUdZP)Z0Le+U`>?-Q8hBM zl7k0s1SjM){dF*7)ZE7AIj(*jO$?N{=jXUJ;|&vN8v2LkxVlno*R_u84d%&oDfXJ# zIB25b5+~2IOk;3{sUJK1Zu)+hiqu^iDe_Gw(`52Zv;4a8XpxVbOnioq&%+5+mKe8j z&>{bA{&O%-iYgMs#1y*BGyUeV@cNWO*Gi^)8xOx^Py^j0mbC<0jZc$fXZ}Sq8+|wdU&$CGIGIIKKBvUy0lKH>iEI{*(CS z_!Z;khSnnAYBH@ed@GKD^X1{Y2rK^&d?>Ieo&lKN=}i!&USRMhxZbU=9udUudJwT9 zN7}o)`#ZW)?GS#G%yQ5yg<_8vis6i+6tc3Kfl0v?0)zAnoFVccYbxFb$CEJAd%*ll zbkri>Y%P?Cm<#)&5F%r}XU&((WX2 z1KF@7ieBw$=&7Mzk0%70#DG)B!>Ec!USC?iE0d7 zu!-R2_gZ(t4R5FiBDV6nJ8JAvWm#vy)fQ-ucE9Ll-Qw#BAmj?rP+usnK+|Pu*bp>T z8O6_lpS|$Y-v!u`iRznt>nvYCexS%FOeSISiCI2=rKZTYnM~UZ-?j=-$Kb{*5p~Mv zCRGnf{fL8QH?i)TdVd+Mv*iq&xgg_?wp21MXywi zP7z7U$;JDeYSBr1BthAAgq^Nzy|ejeUw@La4|Dj;y$t>fig*CO0Pm3UNN)pk-%r!@ q4^-7HYTza{@FSIY5M=31527r+ix}vI>DYsf)%2eEr&u~}JNyd=ITgSF literal 0 HcmV?d00001 From c9e458c814e6ab1a21465a589e9390067c2a6594 Mon Sep 17 00:00:00 2001 From: Reyaansh Sinha Date: Sat, 30 May 2026 18:03:19 -0400 Subject: [PATCH 2/2] Reconstruct optimized verifier stack --- Halgorithem/__init__.py | 3 +- Halgorithem/checks/__init__.py | 1 + Halgorithem/checks/atomic.py | 91 ++++++++++++++ Halgorithem/checks/nli.py | 101 ++++++++++++++++ Halgorithem/checks/similarity.py | 54 +++++++++ Halgorithem/checks/units.py | 38 ++++++ Halgorithem/claim_extraction.py | 12 +- Halgorithem/confidence.py | 27 ++++- Halgorithem/contradiction.py | 103 +++++++++++++++- Halgorithem/core.py | 112 ++++++++++++++--- Halgorithem/evidence.py | 30 +++-- Halgorithem/ingest.py | 74 ++++++++++++ Halgorithem/main.py | 74 ++++++++++++ Halgorithem/math_utils.py | 24 +++- Halgorithem/model_runtime.py | 70 +++++++++++ Halgorithem/models.py | 86 +++++++++++++ Halgorithem/nlp.py | 20 ++++ Halgorithem/process.py | 29 +++++ Halgorithem/retrieval.py | 42 ++++++- Halgorithem/source_quality.py | 4 +- Halgorithem/temporal.py | 43 +++++-- Halgorithem/text_processing.py | 36 ++++-- Halgorithem/voting.py | 114 ++++++++++++++++++ Halgorithem/web.py | 105 ++++++++++------ README.md | 52 +++++++- pyproject.toml | 2 + requirements.txt | 2 + tests/test_halgorithem.py | 199 ++++++++++++++++++++++++++++++- 28 files changed, 1441 insertions(+), 107 deletions(-) create mode 100644 Halgorithem/checks/__init__.py create mode 100644 Halgorithem/checks/atomic.py create mode 100644 Halgorithem/checks/nli.py create mode 100644 Halgorithem/checks/similarity.py create mode 100644 Halgorithem/checks/units.py create mode 100644 Halgorithem/ingest.py create mode 100644 Halgorithem/main.py create mode 100644 Halgorithem/model_runtime.py create mode 100644 Halgorithem/models.py create mode 100644 Halgorithem/process.py create mode 100644 Halgorithem/voting.py diff --git a/Halgorithem/__init__.py b/Halgorithem/__init__.py index 5d992c1..6929e15 100644 --- a/Halgorithem/__init__.py +++ b/Halgorithem/__init__.py @@ -1,3 +1,4 @@ from .core import Halgorithm +from .main import HalgorithemVerifier, verify, verify_urls -__all__ = ["Halgorithm"] +__all__ = ["Halgorithm", "HalgorithemVerifier", "verify", "verify_urls"] diff --git a/Halgorithem/checks/__init__.py b/Halgorithem/checks/__init__.py new file mode 100644 index 0000000..2b00b9d --- /dev/null +++ b/Halgorithem/checks/__init__.py @@ -0,0 +1 @@ +"""Higher-level verification checks for Halgorithem.""" diff --git a/Halgorithem/checks/atomic.py b/Halgorithem/checks/atomic.py new file mode 100644 index 0000000..eba9ffd --- /dev/null +++ b/Halgorithem/checks/atomic.py @@ -0,0 +1,91 @@ +import re +from functools import lru_cache + +from ..claim_extraction import split_atomic_claims +from ..models import AtomicCheck, AtomicClaim + + +@lru_cache(maxsize=8192) +def _tokens(text): + return frozenset(t.lower() for t in re.findall(r"\b[a-zA-Z][a-zA-Z'-]+\b", text or "") if len(t) > 2) + + +def _overlap(left, right): + if not left: + return 0.0 + return len(left & right) / len(left) + + +def _priority(verdict): + return {"CONTRADICT": 3, "ENTAIL": 2, "NEUTRAL": 1}.get(verdict, 0) + + +def _score_claim(verdict, confidence): + if verdict == "ENTAIL": + return confidence + if verdict == "CONTRADICT": + return -confidence + return 0.5 * confidence + + +def prepare_document_claims(document): + doc_claims = [] + for sentence in document: + for claim in split_atomic_claims(sentence.resolved_text) or [sentence.resolved_text]: + doc_claims.append((claim, _tokens(claim))) + return doc_claims + + +def atomic_claim_nli(processed_sentence, document, nli_model=None, doc_claims=None): + ai_text = getattr(processed_sentence, "resolved_text", processed_sentence) + ai_claims = split_atomic_claims(ai_text) or [ai_text] + doc_claims = doc_claims if doc_claims is not None else prepare_document_claims(document) + + if not doc_claims: + return AtomicCheck(status="no_document_claims") + + matched = [] + for claim in ai_claims: + claim_tokens = _tokens(claim) + best_claim, best_tokens = max(doc_claims, key=lambda pair: _overlap(claim_tokens, pair[1])) + matched.append((claim, best_claim, best_tokens)) + + if nli_model is not None: + nli_results = nli_model.predict_batch( + [best_claim for _, best_claim, _ in matched], + [claim for claim, _, _ in matched], + ) + else: + nli_results = [None] * len(matched) + + results = [] + for (claim, best_claim, best_tokens), nli in zip(matched, nli_results): + claim_tokens = _tokens(claim) + if not claim_tokens or not best_tokens: + results.append(AtomicClaim(claim=claim, verdict="NEUTRAL", confidence=0.0, evidence="")) + continue + if nli is None: + overlap = _overlap(claim_tokens, best_tokens) + verdict = "ENTAIL" if overlap >= 0.70 else "NEUTRAL" + confidence = overlap if verdict == "ENTAIL" else 0.50 + elif nli.label == "CONTRADICTION": + verdict = "CONTRADICT" + confidence = nli.score + elif nli.label == "ENTAILMENT": + verdict = "ENTAIL" + confidence = nli.score + else: + verdict = "NEUTRAL" + confidence = nli.score + results.append(AtomicClaim(claim=claim, verdict=verdict, confidence=confidence, evidence=best_claim)) + + entail = sum(1 for c in results if c.verdict == "ENTAIL") + contradict = sum(1 for c in results if c.verdict == "CONTRADICT") + total = len(results) + if total: + weighted_sum = sum(_score_claim(claim.verdict, claim.confidence) for claim in results) + score = weighted_sum / total + else: + score = None + results.sort(key=lambda c: (_priority(c.verdict), c.confidence), reverse=True) + return AtomicCheck(claims=results, score=score, status="ok") diff --git a/Halgorithem/checks/nli.py b/Halgorithem/checks/nli.py new file mode 100644 index 0000000..1cd8966 --- /dev/null +++ b/Halgorithem/checks/nli.py @@ -0,0 +1,101 @@ +import re +from functools import lru_cache + +from ..contradiction import find_contradiction +from ..models import NLICheck +from ..text_processing import extract_numbers, has_negation_mismatch, lemmatize_tokens + + +@lru_cache(maxsize=8192) +def _tokens(text): + return frozenset(t.lower() for t in re.findall(r"\b[a-zA-Z][a-zA-Z'-]+\b", text or "") if len(t) > 2) + + +@lru_cache(maxsize=8192) +def _content_lemmas(text): + return frozenset(lemma for lemma in lemmatize_tokens(text) if len(lemma) > 2) + + +class NLIModel: + model_quality = 0.75 + + def predict(self, premise, hypothesis): + return self.predict_batch([premise], [hypothesis])[0] + + def predict_batch(self, premises, hypotheses): + return [rule_nli(premise, hypothesis) for premise, hypothesis in zip(premises, hypotheses)] + + +def rule_nli(premise, hypothesis): + chunk = {"text": premise or "", "numbers": extract_numbers(premise)} + issue = find_contradiction( + claim=hypothesis, + chunk=chunk, + extract_numbers=extract_numbers, + has_negation_mismatch=has_negation_mismatch, + score=1.0, + threshold=0.0, + ) + if issue: + return NLICheck("CONTRADICTION", 0.84, issue.get("reason", "Contradiction"), model_quality=0.75) + + premise_tokens = _tokens(premise) + hypothesis_tokens = _tokens(hypothesis) + premise_numbers = set(extract_numbers(premise)) + hypothesis_numbers = set(extract_numbers(hypothesis)) + if hypothesis_numbers and not premise_numbers: + return NLICheck("NEUTRAL", 0.35, "Missing number evidence", model_quality=0.75) + if hypothesis_numbers and premise_numbers and not hypothesis_numbers.issubset(premise_numbers): + return NLICheck("NEUTRAL", 0.42, "Number evidence differs", model_quality=0.75) + if hypothesis_tokens: + token_overlap = len(premise_tokens & hypothesis_tokens) / len(hypothesis_tokens) + premise_lemmas = _content_lemmas(premise) + hypothesis_lemmas = _content_lemmas(hypothesis) + lemma_overlap = len(premise_lemmas & hypothesis_lemmas) / len(hypothesis_lemmas) if hypothesis_lemmas else 0.0 + overlap = max(token_overlap, lemma_overlap) + if overlap >= 0.70: + return NLICheck("ENTAILMENT", min(0.60 + overlap * 0.30, 0.92), model_quality=0.75) + return NLICheck("NEUTRAL", 0.50, model_quality=0.75) + + +def sentence_nli(processed_sentence, document=None, nli_model=None, hits=None): + model = nli_model or NLIModel() + claim = getattr(processed_sentence, "resolved_text", processed_sentence) + claim = claim if isinstance(claim, str) else str(claim) + relevant_hits = hits or [] + if not relevant_hits and document is not None: + relevant_hits = [{"sentence": s.resolved_text, "score": 1.0} for s in document[:1]] + + best_hit_score = max( + ( + (hit.get("score", 0.0) if isinstance(hit, dict) else getattr(hit, "score", 0.0)) + for hit in relevant_hits + ), + default=0.0, + ) + min_hit_score = max(0.30, best_hit_score * 0.80) + premises = [] + hypotheses = [] + hit_scores = [] + for hit in relevant_hits[:5]: + premise = hit.get("sentence") if isinstance(hit, dict) else getattr(hit, "sentence", str(hit)) + hit_score = hit.get("score", 0.0) if isinstance(hit, dict) else getattr(hit, "score", 0.0) + if hit_score < min_hit_score: + continue + premises.append(premise) + hypotheses.append(claim) + hit_scores.append(hit_score) + if not premises: + return NLICheck("NEUTRAL", 0.50, model_quality=getattr(model, "model_quality", 1.0)) + + results = model.predict_batch(premises, hypotheses) + entailments = [r for r in results if r.label == "ENTAILMENT"] + strong_entailment = max(entailments, key=lambda r: r.score) if entailments else None + contradictions = [r for r in results if r.label == "CONTRADICTION"] + if strong_entailment and strong_entailment.score >= 0.82: + return strong_entailment + if contradictions: + return max(contradictions, key=lambda r: r.score) + if entailments: + return max(entailments, key=lambda r: r.score) + return max(results, key=lambda r: r.score) diff --git a/Halgorithem/checks/similarity.py b/Halgorithem/checks/similarity.py new file mode 100644 index 0000000..42f1be9 --- /dev/null +++ b/Halgorithem/checks/similarity.py @@ -0,0 +1,54 @@ +import heapq + +from ..models import SimilarityCheck, SimilarityHit +def _token_overlap(left_tokens, right_tokens): + left_tokens = {token for token in left_tokens if len(token) > 2} + if not left_tokens or not right_tokens: + return 0.0 + return len(left_tokens & set(right_tokens)) / len(left_tokens) + + +def _similarities(embedder, left, rights): + if hasattr(embedder, "similarity_many"): + try: + return embedder.similarity_many(left, rights) + except Exception: + pass + return [float(embedder.similarity(left, right)) for right in rights] + + +def similarity_search(processed_sentence, document, embedder, top_k=5): + query_embedding = processed_sentence.embedding + if query_embedding is None: + query_embedding = embedder.encode(processed_sentence.resolved_text, convert_to_tensor=True) + + embeddings = [] + for doc_sentence in document: + embedding = doc_sentence.embedding + if embedding is None: + embedding = embedder.encode(doc_sentence.resolved_text, convert_to_tensor=True) + embeddings.append(embedding) + + raw_scores = _similarities(embedder, query_embedding, embeddings) + hits = [] + for doc_sentence, raw_score in zip(document, raw_scores): + overlap = _token_overlap(processed_sentence.tokens, doc_sentence.tokens) + lemma_overlap = _token_overlap(processed_sentence.lemmas, doc_sentence.lemmas) + number_bonus = 0.05 if processed_sentence.numbers and processed_sentence.numbers.issubset(doc_sentence.numbers) else 0.0 + overlap = max(overlap, lemma_overlap) + score = min(raw_score + 0.12 * overlap, 1.0) + score = min(score + number_bonus, 1.0) + hits.append( + SimilarityHit( + sentence=doc_sentence.context_text or doc_sentence.resolved_text, + score=score, + source=doc_sentence.source, + sentence_id=doc_sentence.sentence_id, + source_quality=doc_sentence.source_quality, + ) + ) + + selected = heapq.nlargest(top_k, hits, key=lambda hit: hit.score) + source_quality = max((hit.source_quality for hit in selected), default=0.55) + score = selected[0].score if selected else 0.0 + return SimilarityCheck(score=score, hits=selected, source_quality=source_quality) diff --git a/Halgorithem/checks/units.py b/Halgorithem/checks/units.py new file mode 100644 index 0000000..09a6573 --- /dev/null +++ b/Halgorithem/checks/units.py @@ -0,0 +1,38 @@ +UNIT_ALIASES = { + "g": "gram", + "gram": "gram", + "grams": "gram", + "kg": "kilogram", + "kilogram": "kilogram", + "kilograms": "kilogram", + "lb": "pound", + "lbs": "pound", + "pound": "pound", + "pounds": "pound", + "m": "meter", + "meter": "meter", + "meters": "meter", + "cm": "centimeter", + "centimeter": "centimeter", + "centimeters": "centimeter", + "km": "kilometer", + "kilometer": "kilometer", + "kilometers": "kilometer", + "mile": "mile", + "miles": "mile", +} + +NORMALIZATION = { + "gram": ("mass", 0.001), + "kilogram": ("mass", 1.0), + "pound": ("mass", 0.45359237), + "meter": ("length", 1.0), + "centimeter": ("length", 0.01), + "kilometer": ("length", 1000.0), + "mile": ("length", 1609.344), +} + + +def normalize_unit(unit): + canonical = UNIT_ALIASES.get((unit or "").lower()) + return NORMALIZATION.get(canonical) if canonical else None diff --git a/Halgorithem/claim_extraction.py b/Halgorithem/claim_extraction.py index 99c851f..577b42f 100644 --- a/Halgorithem/claim_extraction.py +++ b/Halgorithem/claim_extraction.py @@ -1,18 +1,14 @@ import re -from .nlp import nlp +from .nlp import parse CLAIM_SPLIT_RE = re.compile(r"\s*(?:;|\n+|\s+-\s+)\s*") -CONJUNCTION_RE = re.compile( - r"\s+(?:and|but|while|whereas)\s+" - r"(?=(?:[A-Z][a-z]+|\d|it\b|he\b|she\b|they\b|the\b|a\b|an\b))", - re.IGNORECASE, -) +CONJUNCTION_RE = re.compile(r"\s+(?:and|but|while|whereas)\s+", re.IGNORECASE) def _has_factual_shape(text): - doc = nlp(text) + doc = parse(text) has_subject = any(t.dep_ in {"nsubj", "nsubjpass"} for t in doc) has_verb = any(t.pos_ in {"VERB", "AUX"} for t in doc) has_anchor = any(doc.ents) or any(t.like_num for t in doc) or any(t.pos_ == "PROPN" for t in doc) @@ -27,7 +23,7 @@ def _has_factual_shape(text): def _has_event_verb(text): - doc = nlp(text) + doc = parse(text) return any(t.pos_ in {"VERB", "AUX"} for t in doc) diff --git a/Halgorithem/confidence.py b/Halgorithem/confidence.py index 60d8959..76a4ae9 100644 --- a/Halgorithem/confidence.py +++ b/Halgorithem/confidence.py @@ -1,3 +1,6 @@ +from .nlp import parse + + INFERENTIAL_TERMS = { "helped", "made", @@ -9,6 +12,14 @@ "significant", "influential", } +INFERENTIAL_ROOT_LEMMAS = { + "help", + "ease", + "learn", + "influence", + "matter", + "signify", +} NEGATION_TERMS = { "no", @@ -28,8 +39,9 @@ def is_inferential_claim(claim): - words = set((claim or "").lower().replace(".", "").split()) - return bool(words & INFERENTIAL_TERMS) + doc = parse(claim) + root = next((t for t in doc if t.dep_ == "ROOT"), None) + return bool(root and root.lemma_.lower() in INFERENTIAL_ROOT_LEMMAS) def is_negative_claim(claim): @@ -42,7 +54,14 @@ def classify_support(score, threshold=0.30, contradiction=None, unsupported_term supported_threshold = max(threshold + 0.10, 0.40) hard_contradiction = contradiction and contradiction.get("reason") in { - "Date mismatch", "Number mismatch", "Unit mismatch", "Negation mismatch" + "Date mismatch", + "Number mismatch", + "Unit mismatch", + "Negation mismatch", + "Entity-role mismatch", + "Location mismatch", + "Source qualifier mismatch", + "NLI contradiction", } if hard_contradiction: return "CONTRADICTION" @@ -71,6 +90,6 @@ def confidence_score(score, evidence_count=0, contradiction=None, unsupported_te confidence = max(0.0, min(float(score), 1.0)) confidence += min(evidence_count, 3) * 0.04 confidence -= min(len(unsupported_terms), 4) * 0.06 - if contradiction: + if contradiction and status in {"CONTRADICTION", "HALLUCINATION"}: confidence += 0.10 return round(max(0.0, min(confidence, 1.0)), 3) diff --git a/Halgorithem/contradiction.py b/Halgorithem/contradiction.py index 372baaf..da039c9 100644 --- a/Halgorithem/contradiction.py +++ b/Halgorithem/contradiction.py @@ -7,6 +7,9 @@ } UNIT_ALIASES = { + "g": "gram", + "gram": "gram", + "grams": "gram", "kg": "kilogram", "kilogram": "kilogram", "kilograms": "kilogram", @@ -36,6 +39,19 @@ "euros": "eur", "euro": "eur", } +UNIT_TO_BASE = { + "gram": ("mass", 0.001), + "kilogram": ("mass", 1.0), + "pound": ("mass", 0.45359237), + "kilometer": ("length", 1000.0), + "meter": ("length", 1.0), + "centimeter": ("length", 0.01), + "mile": ("length", 1609.344), + "celsius": ("temperature_c", 1.0), + "fahrenheit": ("temperature_f", 1.0), + "usd": ("money", 1.0), + "eur": ("money", 1.0), +} def numbers_conflict(claim, chunk, extract_numbers): @@ -46,23 +62,26 @@ def numbers_conflict(claim, chunk, extract_numbers): if claim_numbers.issubset(truth_numbers): return None - def skip(number): + def skip_year(number): try: value = float(number) - return 1400 <= value <= 2100 or value <= 31 + return 1400 <= value <= 2100 except (ValueError, TypeError): return True for claim_number in claim_numbers: - if skip(claim_number): + if skip_year(claim_number): continue claim_value = float(claim_number) for truth_number in truth_numbers: - if skip(truth_number): + if skip_year(truth_number): continue truth_value = float(truth_number) if claim_value == 0 or truth_value == 0: continue + if 0 < min(claim_value, truth_value) <= 100: + if abs(claim_value - truth_value) / max(claim_value, truth_value) < 0.05: + continue if min(claim_value, truth_value) / max(claim_value, truth_value) >= 0.5: if claim_value != truth_value: return { @@ -84,12 +103,60 @@ def _units(text): return units +def _quantities(text): + import re + + quantities = [] + for value, unit in re.findall(r"\b(\d+(?:\.\d+)?)\s*([A-Za-z$]+)\b", text or ""): + canonical = UNIT_ALIASES.get(unit.lower().replace("$", "usd")) + if canonical: + quantities.append((float(value), canonical)) + return quantities + + +def _base_value(value, unit): + dimension, multiplier = UNIT_TO_BASE[unit] + if dimension == "temperature_c": + return "temperature", value + if dimension == "temperature_f": + return "temperature", (value - 32) * 5 / 9 + return dimension, value * multiplier + + +def equivalent_unit_numbers(claim, chunk_text, tolerance=0.02): + claim_quantities = _quantities(claim) + truth_quantities = _quantities(chunk_text) + equivalents = set() + for claim_value, claim_unit in claim_quantities: + if claim_unit not in UNIT_TO_BASE: + continue + claim_dim, claim_base = _base_value(claim_value, claim_unit) + for truth_value, truth_unit in truth_quantities: + if truth_unit not in UNIT_TO_BASE: + continue + truth_dim, truth_base = _base_value(truth_value, truth_unit) + if claim_dim != truth_dim: + continue + denom = max(abs(claim_base), abs(truth_base), 1.0) + if abs(claim_base - truth_base) / denom <= tolerance: + equivalents.add(str(claim_value).rstrip("0").rstrip(".")) + equivalents.add(str(truth_value).rstrip("0").rstrip(".")) + return equivalents + + def unit_conflict(claim, chunk_text): claim_units = _units(claim) truth_units = _units(chunk_text) for value, units in claim_units.items(): truth = truth_units.get(value) if truth and units.isdisjoint(truth): + claim_unit = next(iter(units)) + truth_unit = next(iter(truth)) + if claim_unit in UNIT_TO_BASE and truth_unit in UNIT_TO_BASE: + claim_dim, _ = UNIT_TO_BASE[claim_unit] + truth_dim, _ = UNIT_TO_BASE[truth_unit] + if claim_dim == truth_dim and equivalent_unit_numbers(claim, chunk_text): + continue return { "reason": "Unit mismatch", "claim_units": sorted(units), @@ -137,6 +204,30 @@ def source_qualifier_conflict(claim, chunk_text): return None +def location_conflict(claim, chunk_text): + import re + + pattern = r"\b(?:in|at|from|near)\s+([A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+){0,3})" + claim_locations = {m.group(1).lower() for m in re.finditer(pattern, claim or "")} + truth_locations = {m.group(1).lower() for m in re.finditer(pattern, chunk_text or "")} + if claim_locations and truth_locations and claim_locations.isdisjoint(truth_locations): + return { + "reason": "Location mismatch", + "claim_locations": sorted(claim_locations), + "truth_locations": sorted(truth_locations), + } + return None + + +def missing_location_evidence(claim, chunk_text): + import re + + pattern = r"\b(?:in|at|from|near)\s+([A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+){0,3})" + claim_locations = {m.group(1).lower() for m in re.finditer(pattern, claim or "")} + truth_locations = {m.group(1).lower() for m in re.finditer(pattern, chunk_text or "")} + return bool(claim_locations and not (claim_locations & truth_locations)) + + def entity_role_conflict(claim, chunk_text): claim_rel = _relation(claim) truth_relations = _relations(chunk_text) @@ -197,6 +288,10 @@ def find_contradiction(claim, chunk, extract_numbers, has_negation_mismatch, sco if source_issue and score >= threshold: return source_issue + location_issue = location_conflict(claim, chunk.get("text", "")) + if location_issue and score >= threshold: + return location_issue + temporal_issue = temporal_conflict(claim, chunk.get("text", "")) if temporal_issue and score >= threshold: return temporal_issue diff --git a/Halgorithem/core.py b/Halgorithem/core.py index 976de59..d81fe29 100644 --- a/Halgorithem/core.py +++ b/Halgorithem/core.py @@ -6,10 +6,16 @@ import pysbd from sklearn.feature_extraction.text import HashingVectorizer from sklearn.metrics.pairwise import cosine_similarity +from scipy import sparse from .claim_extraction import extract_claims -from .confidence import classify_support, confidence_score -from .contradiction import find_contradiction, numbers_conflict +from .confidence import classify_support, confidence_score, is_negative_claim +from .contradiction import ( + equivalent_unit_numbers, + find_contradiction, + missing_location_evidence, + numbers_conflict, +) from .evidence import best_evidence, build_evidence from .math_utils import numbers_close, safe_eval from .retrieval import rank_chunks @@ -25,7 +31,7 @@ lemmatize_tokens, tokenize, ) -from .nlp import nlp +from .nlp import parse class LocalEmbedder: @@ -38,11 +44,19 @@ def __init__(self): ) def encode(self, text, convert_to_tensor=False): + if isinstance(text, (list, tuple)): + return self.vectorizer.transform([t or "" for t in text]) return self.vectorizer.transform([text or ""]) def similarity(self, left, right): return float(cosine_similarity(left, right)[0][0]) + def similarity_many(self, left, rights): + if not rights: + return [] + matrix = sparse.vstack(rights) + return [float(score) for score in cosine_similarity(left, matrix)[0]] + def _load_embedder(): mode = os.getenv("HALGORITHEM_EMBEDDER", "local").lower() @@ -73,9 +87,18 @@ def similarity(self, left, right): class Halgorithm: - def __init__(self, sentences_per_chunk=2, sentence_overlap=1): + def __init__(self, sentences_per_chunk=2, sentence_overlap=1, embedder=None): + if sentences_per_chunk < 1: + raise ValueError("sentences_per_chunk must be at least 1.") + if sentence_overlap < 0: + raise ValueError("sentence_overlap must be non-negative.") + if sentence_overlap >= sentences_per_chunk: + raise ValueError("sentence_overlap must be smaller than sentences_per_chunk.") self.sentences_per_chunk = sentences_per_chunk self.sentence_overlap = sentence_overlap + self.embedder = embedder or _embedder + self._claim_embedding_cache = {} + self._nli_model = None self.parser = pysbd.Segmenter(language="en", clean=False) # ── Text prep ───────────────────────────────────────────────────────────── @@ -152,9 +175,10 @@ def chunk_text(self, text, doc_id=1, source_name=None): "sentence_end": min(end, len(sentences)), "text": chunk, "tokens": self.tokenize(chunk), + "lemmas": self.lemmatize_tokens(chunk), "entities": self.extract_entities(chunk), "numbers": self.extract_numbers(chunk), - "embedding": _embedder.encode(chunk, convert_to_tensor=True), + "embedding": self.embedder.encode(chunk, convert_to_tensor=True), }) chunk_id += 1 if end >= len(sentences): @@ -165,14 +189,22 @@ def chunk_text(self, text, doc_id=1, source_name=None): # ── Scoring ─────────────────────────────────────────────────────────────── def support_score(self, claim, chunk): - # semantic similarity via sentence-transformers — topic-agnostic - claim_emb = _embedder.encode(claim, convert_to_tensor=True) - return _embedder.similarity(claim_emb, chunk["embedding"]) + return self._claim_score_fn(claim)(claim, chunk) + + def _claim_score_fn(self, claim): + if claim not in self._claim_embedding_cache: + self._claim_embedding_cache[claim] = self.embedder.encode(claim, convert_to_tensor=True) + claim_emb = self._claim_embedding_cache[claim] + + def score_fn(_claim, chunk): + return self.embedder.similarity(claim_emb, chunk["embedding"]) + + return score_fn # ── Math claims ─────────────────────────────────────────────────────────── def classify_claim_type(self, claim): - if re.search(r"\d+\s*[\+\-\*/%]\s*\d+|(? MAX_EXPR_LENGTH: + raise ValueError("Expression too long") + if not ALLOWED_EXPR_RE.fullmatch(expr): + raise ValueError("Expression contains unsupported characters") + for exponent in re.findall(r"(?:\*\*|\^)\s*([+-]?\d+(?:\.\d+)?)", expr): + if abs(float(exponent)) > MAX_EXPONENT_ABS: + raise ValueError("Exponent too large") + return expr.replace("^", "**") def safe_eval(expr): try: - result = parse_expr(str(expr), transformations=TRANSFORMATIONS) + expr = _validate_expr(expr) + result = parse_expr(expr, transformations=TRANSFORMATIONS, evaluate=True) return float(result.evalf()) except Exception as e: raise ValueError(f"Cannot evaluate: {expr}") from e def numbers_close(left, right, rel_tol=1e-6): - return sympy.Abs(sympy.Float(left) - sympy.Float(right)) <= rel_tol * max(sympy.Abs(sympy.Float(left)), sympy.Abs(sympy.Float(right)), sympy.Float(1)) \ No newline at end of file + return sympy.Abs(sympy.Float(left) - sympy.Float(right)) <= rel_tol * max(sympy.Abs(sympy.Float(left)), sympy.Abs(sympy.Float(right)), sympy.Float(1)) diff --git a/Halgorithem/model_runtime.py b/Halgorithem/model_runtime.py new file mode 100644 index 0000000..f18a574 --- /dev/null +++ b/Halgorithem/model_runtime.py @@ -0,0 +1,70 @@ +from functools import lru_cache +import re + +from .claim_extraction import split_atomic_claims +from .core import _embedder + + +PRONOUN_RE = re.compile( + r"\b(it|he|she|they|his|her|their|its|him|them|this|that|these|those)\b", + re.IGNORECASE, +) + + +class NoOpCoref: + def resolve_text(self, text): + return text or "" + + +def maybe_resolve(text, coref): + if not PRONOUN_RE.search(text or ""): + return text or "" + return coref.resolve_text(text or "") + + +@lru_cache(maxsize=1) +def default_embedder(): + return _embedder + + +@lru_cache(maxsize=1) +def default_coref(): + return NoOpCoref() + + +@lru_cache(maxsize=1) +def default_nli_model(): + from .checks.nli import NLIModel + + return NLIModel() + + +class RuleClaimExtractor: + def extract(self, text): + return split_atomic_claims(text) + + def __call__(self, text): + return self.extract(text) + + +@lru_cache(maxsize=1) +def _default_claim_extractor_instance(): + return RuleClaimExtractor() + + +def default_claim_extractor(text=None): + extractor = _default_claim_extractor_instance() + if text is None: + return extractor + return extractor.extract(text) + + +def encode_texts(embedder, texts): + texts = [text or "" for text in texts] + if not texts: + return [] + try: + encoded = embedder.encode(texts, convert_to_tensor=True) + return [encoded[index] for index in range(len(texts))] + except Exception: + return [embedder.encode(text, convert_to_tensor=True) for text in texts] diff --git a/Halgorithem/models.py b/Halgorithem/models.py new file mode 100644 index 0000000..1cfb340 --- /dev/null +++ b/Halgorithem/models.py @@ -0,0 +1,86 @@ +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class DocumentSentence: + doc_id: int + source: str + sentence_id: int + text: str + resolved_text: str + context_text: str = "" + embedding: Any = None + source_quality: float = 0.55 + tokens: set[str] = field(default_factory=set) + lemmas: set[str] = field(default_factory=set) + numbers: set[str] = field(default_factory=set) + + +@dataclass +class ProcessedSentence: + sentence_id: int + text: str + resolved_text: str + embedding: Any = None + claims: list[str] = field(default_factory=list) + tokens: set[str] = field(default_factory=set) + lemmas: set[str] = field(default_factory=set) + numbers: set[str] = field(default_factory=set) + + +@dataclass +class SimilarityHit: + sentence: str + score: float + source: str = "" + sentence_id: int | None = None + source_quality: float = 0.55 + + +@dataclass +class SimilarityCheck: + score: float + hits: list[SimilarityHit] = field(default_factory=list) + source_quality: float = 0.55 + + +@dataclass +class NLICheck: + label: str + score: float + reason: str = "" + model_quality: float = 1.0 + + +@dataclass +class AtomicClaim: + claim: str + verdict: str + confidence: float + evidence: str = "" + + +@dataclass +class AtomicCheck: + claims: list[AtomicClaim] = field(default_factory=list) + score: float | None = None + status: str = "ok" + + +@dataclass +class VoteResult: + verdict: str + confidence: float + diagnostics: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class VerificationResult: + sentence: str + verdict: str + confidence: float + similarity: SimilarityCheck + nli: NLICheck + atomic: AtomicCheck + diagnostics: dict[str, Any] = field(default_factory=dict) diff --git a/Halgorithem/nlp.py b/Halgorithem/nlp.py index de66382..7ce7b5d 100644 --- a/Halgorithem/nlp.py +++ b/Halgorithem/nlp.py @@ -1,3 +1,6 @@ +from functools import lru_cache +import threading + import nltk import spacy from negspacy.negation import Negex @@ -40,3 +43,20 @@ def _load_spacy_model(): WORDNET_AVAILABLE = True except LookupError: WORDNET_AVAILABLE = False + + +_parse_lock = threading.Lock() + + +@lru_cache(maxsize=2048) +def _cached_parse(text): + return nlp(text or "") + + +def parse(text): + with _parse_lock: + return _cached_parse(text or "") + + +parse.cache_info = _cached_parse.cache_info +parse.cache_clear = _cached_parse.cache_clear diff --git a/Halgorithem/process.py b/Halgorithem/process.py new file mode 100644 index 0000000..b76521b --- /dev/null +++ b/Halgorithem/process.py @@ -0,0 +1,29 @@ +from .core import Halgorithm +from .model_runtime import default_claim_extractor, default_coref, default_embedder, encode_texts, maybe_resolve +from .models import ProcessedSentence +from .text_processing import extract_numbers, lemmatize_tokens, tokenize + + +def process_response(text, embedder=None, coref=None, claim_extractor=None): + embedder = embedder or default_embedder() + coref = coref or default_coref() + claim_extractor = claim_extractor or default_claim_extractor + splitter = Halgorithm() + raw_sentences = splitter.split_sentences(text) + resolved_sentences = [maybe_resolve(sentence, coref) for sentence in raw_sentences] + embeddings = encode_texts(embedder, resolved_sentences) + sentences = [] + for sentence_id, (sentence, resolved, embedding) in enumerate(zip(raw_sentences, resolved_sentences, embeddings), 1): + sentences.append( + ProcessedSentence( + sentence_id=sentence_id, + text=sentence, + resolved_text=resolved, + embedding=embedding, + claims=claim_extractor(resolved), + tokens=set(tokenize(resolved)), + lemmas=set(lemmatize_tokens(resolved)), + numbers=set(extract_numbers(resolved)), + ) + ) + return sentences diff --git a/Halgorithem/retrieval.py b/Halgorithem/retrieval.py index f5e1684..c0720a8 100644 --- a/Halgorithem/retrieval.py +++ b/Halgorithem/retrieval.py @@ -1,24 +1,58 @@ +import re + + +TOKEN_ALIASES = { + "created": "create", + "creates": "create", + "creating": "create", + "creator": "create", + "invented": "invent", + "invents": "invent", + "inventor": "invent", + "developed": "develop", + "developer": "develop", + "developers": "develop", + "released": "release", + "running": "run", + "ran": "run", +} + + +def _tokens(text): + return {TOKEN_ALIASES.get(t.lower(), t.lower()) for t in re.findall(r"\b[\w'-]+\b", text or "") if len(t) > 2} + + def rank_chunks( claim, chunks, score_fn, extract_numbers, has_negation_mismatch, + lemmatize_fn=None, threshold=0.30, top_k=5, ): candidates = [] claim_numbers = set(extract_numbers(claim)) + claim_tokens = _tokens(claim) + claim_lemmas = set() + if lemmatize_fn is not None: + claim_lemmas = {TOKEN_ALIASES.get(t, t) for t in lemmatize_fn(claim) if len(t) > 2} for chunk in chunks: raw_score = score_fn(claim, chunk) score = raw_score signals = [] - claim_tokens = {t.lower() for t in claim.replace(".", " ").replace(",", " ").split() if t.strip()} - chunk_tokens = set(chunk.get("tokens", [])) + chunk_tokens = {TOKEN_ALIASES.get(t, t) for t in set(chunk.get("tokens", []))} + if not chunk_tokens: + chunk_tokens = _tokens(chunk.get("text", "")) content_tokens = {t for t in claim_tokens if len(t) > 2} + overlap = 0.0 if content_tokens: - overlap = len(content_tokens & chunk_tokens) / len(content_tokens) + token_overlap = len(content_tokens & chunk_tokens) / len(content_tokens) + chunk_lemmas = {TOKEN_ALIASES.get(t, t) for t in set(chunk.get("lemmas", []))} + lemma_overlap = len(claim_lemmas & chunk_lemmas) / len(claim_lemmas) if claim_lemmas and chunk_lemmas else 0.0 + overlap = max(token_overlap, lemma_overlap) if overlap >= 0.85: score = min(score + 0.18, 1.0) signals.append("high_token_overlap") @@ -29,6 +63,8 @@ def rank_chunks( if claim_numbers and claim_numbers.issubset(set(chunk.get("numbers", []))): score = min(score + 0.10, 1.0) signals.append("number_subset") + elif claim_numbers and set(chunk.get("numbers", [])) and overlap >= 0.60: + signals.append("number_anchor_overlap") if has_negation_mismatch(claim, chunk.get("text", "")) and score >= threshold: score = max(score - 0.30, 0.0) diff --git a/Halgorithem/source_quality.py b/Halgorithem/source_quality.py index 7d28f8c..708962a 100644 --- a/Halgorithem/source_quality.py +++ b/Halgorithem/source_quality.py @@ -19,13 +19,15 @@ def score_source(source_name, text): host = parsed.netloc.lower().removeprefix("www.") score = 0.55 + domain_matched = False for domain, domain_score in HIGH_TRUST_DOMAINS.items(): if host == domain or host.endswith("." + domain): score = max(score, domain_score) + domain_matched = True if source_name.startswith("inline_text") or not parsed.scheme: score = max(score, 0.65) - if len(text.split()) < 80: + if not domain_matched and len(text.split()) < 80: score -= 0.15 if text.count("\n") > len(text.split()) / 4: score -= 0.05 diff --git a/Halgorithem/temporal.py b/Halgorithem/temporal.py index cf341dd..924697a 100644 --- a/Halgorithem/temporal.py +++ b/Halgorithem/temporal.py @@ -1,8 +1,11 @@ import re from datetime import date +from .nlp import parse + YEAR_RE = re.compile(r"\b(?:1[5-9]\d{2}|20\d{2}|21\d{2})\b") +ENTITY_RE = re.compile(r"\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*\b") CURRENT_TERMS = { "current", "currently", @@ -26,14 +29,40 @@ def has_temporal_language(text): def temporal_conflict(claim, chunk_text): claim_years = extract_years(claim) chunk_years = extract_years(chunk_text) - if claim_years and chunk_years and claim_years.isdisjoint(chunk_years): - return { - "reason": "Date mismatch", - "claim_years": sorted(claim_years), - "truth_years": sorted(chunk_years), - } + if not (claim_years and chunk_years and claim_years.isdisjoint(chunk_years)): + return None + claim_anchors = temporal_anchors(claim) + chunk_anchors = temporal_anchors(chunk_text) + if claim_anchors and chunk_anchors and claim_anchors.isdisjoint(chunk_anchors): + return None + return { + "reason": "Date mismatch", + "claim_years": sorted(claim_years), + "truth_years": sorted(chunk_years), + } - return None + +def temporal_anchors(text): + anchors = {m.group(0).lower() for m in ENTITY_RE.finditer(text or "")} + stop = { + "the", "a", "an", "in", "on", "at", "by", "of", "for", "with", + "was", "is", "are", "were", "as", "current", "currently", + "created", "invented", "developed", "designed", "launched", + "released", "founded", "reported", "started", "ended", + } + anchors.update( + token.lower() + for token in re.findall(r"\b[a-zA-Z][a-zA-Z'-]+\b", text or "") + if len(token) > 3 and token.lower() not in stop + ) + doc = parse(text) + for ent in doc.ents: + if ent.label_ not in {"DATE", "TIME", "CARDINAL", "ORDINAL", "QUANTITY", "PERCENT", "MONEY"}: + anchors.add(ent.text.lower()) + for token in doc: + if token.pos_ in {"PROPN", "NOUN"} and not token.is_stop and not token.like_num: + anchors.add(token.lemma_.lower()) + return {anchor for anchor in anchors if anchor and not YEAR_RE.fullmatch(anchor)} def temporal_warning(claim): diff --git a/Halgorithem/text_processing.py b/Halgorithem/text_processing.py index 4127652..393ba8d 100644 --- a/Halgorithem/text_processing.py +++ b/Halgorithem/text_processing.py @@ -8,11 +8,21 @@ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS import textacy.preprocessing as tprep -from .nlp import WORDNET_AVAILABLE, nlp +from .nlp import WORDNET_AVAILABLE, parse STOPWORDS = set(ENGLISH_STOP_WORDS) md = MarkdownIt() +NUMBER_WORD_RE = re.compile( + r"\b(?:zero|one|two|three|four|five|six|seven|eight|nine|ten|" + r"eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|" + r"eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|" + r"eighty|ninety|hundred|thousand|million|billion|trillion|" + r"first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|" + r"tenth|percent|percentage|dollars?|euros?|grams?|kilograms?|" + r"pounds?|meters?|centimeters?|kilometers?|miles?)\b", + re.IGNORECASE, +) @lru_cache(maxsize=4096) @@ -63,7 +73,7 @@ def clean_text(text): def tokenize(text): - doc = nlp(text) + doc = parse(text) return [ t.text.lower() for t in doc if not t.is_punct and not t.is_space @@ -72,7 +82,7 @@ def tokenize(text): def lemmatize_tokens(text): - doc = nlp(text) + doc = parse(text) return [ t.lemma_.lower() for t in doc if not t.is_punct and not t.is_space @@ -81,7 +91,11 @@ def lemmatize_tokens(text): ] -def extract_numbers(text): +@lru_cache(maxsize=4096) +def _extract_numbers_cached(text): + text = text or "" + if not re.search(r"\d", text) and not NUMBER_WORD_RE.search(text): + return () # quantulum3 handles "seven billion", "3.5 million", "$4.2B", ordinals try: quantities = qparser.parse(text) @@ -100,11 +114,15 @@ def extract_numbers(text): if d not in seen: extracted.append(d) seen.add(d) - return extracted + return tuple(extracted) + + +def extract_numbers(text): + return list(_extract_numbers_cached(text or "")) def extract_entities(text): - doc = nlp(text) + doc = parse(text) entities = set() for ent in doc.ents: tokens = tuple( @@ -118,8 +136,8 @@ def extract_entities(text): def has_negation_mismatch(claim, chunk_text): # negspacy marks negated entities on the doc - claim_doc = nlp(claim) - chunk_doc = nlp(chunk_text) + claim_doc = parse(claim) + chunk_doc = parse(chunk_text) claim_has_negation = any( getattr(t._, "negex", False) for t in claim_doc ) @@ -129,7 +147,7 @@ def has_negation_mismatch(claim, chunk_text): if not claim_has_negation and not chunk_has_negation: negation_terms = { "no", "not", "never", "neither", "nor", "without", "didn't", - "doesn't", "wasn't", "isn't", "aren't", "can't", "cannot", "did" + "doesn't", "wasn't", "isn't", "aren't", "can't", "cannot" } claim_tokens = {t.text.lower() for t in claim_doc} chunk_tokens = {t.text.lower() for t in chunk_doc} diff --git a/Halgorithem/voting.py b/Halgorithem/voting.py new file mode 100644 index 0000000..7935bdf --- /dev/null +++ b/Halgorithem/voting.py @@ -0,0 +1,114 @@ +from .models import AtomicCheck, NLICheck, SimilarityCheck, VoteResult +from .nlp import parse + + +def entropy_gate(sentence_text, threshold=0.92): + doc = parse(sentence_text) + if not any(t.dep_ == "conj" for t in doc): + return None, None, 1.0 + if "," not in sentence_text and ";" not in sentence_text: + return None, None, 1.0 + if getattr(doc._, "has_coref_resolution", False): + return None, None, 1.0 + pronouns = { + "it", "its", "they", "them", "their", "this", "that", "these", + "those", "he", "him", "his", "she", "her", + } + has_ambiguous_pronoun = any(t.text.lower() in pronouns for t in doc) + has_named_anchor = any(doc.ents) or any(t.pos_ == "PROPN" for t in doc) + if has_ambiguous_pronoun and not has_named_anchor: + return "UNVERIFIABLE", 0.5, threshold + return None, None, 1.0 + + +def similarity_weight(check: SimilarityCheck): + quality = max(0.0, min(check.source_quality, 1.0)) + return 0.2 + 0.3 * quality + + +def nli_score(check: NLICheck): + if check.label == "ENTAILMENT": + return check.score + if check.label == "CONTRADICTION": + return 1.0 - check.score + return 0.0 + + +def contradiction_confidence(nli: NLICheck, atomic: AtomicCheck): + scores = [] + if nli.label == "CONTRADICTION": + scores.append(nli.score) + scores.extend(claim.confidence for claim in atomic.claims if claim.verdict == "CONTRADICT") + return max(scores, default=0.0) + + +def atomic_score(check: AtomicCheck): + """Return atomic support on [-1, 1]. + + Negative values are deliberate: they represent atomic contradiction strength + and are combined with separate contradiction confidence in fuse_votes(). + """ + if check.score is not None: + return check.score + if not check.claims: + return None + entail = sum(1 for c in check.claims if c.verdict == "ENTAIL") + contradict = sum(1 for c in check.claims if c.verdict == "CONTRADICT") + total = len(check.claims) + return (entail - contradict) / total if total else None + + +def fuse_votes(similarity: SimilarityCheck, nli: NLICheck, atomic: AtomicCheck): + """Fuse support evidence while keeping contradiction confidence separate. + + NLI model_quality scales how much the NLI support score affects the weighted + support average. The rule-based fallback reports 0.75; a transformer-backed + model can report 1.0 to carry the full NLI weight. + """ + weighted = [] + weighted.append((similarity_weight(similarity), similarity.score)) + weighted.append((0.5 * max(0.0, min(nli.model_quality, 1.0)), nli_score(nli))) + atom = atomic_score(atomic) + if atom is not None: + weighted.append((0.3, atom)) + + total_weight = sum(weight for weight, _ in weighted) or 1.0 + support_score = sum(weight * score for weight, score in weighted) / total_weight + atomic_contradictions = sum(1 for claim in atomic.claims if claim.verdict == "CONTRADICT") + atomic_entails = sum(1 for claim in atomic.claims if claim.verdict == "ENTAIL") + contra = contradiction_confidence(nli, atomic) + if contra >= 0.80 and (atomic_contradictions or similarity.score >= 0.25): + verdict = "CONTRADICTION" + confidence = contra + elif atomic_contradictions and atom is not None and atom < -0.25: + verdict = "CONTRADICTION" + confidence = max(contra, abs(atom)) + elif nli.label == "ENTAILMENT" and nli.score >= 0.80 and atomic_entails and similarity.score >= 0.30: + verdict = "SUPPORTED" + confidence = max(support_score, nli.score * 0.9) + elif support_score >= 0.55: + verdict = "SUPPORTED" + confidence = support_score + elif support_score >= 0.30: + verdict = "WEAK_SUPPORT" + confidence = support_score + elif similarity.score < 0.12 and nli.label == "NEUTRAL" and (atom is None or atom <= 0.0): + verdict = "HALLUCINATION" + confidence = 1.0 - max(similarity.score, support_score) + else: + verdict = "UNVERIFIABLE" + confidence = 1.0 - max(support_score, contra) + + diagnostics = { + "similarity_score": similarity.score, + "similarity_source_quality": similarity.source_quality, + "nli_label": nli.label, + "nli_score": nli.score, + "atomic_score": atom, + "atomic_check_status": atomic.status, + "support_score": support_score, + "contradiction_confidence": contra, + "atomic_contradictions": atomic_contradictions, + "atomic_entails": atomic_entails, + } + return VoteResult(verdict=verdict, confidence=round(confidence, 3), diagnostics=diagnostics) diff --git a/Halgorithem/web.py b/Halgorithem/web.py index 3d0a73a..bedd7ac 100644 --- a/Halgorithem/web.py +++ b/Halgorithem/web.py @@ -1,50 +1,77 @@ +import asyncio +import concurrent.futures +from pathlib import Path +from urllib.parse import unquote + from bs4 import BeautifulSoup -import requests import html2text +import httpx + class WebScraper: - def __init__(self, list_of_urls): + def __init__(self, list_of_urls, output_dir="."): self.urls = list_of_urls + self.output_dir = Path(output_dir) self.converter = html2text.HTML2Text() self.converter.ignore_links = True self.converter.ignore_images = True self.converter.ignore_tables = False - self.counter = 0 + + async def _fetch_wikipedia(self, client, url): + title = unquote(url.split("/wiki/")[-1]) + api_url = f"https://en.wikipedia.org/api/rest_v1/page/mobile-sections/{title}" + response = await client.get(api_url) + response.raise_for_status() + data = response.json() + sections = data.get("lead", {}).get("sections", []) + plain_text = "\n".join(section.get("text", "") for section in sections if section.get("text")) + if plain_text: + return plain_text + summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}" + response = await client.get(summary_url) + response.raise_for_status() + return response.json().get("extract", "") + + async def _fetch_page(self, client, url): + response = await client.get(url) + response.raise_for_status() + soup = BeautifulSoup(response.content, "html.parser") + for tag in soup(["nav", "footer", "script", "style", "header", "aside"]): + tag.decompose() + return self.converter.handle(str(soup))[:8000] + + async def _scrape_one(self, client, index, url): + try: + if "wikipedia.org/wiki/" in url: + plain_text = await self._fetch_wikipedia(client, url) + else: + plain_text = await self._fetch_page(client, url) + self.output_dir.mkdir(parents=True, exist_ok=True) + path = self.output_dir / f"file{index}.txt" + path.write_text(plain_text, encoding="utf-8") + print(f"Scraped: {url} -> {path}") + return str(path) + except httpx.TimeoutException: + print(f"Timeout: {url}") + except httpx.HTTPStatusError as e: + print(f"HTTP error {e}: {url}") + except Exception as e: + print(f"Failed {url}: {e}") + return None + + async def scrape_async(self): + headers = {"User-Agent": "Mozilla/5.0 (compatible; HalgorithemBot/1.0)"} + timeout = httpx.Timeout(5.0) + async with httpx.AsyncClient(headers=headers, timeout=timeout, follow_redirects=True) as client: + results = await asyncio.gather( + *(self._scrape_one(client, index, url) for index, url in enumerate(self.urls)) + ) + return [path for path in results if path] def scrape(self): - headers = { - "User-Agent": "Mozilla/5.0 (compatible; HalgorithemBot/1.0)" - } - for url in self.urls: - try: - # use clean Wikipedia API instead of scraping - if "wikipedia.org/wiki/" in url: - title = url.split("/wiki/")[-1] - api_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}" - response = requests.get(api_url, - timeout=5, - headers=headers) - response.raise_for_status() - plain_text = response.json().get("extract", "") - else: - page = requests.get(url, timeout=5, headers=headers) - page.raise_for_status() - soup = BeautifulSoup(page.content, "html.parser") - for tag in soup(["nav", "footer", "script", - "style", "header", "aside"]): - tag.decompose() - plain_text = self.converter.handle(str(soup)) - plain_text = plain_text[:8000] # cap non-wiki sources - - with open(f"file{self.counter}.txt", "w", - encoding="utf-8") as f: - f.write(plain_text) - print(f"Scraped: {url} → file{self.counter}.txt") - self.counter += 1 - - except requests.exceptions.Timeout: - print(f"Timeout: {url}") - except requests.exceptions.HTTPError as e: - print(f"HTTP error {e}: {url}") - except Exception as e: - print(f"Failed {url}: {e}") \ No newline at end of file + try: + asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(self.scrape_async()) + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + return pool.submit(asyncio.run, self.scrape_async()).result() diff --git a/README.md b/README.md index 71a6971..60cfdae 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,26 @@ If neither spaCy model is installed, Halgorithem falls back to `spacy.blank("en" ## Quick Start +Recommended high-accuracy verifier: + +```python +from Halgorithem import verify + +results = verify( + docs="BASIC was created in 1964 by John Kemeny at Dartmouth College.", + response_text="BASIC was created in 1964.", +) + +for result in results: + print(result.verdict, result.confidence, result.diagnostics) +``` + +The `verify()` helper and `HalgorithemVerifier` return dataclass results and use the newer similarity, NLI, atomic-claim, and vote-fusion pipeline. This is the preferred API for new integrations. + +Vote fusion keeps support and contradiction evidence separate. Atomic scores intentionally use `[-1, 1]`, where negative values represent contradiction strength. NLI checks also expose `model_quality`: the built-in rule-based fallback is weighted lower than a transformer-backed NLI model that reports full quality. + +Legacy chunk API: + ```python from Halgorithem import Halgorithm @@ -63,8 +83,24 @@ for result in results: print(result["status"], result["claim"], result["reason"]) ``` +`Halgorithm.compare_to_docs()` remains supported for compatibility and returns dictionaries. It uses the lower-level chunk scoring path, so its output shape differs from `verify()` / `HalgorithemVerifier`. + ## Python API +Preferred verifier: + +```python +from Halgorithem import HalgorithemVerifier + +with HalgorithemVerifier() as verifier: + results = verifier.verify( + docs="BASIC was created in 1964.", + response_text="BASIC was created in 1964.", + ) +``` + +Legacy verifier: + ```python from Halgorithem import Halgorithm @@ -145,7 +181,21 @@ It reports accuracy, accuracy by category, a confusion matrix, failures, tempora ## Output Schema -Every claim result includes: +The preferred verifier returns `VerificationResult` dataclasses: + +```python +{ + "sentence": str, + "verdict": "SUPPORTED | WEAK_SUPPORT | CONTRADICTION | HALLUCINATION | UNVERIFIABLE", + "confidence": float, + "similarity": SimilarityCheck, + "nli": NLICheck, + "atomic": AtomicCheck, + "diagnostics": dict, +} +``` + +The legacy `Halgorithm` API returns dictionaries: ```python { diff --git a/pyproject.toml b/pyproject.toml index 59c494a..0fe05b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "beautifulsoup4>=4.12", "clean-text>=0.6", "html2text>=2024.2.26", + "httpx>=0.27", "markdown-it-py>=3", "negspacy>=1.0", "nltk>=3.8", @@ -25,6 +26,7 @@ dependencies = [ "requests>=2.31", "rich>=13", "scikit-learn>=1.3", + "scipy>=1.10", "sentence-transformers>=2.7", "spacy>=3.7", "sympy>=1.12", diff --git a/requirements.txt b/requirements.txt index e07f5a7..786f6e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ beautifulsoup4>=4.12 clean-text>=0.6 html2text>=2024.2.26 +httpx>=0.27 markdown-it-py>=3 negspacy>=1.0 nltk>=3.8 @@ -10,6 +11,7 @@ quantulum3[classifier]>=0.9 requests>=2.31 rich>=13 scikit-learn>=1.3 +scipy>=1.10 sentence-transformers>=2.7 spacy>=3.7 sympy>=1.12 diff --git a/tests/test_halgorithem.py b/tests/test_halgorithem.py index 9edfc89..137fbb8 100644 --- a/tests/test_halgorithem.py +++ b/tests/test_halgorithem.py @@ -1,9 +1,22 @@ import pytest +import asyncio -from Halgorithem import Halgorithm +from Halgorithem import Halgorithm, HalgorithemVerifier from Halgorithem.claim_extraction import split_atomic_claims -from Halgorithem.contradiction import find_contradiction +from Halgorithem.contradiction import equivalent_unit_numbers, find_contradiction, numbers_conflict +from Halgorithem.math_utils import safe_eval from Halgorithem.retrieval import rank_chunks +from Halgorithem.source_quality import score_source +from Halgorithem.temporal import temporal_conflict +from Halgorithem.voting import atomic_score, similarity_weight +from Halgorithem.models import AtomicCheck, AtomicClaim, SimilarityCheck +from Halgorithem.models import DocumentSentence, NLICheck +from Halgorithem.evidence import candidate_to_evidence +from Halgorithem.model_runtime import default_claim_extractor +from Halgorithem.process import process_response +from Halgorithem.text_processing import has_negation_mismatch +from Halgorithem.voting import entropy_gate, fuse_votes, nli_score +from Halgorithem.web import WebScraper @pytest.fixture() @@ -37,6 +50,12 @@ def test_claim_extraction_splits_atomic_claims(): assert "BASIC was created in 1964." in claims +def test_claim_extraction_splits_verb_led_conjunction(): + claims = split_atomic_claims("Python was created in 1991 and released publicly in 1994.") + assert "Python was created in 1991." in claims + assert "released publicly in 1994." in claims + + def test_retrieval_ranks_best_chunk(algo): chunks = algo.chunk_text( "Cats sleep often. BASIC was created in 1964 at Dartmouth College.", @@ -61,7 +80,7 @@ def test_weak_support(algo, docs): def test_hallucination(algo, docs): - assert first_status(algo, docs, "BASIC was created by NASA.") == "HALLUCINATION" + assert first_status(algo, docs, "BASIC was created by NASA.") == "CONTRADICTION" def test_denial(algo, docs): @@ -80,6 +99,24 @@ def test_unit_contradiction(algo, docs): assert result["reason"] == "Unit mismatch" +def test_equivalent_unit_numbers_support_grams(): + assert equivalent_unit_numbers("The sample weighs 1000 grams.", "The sample weighs 1 kilogram.") + + +def test_percentage_rounding_tolerance(): + chunk = {"numbers": ["31"]} + assert numbers_conflict("The rate was 30%.", chunk, lambda text: ["30"] if "30" in text else ["31"]) is None + + +def test_temporal_conflict_requires_shared_anchor(): + assert temporal_conflict("Apollo launched in 1969.", "Gemini launched in 1965.") is None + assert temporal_conflict("Apollo launched in 1970.", "Apollo launched in 1969.")["reason"] == "Date mismatch" + + +def test_trusted_short_sources_keep_domain_quality(): + assert score_source("https://www.nasa.gov/example", "short text") == 0.92 + + def test_math_checks(algo): supported = algo.compare_to_docs("Math source.", "2 + 2 = 4.")[0] contradicted = algo.compare_to_docs("Math source.", "2 + 2 = 5.")[0] @@ -89,6 +126,14 @@ def test_math_checks(algo): assert malformed["status"] == "ERROR" +def test_safe_eval_blocks_non_math_input(): + assert safe_eval("2^3") == 8.0 + with pytest.raises(ValueError): + safe_eval("__import__('os').system('echo nope')") + with pytest.raises(ValueError): + safe_eval("9**999999") + + def test_temporal_warning(algo, docs): result = algo.compare_to_docs(docs, "The current status of Project Helios is active.")[0] assert result["warning"] == "Time-sensitive claim" @@ -126,3 +171,151 @@ def test_runtime_hardening_errors(algo, tmp_path): with pytest.raises(ValueError): algo.compare_to_docs([{"file_path": "bad"}], "A claim.") assert algo.compare_to_docs("A source.", "") == [] + + +def test_verifier_stack_exists_and_returns_result(docs): + with HalgorithemVerifier() as verifier: + result = verifier.verify(docs, "BASIC was created in 1964.")[0] + assert result.verdict in {"SUPPORTED", "WEAK_SUPPORT"} + assert result.similarity.hits + assert "atomic_check_status" in result.diagnostics + + +def test_voting_weights_and_atomic_fallback(): + weight = similarity_weight(SimilarityCheck(score=0.8, source_quality=0.92)) + assert weight > 0.45 + check = AtomicCheck(claims=[ + AtomicClaim("a", "ENTAIL", 0.9), + AtomicClaim("b", "CONTRADICT", 0.8), + ]) + assert atomic_score(check) == 0 + + +def test_entropy_gate_returns_unverifiable_for_ambiguous_compound(): + status, confidence, entropy = entropy_gate("It rose quickly, and they said it changed.") + assert status == "UNVERIFIABLE" + assert confidence == 0.5 + assert entropy == 0.92 + + +def test_fuse_votes_uses_unverifiable_fallback_not_hallucination(): + vote = fuse_votes( + SimilarityCheck(score=0.20, source_quality=0.5), + NLICheck("NEUTRAL", 0.50), + AtomicCheck(claims=[], score=None, status="empty"), + ) + assert vote.verdict == "UNVERIFIABLE" + + +def test_nli_contradiction_score_stays_in_support_range(): + assert nli_score(NLICheck("CONTRADICTION", 0.90)) == pytest.approx(0.10) + + +def test_did_is_not_negation(): + assert not has_negation_mismatch("She did create Python.", "She created Python.") + + +def test_default_claim_extractor_works_as_factory_and_function(): + extractor = default_claim_extractor() + assert extractor.extract("Python was created in 1991.") + assert default_claim_extractor("Python was created in 1991.") + + +def test_document_sentence_evidence_compatibility(): + sentence = DocumentSentence( + doc_id=7, + source="doc.txt", + sentence_id=3, + text="Raw", + resolved_text="Resolved", + source_quality=0.8, + ) + evidence = candidate_to_evidence({"chunk": sentence, "score": 0.9}) + assert evidence["doc_id"] == 7 + assert evidence["source"] == "doc.txt" + assert evidence["chunk_id"] == 3 + assert evidence["text"] == "Resolved" + + +def test_web_scraper_accepts_output_dir(tmp_path): + scraper = WebScraper([], output_dir=tmp_path) + assert scraper.scrape() == [] + + +def test_web_scraper_scrape_inside_event_loop(tmp_path): + async def run(): + scraper = WebScraper([], output_dir=tmp_path) + return scraper.scrape() + + assert asyncio.run(run()) == [] + + +def test_process_response_batches_embeddings(): + class BatchEmbedder: + def __init__(self): + self.calls = [] + + def encode(self, text, convert_to_tensor=False): + self.calls.append(text) + if isinstance(text, list): + return list(text) + return text + + embedder = BatchEmbedder() + sentences = process_response("BASIC was created in 1964. It was designed for students.", embedder=embedder) + assert len(sentences) == 2 + assert len(embedder.calls) == 1 + assert isinstance(embedder.calls[0], list) + + +def test_atomic_check_batches_nli(docs): + class BatchNLI: + def __init__(self): + self.batch_calls = 0 + + def predict_batch(self, premises, hypotheses): + self.batch_calls += 1 + from Halgorithem.models import NLICheck + return [NLICheck("ENTAILMENT", 0.9) for _ in hypotheses] + + from Halgorithem.ingest import ingest_documents + from Halgorithem.process import process_response + from Halgorithem.checks.atomic import atomic_claim_nli, prepare_document_claims + + document = ingest_documents(docs) + sentence = process_response("BASIC was created in 1964 and it was designed for students.")[0] + nli = BatchNLI() + result = atomic_claim_nli(sentence, document, nli_model=nli, doc_claims=prepare_document_claims(document)) + assert nli.batch_calls == 1 + assert result.score is not None + + +def test_atomic_empty_token_claim_stays_neutral(): + from Halgorithem.checks.atomic import atomic_claim_nli + + result = atomic_claim_nli("It.", []) + assert result.status == "no_document_claims" + + from Halgorithem.models import DocumentSentence + doc = [DocumentSentence(doc_id=1, source="x", sentence_id=1, text="A.", resolved_text="A.")] + result = atomic_claim_nli("It.", doc) + assert result.claims[0].verdict == "NEUTRAL" + assert result.claims[0].evidence == "" + + +def test_similarity_search_does_not_mutate_embeddings(): + from Halgorithem.checks.similarity import similarity_search + from Halgorithem.models import DocumentSentence, ProcessedSentence + + class Embedder: + def encode(self, text, convert_to_tensor=False): + return {text} + + def similarity(self, left, right): + return 1.0 if left == right else 0.0 + + sentence = ProcessedSentence(sentence_id=1, text="A.", resolved_text="A.") + doc = [DocumentSentence(doc_id=1, source="x", sentence_id=1, text="A.", resolved_text="A.")] + similarity_search(sentence, doc, Embedder()) + assert sentence.embedding is None + assert doc[0].embedding is None