From 795d36b732c671d395b56fb847e50508fabcf4dc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jun 2026 10:35:21 +0000 Subject: [PATCH] =?UTF-8?q?fix(extractor):=20re-join=20=C2=A711=20windows?= =?UTF-8?q?=20whose=20Area=20cell=20split=20onto=20its=20own=20line?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sim case 20's §11 lodges 5 windows but only 1 surfaced. The "W H Area" cells tokenize inconsistently: a narrow Area column keeps all three on one line ("1.80 2.10 3.78" — matches _WIDTH_HEIGHT_AREA_RE), but a wider Area column triggers pdftotext's 2+-space split, dropping the Area onto its own line ("5.79 2.00" then "11.58"). The 3-decimal data anchor never matched those four rows, so they were lost — gutting §6 solar gains (5 windows → 1) and dropping continuous SAP 43.05 → 38.32 vs the worksheet's 43.6322. Pre-merge a "W H" line + a following lone-decimal Area into the canonical "W H Area" line, gated on Area ≈ W × H (the §11 Area is always the product) so a frame factor / g-value / U-value below a dimension line is never absorbed. One-line layouts (3 decimals) are untouched. Pins via test_summary_001431_case20_extracts_all_five_section11_windows (Summary_001431_case20.pdf mirrors sap worksheets/golden fixture debugging/ simulated case 20/). 573 documents_parser tests pass; pyright strict net-zero. Co-Authored-By: Claude Opus 4.8 --- .../documents_parser/elmhurst_extractor.py | 36 +++++++++++++++++- .../tests/fixtures/Summary_001431_case20.pdf | Bin 0 -> 82157 bytes .../tests/test_summary_pdf_mapper_chain.py | 15 ++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 backend/documents_parser/tests/fixtures/Summary_001431_case20.pdf diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 44d5325e..01b50deb 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -861,7 +861,7 @@ class ElmhurstSiteNotesExtractor: ) if not m: return [] - lines = m.group(1).splitlines() + lines = self._merge_split_dimension_lines(m.group(1).splitlines()) # Locate all (data_line, manufacturer_line) pairs in document # order. Each pair is one window. @@ -911,6 +911,40 @@ class ElmhurstSiteNotesExtractor: windows.append(window) return windows + # A "W H" pair on its own line (e.g. "5.79 2.00") whose Area cell the + # layout preprocessor pushed onto the following line as a lone decimal + # ("11.58"). Wider Area columns in the §11 grid trigger the 2+-space + # split; narrower ones keep all three on one line (the 3-decimal anchor). + _WIDTH_HEIGHT_RE = re.compile(r"^(\d+\.\d+)\s+(\d+\.\d+)$") + _AREA_ONLY_RE = re.compile(r"^(\d+\.\d+)$") + + def _merge_split_dimension_lines(self, lines: List[str]) -> List[str]: + """Re-join a window's "W H" line with a following bare-Area line + into the canonical "W H Area" shape the data anchor expects. + + Gated on Area ≈ W × H (the §11 Area is always the product), so an + unrelated lone decimal below a "W H" line — a frame factor, g-value + or U-value — is never absorbed. Layouts that already lodge all + three on one line are untouched (their line has 3 decimals, not 2). + """ + merged: List[str] = [] + i = 0 + while i < len(lines): + wh = self._WIDTH_HEIGHT_RE.match(lines[i].strip()) + area = ( + self._AREA_ONLY_RE.match(lines[i + 1].strip()) + if wh is not None and i + 1 < len(lines) else None + ) + if wh is not None and area is not None: + w, h, a = float(wh.group(1)), float(wh.group(2)), float(area.group(1)) + if abs(w * h - a) <= 0.05: + merged.append(f"{wh.group(1)} {wh.group(2)} {area.group(1)}") + i += 2 + continue + merged.append(lines[i]) + i += 1 + return merged + def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]: for j in range(data_idx + 1, min(data_idx + 12, len(lines))): stripped = lines[j].strip() diff --git a/backend/documents_parser/tests/fixtures/Summary_001431_case20.pdf b/backend/documents_parser/tests/fixtures/Summary_001431_case20.pdf new file mode 100644 index 0000000000000000000000000000000000000000..40c67781feb9faaeb3bc97800923b278c8e0e2ca GIT binary patch literal 82157 zcmeF)1ymf(qA2PJgrEU}B!pnW3GTt&CD;TT+}+&+1b26bAcF*VcMopC-8JYP{jJLR~!^~`mlK@c9F{kx!l1fdnMurs$MV5XHY(zQ`#_~QTq z2Byd32pCxYZN>J-ijnSdrT+KW7@<%9U2I=uw2k$2Y-#1~v~3>~B4!3n0WHW#*Vd4L zfsT$=RL{u3(3XInkqLTAQwwW(OD!EeT0uQ~BON_C5q?^JBU>9;J!=6AGfN9|J!q2H zXr;Bxpz+Yr3LBZ)>RHnYn`+tW3F_%s=<3l*>X{orBW7k{VB_Ytv9;FIGDG;{yRQy! zqaYG_pM`ueP|8xV=jdmLYKppQwHN$;psQogvHOcAFF$`ditc+{ZQCdOSpLL_@6(^Z z-)d5MmN7YDTrkm?23A>@Q=Hs&c6-8bF68^1XQHs=`=z3i{`-X9Y@dJkXkYG<=Dc@W zKR1wPw5TUSX~qrqkK^B@`CR7;VU7@O*vDbVHTUGlKK$`gE&u}@(6IlV9am|f85~h- zdzREMlKnvK@$;Y~R)D~hF6Y&(uAF+^oVWjL3EFl4<|a!$C%08I(B!h^ul3nkIH%tX z9)6qcE0DS4t29cJ_2$K$9k-xLJ2s5`{k>TB)2ZF+)#IAfVmDjKA^Xg9T{n~6!KumR zh3=h=jpP1K232RWT(=Vuow>Z`8}N_^#NsGqVscqF{n)5^Ae=|(XNUb8`kX91O;Pt_ z1u>GV*xlJr4L4uHI`3r_NNt|msFk8uRnfibBe4w)*~~6M@rNn)X=lLarY;dxo&9OY z6%%xAGZA7YO>cA6x!QV)mH>a===K(?rlE@FU1T?{#-KRYdlc-i(sp}uZ80H(MbQbO zq!Jg{O|qs+eYR|BH8-Z7FGuDdID0%scd>@LtmG%t_(Bi@StfQr}4Gp^9BSFrFYIZ9Nk^=xv2pK z_uI9{M`LZ`x+Vr>LFe>Zmi8#kH>SpZayMIV+!r=F&%f-oo<=k)EFc*zqf5srEjo@d z)}9%&`h?>Wd>=EqV)xL1kfuwd1xtNnxz20ap21$ui8C>I+x6P%dBdkEOVFWfl5K7g zf?HI%WX-E|x0y2yi?UHe1vd2uoAcMD~fjNGgsJCYQ1Q_ok~>D+z1VXua-?6m>x$sj#vi*6y)k3&O1;d+n6OxN3g7V4R>cD8-IccY zBC-e(1*1U(NOXIfvWvTVW8$hcoYyEjwpFWNrIqN2cE{Pk!IMQl3{)zzCq#mT+;HNw zZubBqrPz_Ng_9bu$Nn0?tzU7I74FQ+SRt~W!?DLjtWy%Hhg1zBkI+iPz%LS<5INu2Bqm9&QwXSh zf=B|)e8%v-ZOMEx?;}1I@=sIUDs0*}=6$bHER5ZmIKOj#EjO6!`r#NeuR4=taN}W1 zn7lKg84j0@+QahHc5D^%NKrlNekCho)bxk`tJntZ!Is~cRLW{w*OtZXqi8lP$}m-K$X9BQO;!s z9<9q3 z-&74fP~L%^@=yB&6(lhG(Mu~rjoMzL-{j?zM1}w8)40hskjJ_?I7RTrCbh(&%^jq& zr)|NQM1Ai&Gn1-7Xz^AgKx^=XQhQO}uoAp48Ne>h){nCnLdMmnVP-m3H0(p*VZX_@ z)wo@(p50y8c-&7tLj4Y00Epz+|ruI^I+q<1mbiR1wvfVCE}@+0PvHEd@t|ZPd%Ue;#UB&o=C&rRu%= zMu_{vj{RAGVP6$Ky_;suXJh=gK6FNJ-<-^3v+TR@d+49u>r$Qal~6OQmjTGd;UWJCg_lT_WAE8M?9nY4(IB3VQZia4FxfA zHq+5D!o5@G5ulPg)cpjPVe_@<$e6yo4{NC8^M@HpVODlb8WKpP%Wlc>%4ByeGk&s^ z6%krg{YhVhPi(qJ3ZbK$t|-=U@ATO~lAvG1by4Q52!3c+{D=BHamRBopDtsf*rAtt zb$!|_(=xOYO|!l)NkHdzvD{NB8z$%@f-DY5VY%qGQH>kNJSkLDp8dG@1Kn1EU#VsK zw)pqJ8-BBTMBx zND%x4XzeZ2tim$$jLEhiG1O<2z^rfhAu3yzH*EKU_sZj#SjR4+*vpL_vl zENRkvVMc2jen_Dx9J9GT>?kw9+cR61Gh=2Dpl;jjYmX{}=W4bNq}~5fimtUA1rIq| zmMk1Oe6}|Q>AJKyCO4XEAk3j^w1@+>bSdq9)A~iluC=W1t3cRZCpgwp`0C_DOUOR> zV8Jd%e)rc=dqxDb9)wnchxeWPyu15*F*5nxxovRzn`gJf2Akt0s!Bzvmbx)_*L`2L zn{^`?yeRi*RQ+y-4jG;u2QIiHC&+MA82mWkepg!Xrd~LL+9a`28=`jZy^u%^_PscG zem*rLI76vYeaDSFPQ5HJi9uvRyk-8dBIH8OU zFA7<8BuQ7hK%*Ea$lX)%3UiHWoz*4<^X;PM?U^3Et=6NUeOhLsap55Su-VmoHf1OD z^H|)kMEZl9vQ5z7fVf|zLbrIv4(s}pWS?PRiSkZU3OsEd?WYxNKVlu zkAkefaNX`aGe0%d-R7-^IM3KevfnQ+Q5>2sd4l?V+)b(``gRIeV^A(!InHq1R0$&{ z+nBG?)8IclcBa-yeCQR1bE#tvv5DVP$WK1AX&5WhVY};3Wz&Aea+OyxWr599s1nQl zipDZ;`&mxts8ucRB(}zgSojkTep>M+t$oMNOtUrEo1DzKIp>W^au0hy4~%Z-V*F3R`BoobTm)t`%$5;s_VLCnj{{)usJot z4qd(*jq3&8G`rE55Gf_@__4fB%xeZ5-MCc7{r;DG3gl^6`gcM1435yY3^E7GYRcHu zxx-Mj;!N76m*0nGX_E9^*Ah104}~7}%ERs^w?9ndS|@#)LI$Fxv0~!hH>l@4e*qiJ zePP`wik+A!1&3tJQ7;pxw86~s@H5u0eq@?DhZQkCHa~a^H(2{f+18>>^)tJwDj9?8 zXSc%Ab*)*R%_qv$ZioTX-v)|V90mjLEE%5vvo|j9-naSN1RTI{Dn!v=f3iOo?E0V-^E$rBgkt|Idh!g zqK^q`V}x5s9MEoIfb+Uc1f@a(($O~GoBndt!-t>cBfyz6$A3#@?&jK9;bNX}oY{MV zVBBDd3wNjE=)S!3Dhzr_c{S{hX{URCj*yC7#n}Pb$_vE=?~|ghF6ZRt-3{LPFhXMO z247U3ABUn)@v`eQmtnrf5@{<8UZJpM#xZ;Qh9CLx9C>Hsd4b2;N2aEFlf#*hb&ccf zEa0*08fcr>Otg*hTSudWp;eRw?IHI&UgoYi@HM#|{9)fiaS7P=YBd78Xjg?Z+GQJh zH?=lKl_Pkj+r7p1-G33w&3h4*wnV7<7Q*=pQj}%KBz~E2!3S=#*704(^omzXOuTdgi(uR zSeLGPhvdJq<$3~c(UHVX^m&~q=Pqy&<0b8`E$e8pttjx?#T^Gw(UR98y(c@8P7@+= zf$nc`5uEsn(w8#ZKigPy_*nTrNR>An&NzTx$i>87fDMf=Xvi1xO1d^3(ZHW}?B3te=w+oFNlEalR0gH&jnS|%pnOMlB`hw#HFA#s|ALyiJx_TuCM?`Y-aFy;4{ zO(r7Atgx@y@iofe2v$>;>|D#y0!gi2w~EG$O9M)pgZew&_jBAyiSX}jeKDdzYrph{ zw+CB)QyrWSu(zwvrcp^c*W0Sjz+q5rmJz063mK{`fgY4D1jx%%L8EgSKISPs6u5Ji zbr5PqEVO0mJQ&|DeFK~8k%!MmDUST+RJNB>;a?rO(|z}^B+duJ*;SrOiD{#jH4tHX zBkS>VD}JxlTxT;@6=%9*xcWp=%OiR<-?(axK z`W|+T;B!$@#F(~qDH3mC^gHPll;>)_!sa#vgVX%q((pCvC{sOM31!xmtoiR(YY!}g ziSgcLpM2Y-)4&t?2-&ixzj5m|Tj=4tOXii!WxgqDAt^*uUaz_H$nHv#WMQt@Az)^QIU9$HWk!k@*%G{! zKT)*Gr|Qf8z{^~)zvnj;mfH(YbY_1kzctYIRoBU`&URReN|g&evS|17Yl!3x8wR>i zD&)?|PIt-a+mo4MZqj7aulyhd>lZ7S=kRqHvrUVI@o=NdvO^S%^xZ#Jz8?kdQA~@R zbGeE4VuJaXt??rn1>Cra)@Ods5vRD@HSxNO#(vxLjKdu%w&8yL4HSC={s`&0qfKg_ zTg5a=fLhI_-M@h_H*~-OcRK>hJ04z>N}0 zKMK6&J1$;(UJhrMr^}RoycN}hmpldrV0xCqmXdUx7 zP_a?6fuXp1x8XXkdZk0ypK$m`V1#xr)w980<-|*t z5h!^Tc12Mxiv3+`gLbjYdqGo)VLC794It=A2h+~0*O17jHIF?fmYao`!!w@Nbne8s zFloX(UhbfF1sRqBEMfnm_Ys6y@$D8tAYD_GL27b#)r&@b3ZKyqsxze7oJ*A2i|ZLs zi~-`_U;|c3gI7bBe$+FcZN*7sX#4x?3nUsPI9}Gi+2~i04(FP37{W=eFW`u*U#=F) z10L9T?}ccB2rn{(WMwi*F___v{kBxEgsQ)-;e9EjW0^FVyt4 z3bURDv}-||r=N_g^`$`80EhMCEen&6K(GHXk+-=(_V8Q&Z`!}Y{No&>2BW6S$>e^# z*O~djZ*on=O_Pn#MX`f48L*94+4dyb=78%M-Y!v;7p-`E%lic^RjSCrMCm-d2|=WaxL)LrMvB6(9IJO8uEk~T-)%0(2|3u` zKj3NQw3DoWgU6tkU9z^eJc27`EOoIF+lC495g^#AFMGyhas#L8;5htLM}udH|02IHXbbX=qRNU+bQx z|0mtktW3;w|Dk&t&i36@%tWf$Ote(FO9Oj=!&D=Obh2(()U@0ln|b6sNq;Q0vTx%h zf>No&XN-D;0{$}AQm@9>#9vxopu=$Rm43^ft}l&y1ON0X(#tWdAg0S(9|4s6fd@vL z2basc7ZKl{!6IOg%ck>s{pXD~pMZoM)u2K~4s}F?S2%2UGs;{_h*YZ_>SzeBP?X{X zpFbBUGcy)jAgd>nu&?`sfrmPu+Q7w3OnDek*ZRPe>fz0dQQlBjIv$f*x=GK#z`#Yc za)vd(byX+&E4qazVZwx^X{2)eJhRllthAJljg9RS12+RxY6s)c)_DJJwZ#6I_*&YE z%*MX$KKY)aq_i{wjQ8?#Mvm2ImSNV!OREUa)H#vy$WR+BQHPIY2_NAzwZEv>0}Nyhf^h1&A;k%B^h z?;4(64ZnUGvYa^&b=fJ?+T_{Th%_j+Ew%OR?8h{GiI$ za;oaCSejJW*OJB!s3nT=+6C_m_ySchdaNM+TJQC{1qa>q4k{sZH^n47>VYau!qu&b z0?AppM6jLVXdoD2Ka0Vz0Ph{{;I$?Y%T}+NMfuCZn91U}u^jrUk=k(wUacuDJ3B>T zwPG7zHEgmAoppZNw{l|nF|#=_qdIIAObz3gd|D2Cb(`WZ7^@GC;BfYoWwLEeu;I55 z3ShAl<_r9IVnLe@*GlAjL<56^z3&R^KW9v1UQacxE?s$G^5Z*!9H)%WN9F2%O28}@ zI@n+9lkedmA;7pbHmRx~&_8F6rUkDv0y&fq+G_-9I8+1uAeD`UN)}&-XYE58K;Cs zw6#*h_~P;ne1mcIL56a=%=@XxRHDW>Wf~pzO~Lcsf|+%*ly)RJ-xvOlbuPi7!Hb9G zHEJc5Ig0#hw|6v#5FLB{7s1ABmf<_bCaS6`>6Mx_!otGY>RN<6ZKZo@j4lJZ*NYL9 z{;uswuM*-Oyuo)w(>cmDhX|J)_KVLPVda#SzhrJ))_>$AL1sRuilbRN^llP?<{Msq zi;56f-(N6EIPU^rLalIcoI9-%tAa*OEq&eEP)^H5+Elqco#W0LH_udBy-6I5RY9CP zEzVuF*RaGQlNGbRH`DE`)%#d2g)N>oL!r_i+CN;@7sYO1y%t!Ff5&>@z_;|_NM$fs zs)B*Bx?+SdqcpcprL8bGm-Gn1FKB8JrI?u%>+~xXk_nr>;(40ybLp?naP@((wvN1U ziE-&onnf?cqb6Vms+-ymyS26J0sFPEfuO$6{Uuy3JGOG#WA<1GwN0U*OZ5DS`Jcq7 zFFUxc-jtMPpOy=cHo-KJ{^SsawQseDWp1Lr@T5h{CW@38gfJsqkeoGx&CkP4zcQM>eB;m@3&33xI2f=4Zu9USi7gZMn!II1Avewx?Q(jcB5MBg$ zkdcu|NQk2bp3FK$a=t9Iwuzt0gk%oov-gb8j#2)$cQr*C1!?hl<k&fW=&i{Kre_K5CyY$xEre~ul_p;s!B!!v*IEywS_$+dzBrW{1)F9f*yTc z(Qh(Wds#B?_i;y}kC@Kv&VrGHo9?c&A#uYS;4ad#y1Lofv1bN($skk}BotBK6e$zD{MfSFE>3`7_vR zr$dYfe~HC>Zr}{5XL5YuIpPy>7Z>Mcj9zAVa87RaFAED9i^_3><5(@@F7ptnV3{LF zPK-cf7bnL|j9z8Bq0Z{TCq&+&Tv zFUnw2wyb2(rK`4%LA#~yK74zhM|rWU8hb%_@)z1Vrh7U6V@Vf?rhKJ$039QXm-^10 z0^MD1R#tmg-)!p;o_h!ze-{<%^X}!L5z%Et-IC`ho|)-S{o9{<{1VhpIiZQYum6CoeCLJp{L0SU$$k+e&@fH_(A0LIVSv;nnlfbL1DQ z&hvI0cE6U|%Zm$se$h~citkz-Z?=(0ay*(_-aXYy-&I&vl>d;|?3DN!->~aC!sh(6 zPpncOVw`qq2PHxP^zQnAC%&K_AW+e7sdZyK4(2C8Q4Hx!r3WLxh#9!R}$}XZVYUAe*@aT1f4L}*Xa=saml%Z-sQJ`mf zH#i$R*QXf&^x#bsDGiIXHB19PpQV^M_;t-w7g%a)_Ht&ho)^d-Mb=F(Cx;<{P;)2^ z@$0&vTGm=3OjR1lVLzs?!Pv-jofR3TjECOf&h~@Od9uOyc-n9Gnx(z|rGx{6(_Pk* zkT*`XPAWKwGs-hWWpv<3AJK+@ul`9Krf3+xq7)Fxkl>gz#&#j1_Rb#BawIjHsh`s@ zfhy3{c>4%%f2C7;`Hjg&zJe+aA{uI%03x7MNOOJp5q(Zh-RwOlOt~;&>%k`G2@2CN z$z<&t&|Og50c=3o$tIV1_;vT4J{igT5U) zKXEzIOUOX5o|CWRQJ0-wpp{93xRlt$)M(T)reRcC9Axd|RqL;G#2Q2?X1gy{-nXt| z9w{($!&G#*gHG}d#%89g%yw|4OO?8Ja0O*Mj*=YrA*_aId)h2-b_ zf}x>JSurL?e2D1D@H_l>{=(Vr8%0zLu;)rRaF`>d3nHbYo*eEmyakUiQKEufV3i@{F z{cKn@QJH)N(@5>3&+)NUDPrh5yV#5SZ$(-o3{?>zUK<;mM#t>j+5#~l<8!7EOy&;d zGCZsA?n)8m&XONCo&-mKP^OYU#_I=gGXt`?j~Fc8#}xA962UavZ2^-RVp zy`dLXV$ky7dGd*bgmgpKAWNzh&4-1u)TG$w%(%G8RH4VYHSBUaVXHbj1T(_j-~@XR zRR^>1tMQwi?eUc47*TUDhs7-zw2T9}eU58r@8-5k{=>!5S97p!0C#`=Vt?-BH`*~F zOlbXE6Em%XPY`Hhw5`z2&dw{-eX%JrLq_+*#<(>OPPD?!4QzAP2NL$TRxpfYWE@n8 z9=zm4eNXCDR8@K$C+2K9s_z3~cMUw!h})f8E4GbpPU4m-A6DMa(i_PUHtHW=u-@Yo zIw&a?VRm@N`jM(VSE&mYNRE&9gyFJV$v&f_@fQ5H_#;|jJGtH;)5d!wX|ZiTGrp~qubz3=_lw<9F_2a-sy&AFS5!{f9F~xA~i8LuXuNTIbce} zmTl>fnUpSR+sNp*lapYd9z$zM!~A^UiS+ph4E`;wEsnyuQ(_aLJte|l`5|E?%8Dad4RFo}JJ{cSMPQTN!u?Owt7(P`i>QdjtKqLTS8Lg8r6Y9D)!odHslC}L#3;zZJml3oBa^c04L+h+zL!Pz4?}>{^x2h9&k@032 z#oUWQDhOXJb$`&QhU9MKvM91H zDiS}?L_a!##in5uU->A>sST7Acx97)BKd@KGn34)MTN6#g@5pR61MrSaN_t97vl|8 z8`y2d-ql(9_=?sS4juX*{fr6axOA!n5OT)LK4L48fRE7DcMX0}f>@A%9jKk(; z)4T}b*YQ0wD0B#OOi9+rW_jWCt@Ou_uXLz176QhEXj(d2d`}C%Tcm$8BoH_}Pfo1;6hvYJ3n=rb%lQYZwl%_1Ic{Uut$eeIay8Zanyaix$T=;y z4i7=-n2{%ey&=>mR?By?``2F2^~o?!cDV8$hc$)S9c)(B&<6$v&TJhVjz0N)ZJSV3 zblHoGi-mdqJjK=BLu1JsVYFIwe&*Y)=&Cj1lU`5F_geL?1KiBa6XOfx6En=5oUS6H z=(`xVZg*SKL!vn(os+Yq zJ`ow&1ol!Pkm|6Ctk>KTxO;oY6pgo0En_PyojpC1ll6!^RDFG13tVS*wgDvWFfh;f zL@i$qc1?yH>}+o=8y@~zktFjABqb#!`amSyX``mBs95vU@29beNxdbvVWO`4J@~1D zl8U%EO%(Jxjc|g{A76=Wyu=7`UL&~c;jl*_o}?0n?D{2`S20lZQq;|Unu36LclN&a z@{fG7-&L~GipWTHcQlNb-1vpq+QApkj3{qYnpsj}ZflD{CLJuJps2LHy#;Q{*F1ua zLq=jV&^C-tiQyuDldKW1aZ|?Gs;OpL6kGQK&)0V2_irZ&n;?~o+1PU}!VKXTehIekMwa~N_+$ZO zNpsdi_SbKl`=?26w)LpVety{<9fR>jYD&^B3JC@3{mpoYpRRIqOch>Az*^bmXxd0L zns;?Yaeox!a3u^u{%Er)RlwL^I?A#f1;QgZ^J3XkW zpbYIN(HFrd(LP=)4z}qZT@v4OrrX}L(17Hn)ZO=kMLB7w`rF5cyIGmo6w2lBIN{7N zmYUzB7uE(Cxv17ha=O4;t`kh;HfX_CP~;qCavW~%&_ntqikfFUZWBA_%$}EX3&HQ0 z?2g3d_{ZptjE!k$TTIWLUMe$Lm03-dfQ80O!d3ifd`>r?MTlJYgzU2ZB8nLh=YMTe zS`l@N2!?hCd5+a!Y*9zOy+62S9-D^YM?9FANRTc%M_ov|AgD3eT|>jie#H{$R=-IH zt@Q{&3fW3l=0adR#`JZ)yQ0m-@=r9>Wtv65V3-AhSrnhLVuRk=5WnKI+=dTVPDp^> zE>aNh%D`vmW~auLziJm~*YCpL~)lL^9WY#Ebg98k%gU4ME-#p`|N_R$NZ7th7PClDe!A9E~<8;WoP5iuf%)s*A{ zHwjja@t?HRt~z}eE}s9Md@+sg&^?Lo{`R&hd-X2;G=fLD$>y88t^Byo2>*8I(6^C# zmpTs7CwwU5Uyr0trnQy`nqxHH_K75dMn8`dVtW(z_K1632wVE2K4A}#m&NV)VjApC zO@mg=|C$o=q)qj!KYsyiaScIeG)16gBWL3Z{>XyWF$}?vU$0rSvH079z1a>gM26?D zR^7+z(RWMxv1N?!Si;fj3iuIE5ckZCQq2#{ZLuA&LB8;mot?dJwc>Qek!)R}=eyoB zJb`5EPzfg5k{PH;iSb?aEl$!6nWamu3wYed|E4*Rsm49DxBy3v6X&u4g&!&b6`XQK`J!`Dx4b4BD<)Ea%{4 zsI6^;e8y6sU;G8VvJ%6a2l3>je704HJ~P$w4PVAI1?ASr2NgxT_%6gs8he|=(Hnoq z+}rY{vIqgOy+p7pTH+_(Iax>a3aln_(uCGN3WM4tq{QTkCnk-^qaWX&CZ@S=UCb;Q zYT((kUu17sJR9?2RmC0n@N-Xpc=j0tb@jvioB{<5WZJaq7NkW;_^u0wi;GJdCG$diEENZz`Pp|;_ot+ui8_#*~Wg?{ma$OK+1#`yqaM^FNZtq2p{*$DskBjb8$K$;- zQZfNWdQ-D1GvxEz{P0yRg>_7ZcK5# zU#k}sIxmf^Pi-3%loH9rLX4j^OF2%sQTr^$n19A{L* zitUR_?$<$VFg7-2VqwA|IOLA>M+Y-ov*mV?`iNidOUg)8GpSAu6FlX|6kFmJR44td zMEr^}-4)tfhJc757ZoKhFQ?gbfV{O9$H*R$-0tiY)|7r8YaZVj8}uzKsG_P=&Z;!N z1t>t7=7rP)A{X&x2*o8P&o$6~FpAlHhX!CFvU@cjI|6|0r5ZS(r~Rb(+oP)IHO3hdQ&Wi`*s=iW;n zl(2-xR}ryiHMlGf;NGq;ojdGcZ(yU6B)m=frA|yny1rgQJ^!ray;9`Y5jjV|YTb_xHD9*L^_4q6TZ5>jrcTn&2YpkDq^>O<&&LHWWkzocaAY z+1lQI!DU)pWHkuw0AIvQJu(r@XS3gp4PsQMOOFQ$2nck&@O!t|DG*dw@QaoIvJ=Bx zJByHnx7~lZMyjpPGcX73f&RyQ*ua(79{m;) z_2uPlCHjRYg;=QY>w4=ilM9uLdR;b9FlE~cE2Tjq2J6{wHD#GG$5an{{^W8>Xz$;@ z|AncHjZpZ#uea68-2SyHGN-BNp>?{RYtwk;v|Uf%fcGBR4b~zf;e9JE>TMI9iwLTU z7Ck*l;a7b9kJQvNGb3YSEcU;utOunAtCk(uFF&#Qka!2Arj&G6X+uYLdMzUPyFM5b zB#j&#>|o*L?!=D1VvJS7iUZ-5;7{S<;n2+Al@TLif*N&+ZbV8d)YaS`k zyUYHu0bG4!GYc+_rO!esM;PRyI`;NeQA;zkV`E)rQMYW-oTOc3Y`D^4WB4T5|EB@S z|1>1&_W(zfkoNUI58g8WYlF9c*%mSUlfhfS76G>C|5I)R*do9d0k#ORMSv{=Y!P6K z09ypuBES{_wg|9AfGq-S5nzh|TLjo5z!m|v=>Lzl=<#>4{wr(|<3H)125b>vivU{$ z*do9d0k#ORMSv{=Y!P6K09ypuBES{_wg|9AfGq-S5nzh|TLjo5z!m|v2(U$fEdp#2 zV2c1-1lXeg@wSNdUmLvr%eIK=pA6mtwg|9AfGq;%Edu5(vH|8T0_H6O<}Cu|Edu5( z0_H6O<}Cu|E&7LF_Cf4nVX|JMd@|FSJ&`6q+7fGq-S5nzh|TLjo5z!qf! zwg|9AfGq-S5nzh|TLjo5z!t$D_B|ArfNifbP=G709^#=qW|8yh~Z!BpZ?3bi2a}RPXoFL&_#eQ0(23eivV2&=psND z0lEm#MSv~>bP=G709^#=B0v`bx(LukfGz@b5ul3zT?FVNKo#&n<2&tbzY1HHa{Je-myaedZ_dx5I|-o6#r0{&a!EvowMB_MC`CXqnO7@E z{A+`5=~T?d>B9Zp)jjFi^>NKyicX%Wa0I((#3#ihZcs9hTmqM91UvMIkT&DZ)5ZIT z`}+rq>)We}$yCj3QK2wa#Uvh$ECKy|VZ#Cug8~t)YytTMZiy(4$PSzBvz7Y?kNMp( z#|m|kFgE!FF7*t4Xh?I2xOKUtRhfj-4^^LPbGdjP)ilA%$&9=EyWGKWg+w0lNH$Ok zuU598X^EI)wTx?>oLimJ&|2Hh=|*UyyL1ePN2Tue>6&wewsb6qWb`M;QoX(!*<=npq-}(H!b2Qqx;MXLrWktHBPHvIS$I zXV*vf57;aF%U7otOWh!a>vT;12wcqaV5O3~on&--sy1kDs_Mq=)bamLK zNEiA6%EWSjGlZ+g3-z);L*KJx1YgZ~<>mF|+3%B$#23y=GD)ZpWd8+22f8E zmrvy831C)?lPnv8lnwKjXr{@@#yq}n(Qr1gNOp}ZLH{bZ%j=81(>?nlbIoiakqCC4 zKo*%OQQ2r==m#$x#wrlPA{@@9nJuWCB-y>t`S>-GPHjwSrK)S^h!+wA!b8c(SEXU|evZzLuutv6!dX}J8o`_1Skan7C zL2o`Z;0-k3V_=+}lU>LF#I3|dB~ei^UdA|EKd~ioa(xmS$<+gbp1JPdvMzMXLfaUD zp6S1)RM7*a3Mf_ob4nHSzf`K2|7u;u@MozKHr2A#`;YY$GYk9Q>!~lkLXpEVulX># zA4slE>dnqI3FwWv!c))lk99dk&&HlRqpBKt{fMsm;j2Xb6`xu71Bv2K)Sm?6Au%w_ zU{v(ynPVvltf%^Jb6so+d*|faT?$R{O^u=# z9_lK#%qpI{d??vyBU;`3V4ivwNRO}koDF|;@CUW~ga1quIEcTidxa*}@-$jC_=aFJ zj-4kPDS>sr+Di4-N*4Btl%@;Weoha$emXxXukq)Sm&DVei%%p@@)r8cl=E|6U9_ks z1?p3_%0vbF1S)^Za>6(q=^`^@x84n3(R7dd+4XxBEA-%;+iF6RciJ0s$M-c= z2h-4^2q#p+B|tOzamcm0ce%4p!~JO98Mf%nTn-<@J&Qjn-M|#6)DoItU&Y5^4;$@< ztI6S;a4r?91cgRr7K5b<>hmmFh!JyL8J>i}3HEjS-5R^0<&4PX@9V+j#whgDCv#^U z;E%&#OBo`y?cz$t--h~kB016*cyT~$MwCjE@;4pgsPL)q5&AP~vi10a$4qbgwP{$a zah1wz#?dR{G>Hf#_&&lFFe*LMuEY0m%~<_4L-&?yjGJm(TTEcBO`81m`!p&-b!7N! zaDNn|G|eO-C(hSkcvv3;R}6%Ra8`wcUj)HP?Dff2macNZDeU{ zVNLt^^p87lK5HW_(~tZXrnMeRWT{81U~Q+T@VN2ENrdTW`L%5H{s{5!PAFs!y~8&$ zH=qR>ne&<382$aZu#vTmt$?AH^2n1yiz`_(NNmD97au(Q_Dv*G0YqhLI)px4#ME2$iq#eZ`xq+wj*j|c)^4?2do;9be&s+p5` zQquE~FPmDkz!J)H9Hlg^OB%AMOJ<{cMbqbFcY8(RVr@bWCiN0#kn4-EDybRcrY3f` zHKKyH`S`@B@^W4&iZsV)HYZA%HG80weM%m=X-Q&QAxpO5Bm8OI(l{(G@IfGb&b6v< zFInASsCeLc{os624|dRJ^WNtfE4nh%Ob(qj2;%sj;5p4;IU6g_F^s*tx?A0@{UW=Q zH^ZxMGL=m_ZKLIoP+H3mwd_r>x)P0rnYhuPa8DY~5Be0Kon5~HTd)%^Zz?LStp_lM z%q?F=7I76y)-9|P2VY|xOa=BXocf_3^V&64rRVE7@XXU8)+$gN=KXHyXAS-%nFyMr^S>k>Cj0gTif2t`!IwLzZD`a z&)LMTEAt`+k5c?IF(%wpm)`BN?1fk^noNyU%4yEoBfRot`Gz- za$HWq^?a)eXK7CKXYSv=IA6rJVCCY}hwRWJC6%NnnM@xle~w4?WjOX-Q*SgUAS~D? z@ay~9*!vDHW{mgr=BY~iu{0`i$BF7mqHu^OLk68t{+;#Y8}1a zt{_hgy!GAoAMOOgA#HXAiBT_NWUp5&!4Rq)C)D6J@0-$l`=mr;vw(~{LQ^d{z$LT{ut4Cmj_ zw#(zCh|>rlSY?*=8sr*HCl3U+@XnM8V@yuRg`dVGxard|Grl2D`2B9oY>wb(wB&dM z?Mvm(UoSE#La)TI3sFAv!#Fwv?)Hopr75mbW0K7k)@sfSYiL1i>7d@I5~N& zz+^jWnm3)6^`i-nC3sGEMAB1QkkPB9QD~Cak-NyV>ihCuteuMNnq^ux*g)wmqlKn| zRvNjVLcwsQ9kV}wgd59`YyaWV%PoJrZ(aCd7=jVv*&ZC;Z;99F5IKK){w%K)ohmgeq5 z&FY^c_#=d^9L*iRsb|!4A4B##gJ_P91%QlQXx}ioG#4o@om9 z@K13F{WA>%CuU+KA(^jS21JUtg4xcu%d)uo$W>U^SKohMCAOlwzCr#}V-3z1bUFF@ zD(2ZiZBX;Fv3aV?Y!}((G$KKWHbKZuY_`A^2<@$%SQwtrb+_R`Kgd5W!R?maWx2YSUB zN6oWxW5)#WTNB`0`IzNrQ}`C58|LtszI!!HCzqLp6WW*rnah%}`o_l2sO(4SU1F1$ zF0d>!JOd6oJ`0R0bhR%91 z0xhMLE3PpnG zV2|;pF)^YaS5);nf(Sxm6GaOF_vF~pv{xre3o!A3*E@xR*S>O3w?+SKz46|`YW|%p z;AeySU&{h+Q}VyE03hIJEAUPh@OR_;|HuNs++7*4>eg&LVcmA`^FjN)As7e{aUgRBjsdLOd3Q z3ZbOD!pJ5;GDhe->ZCv+Io~RQPiha%Aw52)Rvn?4gW*7^cUZx|LFoDSy_Y?^=;l7z z7Mqv#B6L8H6!QYQgNo+q#;F z{D!F?U=x6)2UPGz9(`Pcgq%)`4%jSe9ZcP?1gbc+@KMC_*AiTAUCeQ2yw=D#(cw*2 z6k=6P#WbTn-es1NEwIS)L2Zgy!mw(7=AZ(vQG=oy5MF+hN$G@l^)k>2j!WsEHwIVw zlujj;n%1!|f-h56=eIr(YM)CbfPJ!mgc`E>h8FuOs8S?6b{*P2tY?N>owZ+};|Wn| z#V?DHP>JWvG_*z%HO|DV+%v};))3aV^mIy_>Dy7U~L+=4T%=t&~yg>Q&xRbM&BK0Re)HL z9t}lQX0gC~*MUO4hfiILtwhD`=zw%MgEp8%%hincG8MyuSwr4}3lx_NK?loSVpRm> zwpew~<0Y~;637)A7Q5I-Cmq^s-B94Aox$mE7m>hnU<)K@G9*>;|8}1hZ5q*BJskUNi z{V10%n8w?9DO_Yd;%jA36U^KHg0AS&M91IC$t9p-BStTf=~Qa4)=9-A$H1m>qOFjK z^_h|B5^YIjdJxto$4N|k(})ybqp>jUj}gH0P%TP7LNJtR&1 z_!5@}xPk_0rg^HY@lZyu&Qyu94<+@-16z%~Az(UrVvTiI!vl^`8534SIm30Ail?ll z&#n=)EMLFJ3+}ah{dOc%0=w<2#und8E}|2`rt&5xv-+hF0}hq<$oWpEz062lE;a`A z;L&6SYn9qCUNEiD;~X5v=vV!SrY^M1EGTJ|wwWAOb4bkTH?O=bT*D>bh#9o34tVtj zrb%h&yEVxSmFSk1pe(?u7O3O9ucIW;!}70|GZgH;y;P1vhbZH%GqNI5NlLEdWzZ4W zWzY>tj`@iSDtp6e53Q@qydYadk_}PZjTzBKCCrFaM7QI@=?F6MeA1W^^$9DFLkQn4 zg=S!b!rdW~RILWeNRL>61c0374!mbR{f`D$-$Rj<<4}+|+W2`UxB{R+jb-f@;q-CN0VzzKFn~hw|;W^^9#ZK+_`VZ*{S+x!Tfj2 z|9=mayxjcYe}>AR1@ja9uZL`&K!JEC${HF=p|a7@6A5L$9LkT{{%WccScn+ZuR*?i zz5?TE(rOWO^?-ACTC%nxU3LBHI`WqJ_)y>_*NUHMcsc(#^mP_`a><-82HXmCe!8+N zQ&t-mtkMzX%Y})*Jj)HP`&AnmOmLoTiI3w69?zZzJ*z%xn$b ziW>d^3Do-xE17I-d2&o18A16t1TW{3%4m&fpYpgjv{Hk%PTA*U@Snet)|)buT#WO? ztogJSBHk{6=GGZ)KHg~Tru}tITUn?qyGO^VJ*nQtj#(kp{`uAzds(q1Ji&0MJYy!N zs|mo5V{#Mdkl(=>g1b4RK|p0|ZSPc+C}+rt(WEUm7}B|NFT}Ix`U*Q#0zR$JmeB1%`I6T)kBI?m^Be8D&KL1duoFcpw>*t4JR(@9Q{$v zQD~%CS&r@{SpCQh?71JcJN*`V!`$k&rYFuQ%C_ARVpi(3QP59mZN5<8;wO5e%^afy zVk~upl*)6xi47NdH=3VJ6g(hzX(WeEKtliC1ln3kD1Q7Ztgzu;3gOK)OIMS@wa1e9 zk-m*snw7CAy{j~Hyw*GbB{cQ&JO0TvDrt1X)YSf7S^i8m&ZryOmI<6AP1LOtKA#Rz zqC(~tWw3i||3afpVrv1Ad?^(#FWk08?@nnl>l<0U2O86yf}p1)jR;cfw1(1$25xw?%4VBeoOE zyop|3eO>`9GeFs|5ui$^Mxtb(34HinD%UD)bls|Zajp&Jx&~K%I0iGU;eBc4mv%8L z>Sn0Oqo7PF=F!gwt#kOR0kmr8Mp)`1q(B`J(yMX4OD5wi>d79>PpA~6*&i%evI|h} zcYi`%{`6hO2vJc&lq|?kgYXy-nz1cHiYVxRV&_eN0RJ`j=bH&5)byFm5ot^&&sfIfOf=d0OxZRLUHdzXo?JItjk zjE_>lT(lkPb5$qk+2?Z;LyT}bHmfTbb(rDCw<|a8(UsHSulGfx2;cyR-o5MtHo4K# znZk{}4^JYo;zB!z8U_z{ODj|_Cq`!~xfyt&o>rI?^9s7npUlgp@F@^|S%88yuL$kr z3;{Fxs~#bmR&}F`)sqcW1`Oysd;ZsUYkh_N-?*vL%oWk}I|GQHki%ER>NKjz=FtV_ zzK&H*UeQVo<}kvQ4cg~0kI|OV>qYqH(kzclHHZ;Co+pgoKnN{2yN3Gm&EVcB=>Fq# zB(`U5h#@P;8kT;GL%p1i%(BzPMJuJGo2SdGCqc)BZ01>$V^g77>l)}|^wnM@7PJyOxB`M(q6x?rZ#1hBl*G56#lYck!jl#XayDLGVZods z*=yeIqxEowh{%>=>2#vSz6vIF_=NiKRH@7uY^diAA&(~Y_YJ=idTtP}X*npwrnS!L zJetbI)~wksx!pZ!yc*?ZVurDXX#@(;%&BVYZ9%XcxRBY$)a7@cAnYmbubOP&CHVXh zxCo0>L8QC5$J_oynwQ`L6^2ETZXYZv_G}6*`#C02Q)P5)(0%o+4m%>-;`0J&$Ler} z{PBFY!xM-2tkV;wdcJLOb>Ga6fe`T>#xJO%Yi)Q=UPp6+ZJ5;C!40+M;&Wu^wLq)` zVGlg^@50Kl1dTUc#FXbWf@~Un#Z~i-C&@y|BH$uz5I1=nlyFeyn-#joQlH6-97;Fww!l-76H&s>&75bJY*J+knJt<&s#9q&* zbBI31r5HGTKdGm-8+wtNCGXi&5ZPTRy<9N|<-C_RXM0YMW(5s}JgK$O$`QAD-w3mK zM_g(=da9mBSSwolE}Y(7wG)>^zxtg&YAc(;m=1~3!p>7FpV3`pJFVl<1`N3`f*3=> zleA+_?GE*OyMw$By+k8tTd3P$+KGD|D+~#g_died=Y_L;yU@VlejOCy2%?d>AvN}J zDsJBxMjwo|t+XPFeT7SgX?Dmbm3J4X;aGHl!^kpkK3S7g$;EiU_V2!}t#3?yUmOxWj$V`Sc zubOMNWDc5-$v)=Cs?auboo*Yg8(P_sv+RX&FPEck(zC>G}WPCdjaqhfm_+Q!@H}=qEtt5|cPPi>`0r z)PT?H^--4q@fg#J8mF8Gl`n%F+udn8ym2m zE$@-z(z6-^iwCFzTQ{aB(`!RTE*2OT>|pHzoA?j;)2Rdb*2v;oON>Ts;wlrX)w~5{ z7ps0mUaptWYj)o$lN;1mBmQbgoh7@k>w4*qymgm|@yj`04!YOxSPiq1j;78zekC9{ zwK!ySMd4CWt-7Bg1F-IJ%FoRemNUd7d2E+ZcUZ%EDJgV|XwaUsn{CEv7R?m6Zkty% zZiNxW_6$|lt!h?2cLTrBG;_*U8zj`F&YdDI=8>V;-@y`j#r&L^0iWJN`$U%%h?Ti& z$6&5$QbGD4*dV`ng~Gs|HpJ#^bCpG4#)0u>g6Bhc$UPwF_3%Ok)2YRX7t;w?>K&NL zMy}=Syo6_))sOQ4l5?EmG9fDo^>(8C%8gz_WH+l1vJ5omjGY92zBf$5BYn zXaI$as_z#bxzDNPrQCex4NbtYpB!V}3$IOHxGzP9uEdbHVDBa=RGJWyeU93Dl$-yj zKL-AEe}6%xznCfihCc@Wf@%LHG?yRn>;HOGxMTzXJb!`6w})Q<(+O};KoDE zEuIfDgnP+CI-gbQmvf4noijvd z3ld0MOaKBLFGL4&h1c}*05(xW27B66(G*msjQ~JsW*AAxu(8oJva7Q7>X}G~@~kk< z^)?0$4Ua`_RNZx_L%IWuK0F=)M&C+$b)d3TiN?&pD210Z=$hJH%`s^$; zpXzQ9d9dWZ^b+aC8qJ!Ewo0X6fi;V|%!7n+_)-W^m(oHMN~Y1L*pSLS`7w}Q;e%#^ zmV-zyMuRIuex2ES_NN-FMSw>2Bj?XE99`4h218s;@1EXo7^$?>Z2ubKlukMTImtd4e9qe^|udoK$KjG^J?8gI?1pMEch-)hJaM z5TQ&=;56t&b$8rQlIPsh;HjK`wSacRsl`PpH=)@=DEAg$x!+yO&2snY6WyTqg^{lx z-^b>(O!SLfe88GEg|Z$6F7%ok<2r~-Kh4#9r}n%iLC=Wy#d;j~*Gf9PSYqxzqe^W79y=mX-zYws-f#R0SoyQY-mZ#vyY&AamwVL!-Hw9=qSe- z?HoEJBQPlztW~l?tIj~`WkI(@viZ+qM%FH0*uce3;=FR<$<2UoH3MteRo>`q($~U; zFRdew`}seau5a(6Hc@v?%?LqVVZ*PyIB{zq&q|@Z5PgiiICN9~kt5dqfa|NP*F-9{ zf&hO5j3m~=g8i%6NOakCzbv17k;2MeaMzD03)!}WzGO&Sj1Vd+()_)p7Agl^Z0T7J zTg5j0_<5xuhUSGw1S7pKIo#A1m^Kg&<>_ogDypP+`vzj{u_fX}=^&!Ab=ece9(LF8 z=WyTS&-6&LOp#Sc?hGmC!(IuSZ*HBn^j<*GmZp}V7*ImRuygcpPRzb^n71OkEp=77Itw;AHk3t;!R40P+a z?#Ot4de`4&Ac3E3mfvJNK+rAM=Z;JO$b0MNejCRN769I6&%eolyb#{Iw(xNS`R>}n z2j;nD_xwH=1cdP2I?dno-LiFnclCiGJfORC!Tj9Zcjw+7Mg-qo8(83W!FS#P;pXGJ z_1nL_7sA5>xvLMtbITk0V;n@_PZ>Y|-F<=Zg8tY)2=A?hclGgsZpoZ?Y~h3av40RS z-_Hp2yUk$GpEmPDeny?&$KA4??%K?MOO^an2K?uGogIy>Y|I=ngoH4-RINPCetwf& kDlc9*1AfI>3=t6wCud_v=U-neKMx;>2ZMn@Qt`=u0HP?)kpKVy literal 0 HcmV?d00001 diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 66f0172e..d1bbbd3e 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -78,6 +78,7 @@ _SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cyli _SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf" # cert 0036 (Flat, party wall U=0) _SUMMARY_000890_PDF = _FIXTURES / "Summary_000890.pdf" # cert 7800 (two electric showers) _SUMMARY_000565_PDF = _FIXTURES / "Summary_000565.pdf" # cert 000565 (5-bp Elmhurst-only) +_SUMMARY_001431_CASE20_PDF = _FIXTURES / "Summary_001431_case20.pdf" # sim case 20 (storage heaters + RR type-2 + wrapped "Double between 2002 and 2021" glazing) # GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the # Summary_001479.pdf fixture. Together they drive the API ≡ Summary @@ -127,6 +128,20 @@ def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: return pages +def test_summary_001431_case20_extracts_all_five_section11_windows() -> None: + # Arrange — sim case 20's §11 lodges 5 windows, each with the glazing + # label "Double between 2002 and 2021". That phrase wraps to two PDF + # lines, so pdftotext interleaves its continuation ("and 2021") with + # the next row's cells — a layout the window parser must survive. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001431_CASE20_PDF) + + # Act + survey = ElmhurstSiteNotesExtractor(pages).extract() + + # Assert + assert len(survey.windows) == 5 + + def test_summary_000474_mapper_produces_three_building_parts() -> None: # Arrange — cert U985-0001-000474 is a mid-terrace with 3 building # parts (Main + 2 extensions) per the hand-built worksheet fixture