From 4cfec00f2242d833e7234adf53eecbb24e6db1ef Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 May 2026 23:05:52 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.17:=20map=20Elmhurst=20=C2=A711?= =?UTF-8?q?=20glazing-type=20labels=20to=20SAP10=20codes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes a systematic +0.02..+0.07 SAP over-prediction on every triple-glazed cert in cohort 2 (13 of 38) and removes a silent-default failure mode flagged via cert 3336-2825-9400-0512-8292 (+0.0674 Δ). Root cause: `_map_elmhurst_window` (datatypes/epc/domain/mapper.py) was passing the Elmhurst-lodged glazing-type string verbatim into `SapWindow.glazing_type` (declared `Union[int, str]`). The §5 (66)..(67) daylight-factor cascade at `domain/sap10_calculator/worksheet/internal_gains.py:512` requires `isinstance(w.glazing_type, int)` to look up Table 6b col light g_L — string lodgings silently fell through to the `_G_LIGHT_DEFAULT = 0.80` (double-glazed) branch. Cert 3336 (Triple glazed, worksheet "Window, Triple glazed") got g_L = 0.80 instead of the correct 0.70, lowering C_daylight from the correct 1.072 to 1.041 → lighting kWh under-predicted by −4.53 kWh/yr → total fuel cost under by −1.17 GBP → ECF Δ −0.0049 → SAP continuous over by +0.0674. Fix: `_ELMHURST_GLAZING_LABEL_TO_SAP10` dict + `_elmhurst_glazing_type_code` helper translate the Elmhurst Summary §11 lodged strings to the SAP 10.2 Table U2 integer codes the cascade keys on: "Single" → 1 "Double pre 2002" → 2 "Double between 2002 and 2021" → 3 "Double with unknown install date" → 3 "Double with unknown 16 mm or install date more" → 3 "Double post or during 2022" → 5 "Triple post or during 2022" → 6 "Triple post or during" → 6 (year-trunc.) 
"Secondary" → 7 Two regex passes strip the layout noise the extractor sometimes folds into the glazing-type token: a `(?:Part )?value value Proofed Shutters` prefix (from adjacent column headers) and a ` Summary Information` / ` Alternative wall…` suffix. Verified against the union of cohort-1 (7 certs) + cohort-2 (38 certs) + test-fixture (9 PDFs) glazing labels: 18 distinct surface forms, all closed by the dict + noise patterns; one window in cert 2636's Summary_000898.pdf lodged the year-truncated "Triple post or during" — added as an alias for code 6 per worksheet "Triple glazed" lodging. Strict-enum gate: `_elmhurst_glazing_type_code` raises `UnmappedElmhurstLabel("glazing_type", label)` (Slice S0380.15 pattern, extended to the new helper) when the label is None or not in the dict — surfaces mapper-coverage gaps at extraction time rather than masking them as a SAP precision floor. Cohort-2 Summary-path delta progression (38 certs): bucket before slice 2 after slice 2 exact (<1e-4) 11 11 <0.005 0 5 ← 9421 +0.0012, 2536 +0.0016, 9370 +0.0017, 0100 +0.0028, 2800 +0.0044 0.005-0.07 15 10 ← all triple-glazed 0.07-0.5 5 5 0.5-1 4 4 1-5 1 1 5+ 2 2 RAISES 0 0 3336 (user's flag) closes from +0.0674 → +0.0400 — the residual is the remaining systematic offset the next slice will investigate. Tests added (3): - `test_summary_3336_triple_glazed_windows_route_to_code_6` — pins the mapper output for the user's flagged cert. - `test_summary_000474_double_glazed_windows_route_to_code_3` — exercises the DG branch + the year-unknown alias mapping. - `test_summary_mapper_raises_on_unmapped_glazing_type_label` — strict-enum coverage gate via mutated site notes. Tests updated (1): - `test_first_window_glazing_type` (test_elmhurst_end_to_end.py): asserts int code 5 (DG low-E argon — "Double post or during 2022") not the string verbatim. The string-passthrough behaviour was always a latent bug; this test was the only direct pin on it. 
Pyright net-zero per file: - datatypes/epc/domain/mapper.py: 32 (baseline 32) - backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0 - backend/documents_parser/tests/test_elmhurst_end_to_end.py: 0 Regression baseline: 694 pass + 10 fail (= prior 691 + 10 + 3 new). Triple-glazed original-cohort certs are now closer to worksheet too; the ±0.07 chain tests on the original cohort still hold, and a future slice tightens them once the next-largest residual is closed. Spec refs: - SAP 10.2 Table U2 — glazing-type integer enum. - SAP 10.2 Table 6b col light — light-transmission g_L by glazing type (triple 0.70, double-glazed variants 0.80, single 0.90). - RdSAP 10 §11 Windows — Summary lodging of glazing type as a type+install-date phrase. Co-Authored-By: Claude Opus 4.7 --- .../tests/fixtures/Summary_000888.pdf | Bin 0 -> 79294 bytes .../tests/test_elmhurst_end_to_end.py | 7 +- .../tests/test_summary_pdf_mapper_chain.py | 66 ++++++++++++++++++ datatypes/epc/domain/mapper.py | 66 +++++++++++++++++- 4 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 backend/documents_parser/tests/fixtures/Summary_000888.pdf diff --git a/backend/documents_parser/tests/fixtures/Summary_000888.pdf b/backend/documents_parser/tests/fixtures/Summary_000888.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2a48320ba0137242ad689b6d05c67d34bc25079d GIT binary patch literal 79294 zcmeF)1ymeO-Z1(kSa63x@ZbdZ;O>NA6Kn|X?j9gOaEIU|0Rlk=cX!v|?(Q1&4$sQF z@9w>OzuoWLJ@>p{Pfn+MTB>T`-__OC{HCas#3dP7nK+PG$yms2^v(JCnN{6vjhMys z9Q7=1OqiAQOpF}J*q|Gg1qF?)4WYXrJ-+*!UH{mHSPW`Q&H>HR(#BrJR?onQS=`9k%)m%lN|agD%+W!~$X?9G%GSo(2wEf_W<@kkUJxZRaV8A$R-@*# zuM^|udE*TkV9j-9wTWGKuP1Eh5&84lFb+7le*Y&gk$Qg z>w(ed*Fk%N7}-abZ1fpJWu4ktzaVTm=JlYaMq48{uT?A1#Io(u`pgW1+iy0Xz|FQ5 z$n5b|2EE04)56YUIOpwzLM!>Oc%56fUE#98vZ9mr@h(M`0#;(xFl(6xeNS8 zB}?^QM;`6!8_SQ+hvy#nLp!Xk4tc;0Lm2hu1h)0|-Gr&o|KNxqMD|a+7=pB$k9*1S zAJ_i}SJ8{@QDdc)OzmtK8?x7y;D#O^H0~2NV7tQ9BT1>hbid*2>Qu=|56-*at~ov$ 
zZIv~&Frf)OXVtTHMr*pUG!In1*?QqUztM62ey`;;x=D2&#cUZ%F;RWNb(Fp4%$(al ziiqszsM!^-k1m8dQ!XP+;WO9ukH+n3{N?OK3k!lyY&V$tH=lkgAHvU?e1ID$gm90Z z^)vsbBb0@};%O}7Qh8pNYi*(*?Qk#ttoM6<;+Ea`gE^d<;}YQ%lX!lC3)e{~laKCb z2ef?>GP<}+=)B@fNS{QDAk=Mw2Ha zBpM{Wy-nXKShX>J)ev4vwl}0amry=|b50C~Qez zuf&`K2Mh35x#??9$CBr@W1~Bo9F+|Oq06K!(`OeXye>Up2{#^&l?N}OMpZ+bf|B~{ zBTw(yt@jUSulK~GYiX$Yx~PBh)!y&xf8G8uD+!)*F5+@ESw(;>TAkX2eBI$ zs69&neG$LqJNABXsO%~JS~~RrJM!)!U6;ZqqQW%fy&OM8IUpg0Ltfw%0xBJ+kOOm` zvHfgaw4V6!niLQ9m!)APKJyFfUi35@b8im*pZuRnO=dgGUE}Aprc+FAd>qMBcg8DHsY}~la=ju4IZlESd??b?S-qn9Kj+&le*}HhyC!iIq$AB z!|$_H29=GCTN#$F*7L6qja@2&G}9)hh|W67-8za0=RHRadLspIOFNx6(Y9|Y2OsF~ zz;3yxz2d5JxP4f~Wf5kr*jP6|a;RdX%6oNha!ge4ZVpb7{P3x537K;S7@V1#aV9W` z1E#0bRmp7#q=NMZPU!U)R7@+t`|`oOiadRUd*L(!y}DMGqXk3$WIoQDB3ljHg*sVX z`3=W?jKhpC!3D0V+k&CNAYr{eF%|DK_BF{3;=6foSdQpk(uzKUsXUhKhO#av`f% znD#QvO4I0YVuFpTS!*&iPtp+~_lM50V1`_=?F!c2=d}wFid;PiV=a|J>Z)bGkH@az zkvN=nK*yVEqkN``F`Cl40Re0*b+LUYCbNW(Y$B_iUIdDcPKZ5a6>u9o%p(FgM*4-MZSe0BI#aAeL}+KV?>^zPNPyd*a-E)x|b#$&hWcx9q1fs-^cu^RZ;cksoTQe{V%A- zI5W-}H*hkBl2C8wx)Og?^j@b6W^fq?fPB$H8wm_s^N|%U?~qB8wT2f1X6NTH$FkTiV$%&@kaXyO?xz+|D~Td$2iGL%T^m^U%7aUU>2z{B`jIt1oWc z2h&$+borwW*N5#TCM0`S%gR=qY+{V9o4swZC5Qq|_94vs<;7TfyRnFnqhlYWz7G@CE| z9+Ot!&EO&1v*VC?Z`7o>d}Su(2ZAq)^Ip_RMl)I@H|RsO&;90;8NmS;2gFBm;0Tc5 zZ{%dH>HY$N_!B06-v;$mMsUfGO2hn+c`3nnN+%i;=i8&QxBd!OV@W6U2~h>%tFBa; z+83B~{dqZin!b^qu`M(D)L@Zaj30ZJM+8+ybj(jnEOaki6dyJ_o6aVkBz_&s1{Nv4 z^3re!9q5-0j8W~9{klUmj;5RBiW}KQcd83nykMn(l`oa{RB+~b&$Nm4zU_4SS6Ivd z^uX;w=6un5miN*3U33~;$*$RZ$k5-=4Ut6Xe!B*yCUx2m{c&lzim2iqrxB2of-F}T z1Ki&4d?B3pZQ|DSrR?_f%};Rq4oGsFY7ZO+_JQ>VI%R!*BU2Y|t5~oQZ(mN-<{7%3 zuM$>)NV<=o9YxkkHehsc*w!mIADjfCxCAL&eivJ)w7ltlu?Dk|b9|(%=JA7$rZ0cp z>HJ%6dW5$Ffv&93=xdta&o9wj8ZU)H`~1BvD#v?w@>k>0E!iCw!vJvW{n3GY%|>XLn9jc6_k2&4HUrQ2F7%$w721|T*gW5PGyaYlrUaMqfXfWg|!v!TqxI_daZ&k2%wy)6;K zoq?L<*?`IOS$oaVSZ8rK{kpbcjF$^MdR+}| z^IA!_vVU%GkTbQ5m18~>d@0P?nFzk7bwWJseJCsfJ6^3u;}`5|^2d2>L+_^6Mj5ij z&kVb^czy;g;CcBiU@#X+RNX@Ommme-oj7DKlP*NSjrIls>$m~2()Qrs2%Xn+$3%9S 
zPPM1+sZ3uVhtx-khQDJY<#jPj$NTmqhE)yF(0TVXX{N zJ5yBw88`{_BTTdfSnU zaNf_`rZbblAn#u1s5Om%!?0OGo{leJs<{YyP`?nPElmfF%zpK^PV1&4nzgNkFe2k& zE-U_k4d^sBakw6Scz2ZMDr(K(csUvM$yG2jVE;<)d?1Qf^QnT2K1NAB1+E{ek*J{B z&zcV_?WYzo-#@IxZ%eWdkd<1Lp1F3+q-4om)vY15GsL{SHl+w-X6k7Ra;2gshFc?h zSCA4ts&8M6BAg%hQgHMv??qe#3I zaMA7alj%Z1>Qu{5q99d!_!Zo9#9Ex0#)bSZ2qVi%gLLeyUF9o3k3#n7rligVykvWD z!J^Cdq%rJbUV;?s)4ygZ)4ZJ;g}tQ{KJR@@BpNPs5XAlrO1J^PhIHRCr!>v3;#y=R zyN(;Y$Ip(EWP^B45SUhbQRJn81t3W~dAAsDyME=-PBFr}-`e&1y1UznVkI(Hn3C*O|3(Ito+$%zgO#EFWJTy%CD^qEB-wOk!aWu zX^JFKn%EKd+Y_~QUIownR~|DrN1i-}5hS9E$XK&EF(R8SsV6_@w2JhIC!>x)sjF}+ zYRa*^FVpMw3q6R%E#-z-e4#ghpeOAdJLuSun8r1qJvXkK`S`;#p_WX+3*fx%%-Wg9?bF$-jSTvk3(mTA~dw((-CuG#JzQkF+zKq0D4oqSahnPlMu3koN}a z@rvqw>pP8OpZRaAO`t;C-=E-7nA8!3xqD~g&>`*a)upiH69VtS(chOmZ8Z9Q@JWaz zm_o@fzDg*){YH($iD(|UrFAXg2^k+yIA)=Ov^v)#A*{tuMUcNsJ#wZuV||)5k|BjQ z(c7L+VB@_~Sh~^~yedH3^GoN0pY76XN#OWFKDUv6V?6Y6`=&KS&JYi!7)oZ-ELUJsVzr$~SK+WkoMXa2Ss z*KnS5CKzNBo~Ok8TiiQ%j^@6;l|TCwshM_I4$W;@+sLCO10=%?4qFL~u*BBnG}483 zE$9pN(Fn9W7yI7NXa9K1!r?F06Ld`BXDyaBL?HTEezg(P!{adDa$0+FChc+R|4AIvMf^jIZq!pue?n5<1+)Yq5fiITXiMRI4o`uv|L<=o*L*1Es9{;jnrgX{(QCHnvsWULzAU=JH@_ZC8l;iT&UkvTO|AF1%!Q=7{KKk=B zI3yferA%Spe_Uww4^GO~3e9Kd(?Lc;C*--C)(}ufW?1FZ!9+qwQ%@9!fe|aQGMAaB zsiTo|u6=_;f-#p~FThDje;8cb^1zYqGmK{k%`cJ{%ovA*3Zx&2YuwTzXw8~cv?w0mmu zii$|Ee#^^Wv+dq-4RI%5+C_g%pOxy{V-4Tu86>Hx9>dKlreS^ah8_ngC@5&NQyf(Q z4qr=%p2X4kLSyq{dWF4cHRapW$qw9~$bPNV#nsi%X?R}4YcEe7sjBw%tdTfXiyCL3 zDq9ONmYi~|O`MGmD}oYQ(_7BY%BK*E^h{k%okZJBPoG=dSxjdlv@lTGzRs2tDAd}V z@a$5YOl)0fo|^j74xV=i2-xJY`K5J!0c7@c}QRg9-@CO(E5ePKYLlWK~PDSn78o3b>9a zOc1POpv^#Ju-}eg*jgiqYpX}wru2D!{6t~mXf|u*aLt&Du->GelardHcA-OnHa^XT z!MZ3jfwD|){7iQIhyhO-NB!8Ph@Oi`?WQa|d)2`a0^y#<+bl;5e8erJJUINMxx7H3 z1kk3-wL0w{MgPD+&&&L}cVDM)uO}N<7q5J9MM>R2u9N2HBg(bE`>ejPuAgauo;R%i-l5JG8l#(;WJ{>Ht9k-z zEH1X)9Q5K&9R7BCdMYO;cUhO&5V5|#p0Kf@$yL`_6fv~9SnGsbvV5Iu>u9Hq^WNh# z_y*_d)m!?h62GTXlgYYc^cgISH+e9-dDH7wX>BOV0q{YtwH{#+VGD<))!IcB*=nNN zw|7jY5Cdmp_%QP|+o&CL3oR|p%!&`yl9G~HI(p>mAw*9+0~L7r_X=t+qW 
ze&9QbscenvL!`@g=Y?mkaLO7Q@4sza*1hJZLghSXNMu?(^lOxY#*MJ3O<6cxz!Gc< z;mcsy2s=VT_YQmH%Fy9c+W@atw9`_lRxQDg?ulm&n`fFWepD{zS|I+NX7|pTYdA`& ziL#l1o2fSLs(rlX{N|4igAt0a+Fo7O6(nrnVT-MPdC7g?BC`1ENOK@ep^S~as%)72 zYjIAkW@~;<4)qaIVCdumS|KMj-svX>6bl|>weySs7{yQS2z4QFj;_Lq$%&bb9}1p> zM=Zc>3^z5ec573K^=iK59$Jxku@sSxbiy}sD0K$oML3N@~xrs*$rb9ppXe=Um#puO{ zyuW>raN$3SD2SO?myay#ZM~w{6lJ4!pR3zuVbof|n$}C+w7ajX75C7Zcg<_296T0U z-rMz|0^XQs))pBD5@#Ug8t?+6B~uhkj;Y@m-WR}*qY%o(?VYcC9jSE4*|s@WhiXOt zFj=%C6N~^oVZak~Gx+lt`!0DeVQJ{2QsZz4NHFcmudc@}0^kC$u}cr)!$8Fk-9qdR zE-M^!?#}8H>!FFZ6;oeQ;&J2Il<`1=Lw%8rSh$#Y-m5d`+Qv+Taq=1`^*cK|y&nv< z!$TTz+*miR!?meZ?h6hM4#Lh*Jn3tuFDftk!{nDUmF%vk>G=UXlQ8UMEXMQZF0iB$B^pz z1U-D~&CPXQ#yfUaj+5gHc!xbp%c0aaRwi=!ko?3!nN9{55VwL8>Xz7TIJbVTO=ez> zh>N4Wv+b|((UGx<@UYKcGMe}D@qv?Gg@)iGA#I=ab~+!)Yqd> zY@3>Y#YwI}Rs5vU9Ivy@bQ|GxYBn}DD)h#YM<=gCIRE$A_sG7!UMeZ!`?rF(x%CB> zg>wmbCI~d)W#*(LZ(eV&3mdYqoYVZ47 z;(emwxFe1;r?W8Bu*SRVEJ)(e2Dp>Dq_%cuX7riKk5mwbChGaQ6{-%ucfQKNDTxy# z)q>}P+Ho$HGA6ZLzOAly;x!&UJa&kSp}t{%e~-v}S)$amwEUk1S<_QHrsk8XlhSOl z$MpOhVFzpce0*A}@2~bJX}&S(ZtrhPw1>Y^2v>OOeP63N(<9T?Qx6>{oM7qdj zJ==eV(mgRY4}<(f*2BYn8K;L65uBZqwPa)S)}~_2B&m#x4r(?mA}U7NZ)IKsd>Jq zf-XJv4NTf>4fhe-dVNX@J+*oBqEg>8*K*t|2OZ0MKt8Bc_yx1DbNTA*?5VQc<$V9% z*4aDLGDzYb&Li5%fC1CBJUA@9jBHp0gZA-T=F_0Ir#?&KhHUZ*ituphT{R!l*2r9q z^vu!Fv}-)4yPJ#UiUL(sRPYB8mh($T*?L+SPkZ~@aio}F;lBEIKmRzqgkb>FM#vl3 zTyuGGAu1{zp<4D+ul>a~3RSjGQ}fHGdYQYb%W5jGel)oyzaus6ypDD_Kkb#NFou|C zT$;IUa3>7?rU5Ui7B(YXk*TZUY6SL&p@f(1@bdD;)G2i{JSUk#WpF@lrR{-Hc$q#p z7#N8Eg9U~?2vIOAJ4ZI+BgpiIMZa=_i)~QRkKdSS;k#qTZGzjn=^@4ojIHerLDoj= z=P;eaLtW_M$jkLH7FfB)C6!4aIel=RDJ1Gj!OvVVTI^iqV*IV8X@Tb~DYtR`giF<;^F+?>{^8$1i z+Ij#NTynB0U>$Ycb!SY2_J-6;ee+TzX_;P~j*1tqlQcTBE}sufs?pjf@vKE$_Ks}9 zMvi^^%;N~D^#vsIwHRO6kKjBMh^+#ZD&?SPdalArNOU^W5W_34wiIstZbcbI`O&cn z*5R=rF)2!l!n~}Kn>#XP(3mtG&3NFsT-VNz9@lmChXRi_c2W=TlZ5qS|Fc(3?7X&I z(r-KHJGwf2S)$&%sz29T!!E!8PgiW2!rqMOBS>{J@Tio#e12bAYCriYic6HT6y-Uj zKg`H2!1bup$tc9mqFz=(W_)rab{W?+HX{+T_WG)2DHFLGS%K5(eWl;6r;JY;?CcN& z3(Di$`@ 
zF$)6&J=dT+x?z1t*yr++@}1rNSA3ZDq92u%Rpk?W!$7?^vsIn>&(9Q(ViM2 zwg0_ml>!l_&$>(#e5hE?Kqi#Y&EUX*HM1~hEpt$93KeM1zkP<2^$GO((C^uhR7Q66WU|;kcab8YLulKfTqX zOWoyPZ9EAZ7rEYqg+N2VO??yn>vW=O-diu7w34syTuony*M(FSBe<$5JqaYEBn^y!6X=Xa|6a(A6bSn(s z7vw}J(0HI~V~FhhAyRmm7I8apA?K$m?moTrmTNLqEe~B9YiTo*M@Qx%9t95%Zx9Z4 zm^jUa5klX~ur!Cgg59lHV=MUC*kE^U>1y7M3I|Cib(^H<0DINtd-p_w`U_TRbv8X8 zp(k&ssHis#O};C%VES{>7h4qipP3gnTB>%tw?tk}C2dt@h2utho1EYeU>M-$e=>iu zvptrU8ZT`P=Cip4gO&*)w=hJe&R$--wB;VI0Urih`-%3~FZO3oeq$b!!$#B*SXk-h zy@5a*qpkT)PENkxycZf{zP>elwJ~N-NEoMja|74({S_52fgLP64GkXyvX3w=Memb3 zO)brC*YR0LzN-5WxLp&U49Yh5ma=WLo0G)FiiZ{A8CEl8@&@DM3+{Wud>3`K0^IhG z34zqwFq*YtVyR!ge1sKnTFE+NVe%9Iyigvex}91V1aj?YTo7a-#Mm5_T%Fi5HFRy? zj$K?aTP^A9@;9vaqYo4hqKr*?#p-s$wHH$vXuNYR6`h`(^P_NgeK~ki%8_UBkdvAv zW!uc`x0{=IuMt~IQT^Op$cg=MN|?@mxK4OD>H%j{mC>E7KIGOl(#KY1=9P zQt*eLc$aorD07TqkNKk3Eqh-b{j@m60=i z7@2$&VRvUom`hs927TyL_0?L|TJbOojEqYDM@(u=>X+N|lGW@E zBxk;$!iZXXxbSDKhnoixH<&2sa^(pm%MHg$D$BzBv}gm?Y-PcF}2<+#lW zkzx5}i^eF)wK91o#3zVKJ7{S|PpIT0?g*4#WYO&|V&J|h0Uuer#i*Yk8RG86qvS;` z6nj4yR6%k!a=6sEzuM9=at?Hj_4UrRN-s9I4?o94QO`}#VSwyl=B+3srF@BBe0kJB z;4n7Urb0$We9pv3mpwiDUKyecual<3=eC!PgL4g-b28sKPQ%tF8DvQ1zZ;7qJ=PHsZ=nuqT zpNK>>Sf9PP;$A*H;1(daDTun5^|YN@4=pj9Mzs0)!A?(vhjwrKz*nlS&{SOP+0;c% z4evNi&wkfV?~M8jmah#04Js=lxrY%++uOeP-rCrVrgnO(0~eSCEzCA;nsVis(y!gX zGE;DB=>F=;+Wkd&zF9PHsNN9XOsBGKF%a(Bkse@Az%||Fj~`zW;k;mI1$(VHd%7zg zUoi(DU_t-T&nz#`us(*e7&$MuIBF}-bn~QEOHy=7it(q<-u%h0M0`P>4;SI01_5U# z`7V*JX{owdT<~t6i_6PDv0yOSh?$dP>KW*XJk9@UpXnAcvSu-Ri!QkhX)rrJzfqLJ zjWK5=;qbXx;~n6mFxQF-LPkYJ&0AT3SIec+1F3ndfQ|g5W|k@DPNB8*>uU z6V7;Iw|u9xf9(rnOoMZ>Bk<$#v8E)ii^HlmR)2s0nWKx#(VM_et>bEH9(#$239v9Q zX`bFbx{H2DBURFK)1PmpSM8CX^nCn4tXJpRFUZL`J~lr#KF!I`?uyVNP&%8cG`2xvr6(bYfS`TRp{y(zF<9{;f=O`@OakWVn3$pVkN%oP=ByA7 zsfsN7jx88Xw6}Lm*Kix#Ji4;d(cL{UQHLzV(Az68FL37M7)<323;Rq&+V=TC=S29y z&i2N#>EY6fJWXH-H8nNGD+TCr~lMTJ^!B17yHDzU)Vxjdk z(g{-EmkNCIMRth$8rfYppEDBW1cM}GH!#V%l8vs1u6E|lBm}&>v-hb-boh<)uDYFG 
z^w)H6SJN-@8%xM7?IK?|kyRXuzZDf(J38XfD2BaNRa4*I-U2t~emH_lL`C5-(Kn4t zix;4Mk*fPe_ojru<%70mK|(D&Nr21CzqSLPA1(ee1k7 z#tY1?PzygFJr~SC$5T*HL`6fx!^ce_Olj9{B7cGL(WZT=T8Z}8*hC&=@xzRd($a5- z`==>hj&&HRfq_}=?E_y5wAB?oRFm>_`kF|P-(2NnTdF>ngR^tW{@@_jVBOiiwYx>9 z-42UyV5}c9N$GWDuFCO>k(jwp7}WHBgomx$EHj^`$-N`~z^}YMD>Jk)uLScK#e4B5 zasIw5E{>V6J(7v}Gac`_m_RBDI^O$X()`SmeQjeyUECZzs-?;#{0LS!i%l;w^J{|5 zJhbX!_&wlk*U83n>h<8t=(3N#@f~jNutNIe3Yw;UZj(D^tzb$8B@lNkc83$PgW`>b zM@RLuY^G*UFEu#qO6(?!z!GCcQJO(a{->MIqNT38!*{utDB}BNMX?=<%VKYl!O-rY zkE7K%Ta2*;_XpRUqf@Y=$Oq%&Ns0yM81pF?WYs3SYnY_?=v*;gb(<{Es~$N>HA~&j zS^`YMp1E#xSFpKI`i6o)m)cWqd=P;HB|5+DHe!@UQWErbk&bfrEn-$q zR(fJ7dYf3Aac2(d+V@(D;~}akO$`Ua15Mr+xu1(|9kAJdZ(l-hVR?!?#ZuO?457C% zh>$0w%!gOiJON3|QDzgB9THg!^nb9Y|QOL0+MP@yvRb4LZ98h@EzaPDX7#T1rH*956I!EIyK>Rsk(w2(%l!)I?tl`(^1(d~%A&%<>dwS3Y~M9{`Q z9Vwhl=`E5q#p@FEN+p9v-i?ss`;qr_%lckO+6G}f;SE+%B5Hqc8Ro~xgjpquO%Hj} zs`V*IG!L$@nk*uYF2uHhzhQ-Rc;4<9maKdUn>!0nv^C6+=kP*mXzps&d#ny?x3~}g zt@#~S6lQInDDnyNo|Rd;^?|h`z6(Am0Fl0$oP@kGfCfb5D?jOWG!S`Vp{0X_=E!+ZU)uJ@f=V zR12`S^!NBKYwjwOnSk`bc6QcAwqxD`8>>DQ8oM2Dwmi?E?V5#BK7O{Eng+-_JXO|( zC1}k`iD(@vl%4j@Q8n(&QZFEC8P^h&Q>_wO5a$svpCE7UXN|yW{u6g^D}cc!9K`b+ z$*Evblyc`}9oaXeio)#!^y;HJpif0jNvn2Z(SSPgn)oz1!*lCmdeKyu#F_UZYs2Q* zs6V$BQU9x7d&WaE&mb79ujXb|>0lvKmX)_4J#zAworD4c0*Yu6C+b_bHK(Jo%PmIY zd8jX)tOQ|SyOK`TKEIfma+SMi%Rr9JH7aU~9 zGu3l@1qEyE;#+PtPt)j)N!l`Q`jb+c9zuTy({Shp(~;{i{eWd3V^_yZ=uJ%RGeE94 zH@D>A;vghD6pRVN0(08)^zC{Gssxb!#n=oi{qQ&D|}sG4njZ`b$Sb=;z4LvdUs*yW%g+c&=1fHp27` zOs-Yn^z6oyMWm|&n>Z&>q`rwRyFjwPWsi6x> zOiV#`&ZeX>VHxdX*Mg`M5~$vOXxgu^=ObZI{rO~wK)JnT5L}8POIX7z&*(Hz zJJS?7C!rG@vq1WmTvl%4To-eR!`3$MXt;QqV7h253edmCQBvarBz z0NMe*K$3oBA)d?QyqgfpuEUb~1tcaW)(Ic@a-l;kv^H;vTlBI6$6Eh8IhAl*&|JX$ zb0q#q6EvT$qP*ygN?8Aztv^rA*C^=m*g(ToYR#`u1WB2!XL(hZe2YJ3c-RY~RaVC&CMI6O)xbx}|JmEq zVrT7)t%b^ODSc?4Y2?{BRx#z&-P`ZCM{|R>z)pVOLWFVK$l@V|p{d8pN|pbK)c7?c zUfDDk|NSc5)wk^QkQx{-1(MO1x3Zd!G35L6LLT+v|r0I`sKWlvibMO(HLbs z3oFu>?O%NFH!&`H-wJ07i%?!T8W|b2+e`~*h{@KB>X3Cnn@4LtY0$gNzR`XnV{
-gxJAIYMZma4z_>-gxJAIYMZma4|D(q(;`#Twr~hSJ#Q9IUrvY08*do9d z0k#ORMSv{=Y!P6K09ypuBES{_wg|9AfGq-S5nzh|TLjo5z!m|v2(U$fEdp#2V2c1- z1lS_L76G>C|9D%(`|mxxW#?cPv$3?XSFzPIFk%)rayBzCQkD{B7BzEpP%^R?v$3+Z zu{N@HB;#RL)Uz^Tmf`v*4{rfm1lS_L76G;hutk6^`Ucn{z!m|v2(U$fEdp#2V2c1- zgm~EdP*?S|HtDZR@Q&->FvL)i+KLY(_26n0lEm#MSv~>bP=G709^zI zbP=G709^#=B0v`bx(LukfG+Yb{q=1}L~=y8kEG(e(CRTyh##Pf09^#=B0v`bx(Luk z|Fd-w+rQU8{V(ex-ha|R4d^037Xi8m&_#eQ0(23eivV2&=psND0lEm#MSv~>bP=G7 z09^#=B0v`bx(LukfGz@b5ul3zT?FVNKoWU}G$u$*8FESPVjX-V9t#$L0XJV|@jr5TqS ze2a%;nzQo(%7@(hZ{Zr;i8H_x`R_dPElDlS;^rS}%eJh_U_4$GZM0IXZoaZkKMP?c zHH6_I9T_NR^nM7MZUl#lc6O~WCD@+EX@%X8Z6@*xWuYW-?^oGr-P$R^T~RZ2qB_qS zq1H|1ru=C5wdgDNwBX_mm79u`p>?aRBh6%B{w5<|pv0kx?UrK^Jk-EF)+Z2YV zocJwwLsv{)V}VY6zJ;pYgy)VQAgMcT4LM``AG8KCFk{KbHKXJ})47SLHHLRNGj2nD zn0{X!v6=*2UPpZ_e4~B?o2OYrZh?RGB@utfY&S|mahLB`6#!9vE$!osB^DEOC?hwC46U{-O`cXYEgVpg?xGE#k9_#+cZ7G_aB2cySK zng1q332OrzLo;g=W{{b+h_!>+U*Aib**iFjnd;d;mPTIh&&#ZAY|uRo9Zemy*tmJf z9w&BAPBKnTu0JLo9$qpoE^abbURE-8cJ@C_&~={pFbdxA|Z9ho}k+zl;2T@84rI|F<*b zFJ}Zam&Yi7+{Vq$O~%gs=Q9VI>tEJ!az|#n`KzZ9)E7o zt&guiSI~c?3vK2+CS+#mXaueGE%h9Y#Eqb}t`W1mk+q4VDKu4H?!ReTr)kXE&vIdS zO@fwKlVnVvz`AbBbyc+=@I+WJ#BOtVeWa4s7sW&q?ijf|%k!BIpp8~~9c&RPU=Z3m zT{iX7x#iIR6V}5k({>qondCm&zLT)94XMPq)TEXS|3iPvvd^D6f?erj_X7$vrV2{O zM2KQJWKs@|>LqQjWYMX2)v68=!uZilcVrCxXVwYmE=Px$*(W33~- zcZqjdB~wrDK71FH6`4kp)rr~dxCLKosej(1OV1H~p8iAGKFpV#r&2)H`dnN~Cp@YX z4fdl-+3U8KZ$5gvqS=^K3AYlK$IE#8^IIhJBd|@cZEtAsR`XdBCsdxi#m6hp@0mYF zEs0_o$1%xJf|c*du)@WwP&i8Ef zi@sej?i!N}2Nv6_iah##7EIY#8bO{W&jjRWh+UNUuK2LcgK(}VgD|s%()9?>mg?YC z17p9v&d|Gfa&JRD-AMHHc#LS8e>U+|SP^mDnvPXd$%e?RyUd_tVaVt{sKUWE#;If1 zc@O3`s8aBk(x8{0F-}{ObHMjPzNyl>Z>X^42s2uSPhnj}b=9xc=bpZ3qdIZ^9Z&G~ zeD23^;e8`*0mnEGL=o=Tl=C)D-Z|LpaQy z!}qohu*4O}LEds#P4fJF;wYcj&6ae!ki0jYi}Eu%U&=UaPWuVJfFfd3wdin<_9v3| zH_V@OUTO_>o9t)mf4`$GD@xV=65wCn&gD(cLGxpRN2+Kk)mJl|T=6%g9bW3?6IOni zVf%6QUBWJ`W<_~{F{(hhQL;;^pRaeQA`}GkJG-kHFvLzjqr^Ws`+ahEw8|M6?9#FP z>oxtJ_p=v)RF;igCNoMPXBcq?2TdX+7c&U+%GMWXsYpYp1dL?U?Pn| 
zf_+w`kR6EbU3p8HGlp+6*`> zo*19-7MOu}6;0gF!kgdMvwgYOsCFMG{(>l}G&=rF4U^}yZ3x0`HH><95m73^zI5Bf zmYnK-eAj18ophlu&j?|j$3>SqBTB@62}o4QJX_Xb+m)T&j-lOb_Gp}Ko+g{Z9EE>v z$rk7T#4Giw&3fN=s&K*=yyblpie}1Y0?sVCQ66DbtMz@=5}bh{UmMM|$x$v_UfGw@ z_Lguzh|kaJ!`={(ig3KK^1+EhH_Ra-7|3k!{keifnk=X!w;rkS`QrzKkCaFk$JN)Z zs|OWD-QE3@V?@#)g{8J%Oy#DFc+NvA=K68!m^wtyF0(xN-E(+m6KeR%Pa>{d-`vs{4fbqgy?WeY zIn|m`$k+B~&QFfW7s>AhLvyse6*D_tHW8a!j6aWO(zYxnLYWE!2bo)%)}e(?t5#hX zaj7$-%V#fdym^19kZq!OZ2UW*mf74%*WBKspQvY|#lz(3kfd8*PP`2>SbRpN1%-^? zWZ&L4=@3r6Vn^8RkOQrmRP|j2f&U15b2?^%+;zgb%wfra4ExOWFsLiTHl|JtG*508 zpR{IhsodXeHJJXWnd?CYj+Bo{im~kUKCbtipX@=IyS3trP)p#548Y@{_a3 z`LHW}y*C0gA_^8>i9`zJP1$&%x)~agFk_4aOSIu9v1H0H2w0CutI*wdI0BJ}?)eW2 zeN{@5tIN5z%xP&AHb-wp{`7iHoy4(o>`3qPw}%Sk90GRo(-RS%_vbKW72uXYCP&t^ z@w=*63M|piCd)kC$uNl;Ay+XSuj7xH)921wM1$KL;T;LB8EP6aFRjRr$Xwke)|gfJ z@lbNJr<89ce~(Qt45PpO&T))LVCtQnZ#M7K@k%#`)ez5LMtq~k{mAK{0lwQ2_Va8phT%W{_x|EUflaCO~tVqw(nx=MZrs{Px*+s4M#umMAba3QHwGYI^ zFD9DA;*;7Z&zk9mFB|&lwv97)$>jd++QcRq}L$@1yO`fzOZ2cB-dQl8z6B zSJ;WL%@b5jB(BZF)#N^&SJwg^ewe2tWOyVcPGNrP(v z(X^qc5^pGtChxxA8;R_+N>~QV{ag*=;Xp&Xuy;<3@(x3Jbsnj&iqZ2Kl@ryEg)Fus z1>G-o?YoeqkYtN7=DP_cy^3S}-tpG)pDWIkR3)87_eJZ3J#E zq`Rd>K~lOyQY1uR=p4F~5g59p8x$o<` z)(q3Im?V9fQ&xT$UVY#jdKckWdx$)ZOs)(3J48$PI#y8t;8uH*F*9(I;jK9BO4b z=7VXto0zzGFxGm$I`Ad1#u%v6_DnBw5H}@Dc?{oO)-|!#j21;t8OeEJGLiy6l;cJRd<8kryJ6qqLrSe%F z(|wXgc~4W?%e%tmsiL80%lw3<$Iay48#f2_vK)1*46T@Z^X)3-QiUO4#*bwxA1W5A zzV9P#=tc{Zg^r;F(zy)g*Rj4>`Dz~Pr&n+2vXXw3hLggFA`erGwZ7e zzY?nndZu|F`hGK;OJ)7|%b@?udPn^eN^H3!eRGWf(z=)r0x1_nfpvk58!<~kJ(wa z0BbEd(hOJtxBJ!_z&#|XDTvpTx!bgKY_I>$S&k@Bf zjI()a2(cMe1bg)#@eH4!=@Jw(T-mb9gtiQ@?{vv6h>8M_ZNRM7digZHxB+ANKQSRK468O*6g87tYa71Q+e zkysgeB;qt0{B~jd3u0l_o{!ArOs?A~OM(<4sRPaMBF zN1U*i=2*k{HB?Ve4ej8RZM-3s>;j%4TyqUJ_~cz}(ORA@CV??{$s>B%uX!-C_nv3l ztN3GMrt&V{f4V^Yf-Qos;H8X{7%U-X2Yiqsdb6&%m?hlh{9gLJU2e?=rb)ulcS{hw z2Bi<_$Z=Zq53Fkp;(}0a4|TG${NOV`jL2#$@Hv=FKB@+BGJPtxy?$d(3tBtR+9GQJ`c#Bb zC7RSu*+km62a_WrR3V5`2S}xdEk)n{)0)|?)YoP2NO|`Jmrsy*k7A!fK_;bXHZS^= 
zF{LqVFbio=k|vGRd+^kX1&;Zr+||$-cFf99?Gy$v%|~IVnO8liOD#iq6`sLj4_S&l z#U~<^KMYn&enO&;$%&)Ps!5Kxu0IW_{o%MW_>_Mvc?qW(;*PRUs?D% z7UTOU#|#CdoC;+ZO0`1WW){p|9e^C7eN7d7SWG2_=GTs5!q|>7Yu4VS@uaC^VIfec ziJFg7;H(wJEXaE|gW;@I(115Izp|2Xs;N~Y9puJ3?{Q}+$^oGnM4?dC@!$&r2^4)m zbz-9#)RxxYzt2J=m9E>y0avJ1{yca*f^=ESBR`nYs52CdfBI^Dn)C*MBMjuidGB#u8Z=kzYLb=Stin#DV!oEM*guE2(uvg({5^p?0+$}Odw?!t zl_N|<;(Dh?SMuEI=nSwBgyFhEq`PvlPt?92IbRjM+2sK^3f&)(uxpZAP55eFbNLzB zE8TMb1M^yor2vd=uL@X_Z>Ba;C$To7KSxyEQKl-1dEvS7=pD*1UPwV9 z1|K3QPsf3l0(c}~yrDpAm}1wdeoY(u)(t(1wsh-kGM$mBh!*#}0_2oD{QMk;??hZ+ z?ifSM0Nr8;P5*B2!3om_-oVWLP(Pb-_0Ci!rVUEja2NdURjcG>Srj3l9+Atd(SL+cLHfbKqCQh@NExNn= z)(riyCPz(GG>`lzBDp1srWBeDCWg(!c6C&g*>W*O;lh}h8a7Q0Q=n?_rDI0mZr6$L z;%ap|KHFRFRv^039P7faAB7EDJgL!!C|?X4RtK5&VXI>yJ@cZ6p@?b5nNvI8(?UIv z_R$jgQLq4CxoodaM*1q03T&t2*4;d(D|z@tGh!e54XOB;1%H%hOsVZ#$N?(j^!N5} za8$8bm>WTK(hO9hN;KBN6)9B5m1`kWIcS|W6fohv&WDO4TEzD*4huGqq>!JZ=c{sC zZQ=bpO$S~pQQ%B({9CFe-L!QAP+7{YEKwj zkOu?f^BGq0e}K1Psx!pA{zyP}S|Z`boqOy`?6P~17EjZJpot9OgjDB+$c5Q;6C>%r(8YPNwpWpBZ}~Z5UhH zr(U&!B%VqQQ=NnKIVyt28#vs9oG(!b%N=QnhG)>ZV?U5lf>2>?7hMJEZz2Oop@x$f z_QzMK{B^=BD=YaHcq17KsU>w{1< zAUxPtiK_~9ES7dwJ#FltJ6f2g-^c~DJM~37yVf_u?HSKo7jwfyVU1+YJTbt*j!bDY zsuyuutro?>-fmn?oQYzbBzd+hQpx;QjNEg282*MNzb8mf-ZKK|WRT)vZDFNbPqlIkSFq z^%&OoM65cJkFocChcn4b#H@Vc^}W>AEud$9tp2in>uK(xzF(pH8NjIS7;3wm^bLh- zye>VjSw+htt>Fh|=tQw(Cq&$nUMIPE*<73a$xU4ehK5ragww{@i+;IEJbcGLn%2t> z@g~xu@&%t+P8Zf=^pYjk`oZZ|m~JWyzH$00-nPPf#KW2$=~Z5e3>P==akE^O>#>5^ zrCB7s8@zReNF$1TV^%CI)pch6r_sKP9WF8S3K$YhevH$E2|NpkC*&ijBtIWnrt-UC~>72oU zUxI)ZP~}E|{DaT`y>crNW!Z(I0XdgFw!Il&BNRYn&csCx5JH)V5%@qm4D2)y%tHCo0pd-bGT%3*=Gs=V1hQmwN2V+f`ju;I`0_CkY_OQ z+ER1nn-Fhrn?-XyzaKApv0QZa=Q{$4nyp_x>=*ao7@jFF8Xl6je9ASHBS?8>6o3;EkfB!=Q0du^G(GU+ zJSIVgYt|;XiOWzq{)mA0>^^4AhJf#KuJ< zlzn?lq+F{-BHiB(^kr@kx*vOkN6xc=f(@aM=KzMTaO5)T%$VfH%4#5)%&#peXA7iLwqebo3_?cHj_s!AhU+3beeih7ForcU0 zL{`+>wh|`EOFU;Q+GyVEXr0VlrJ3E`LUr+Usx4HJvXH1i>NPC+d^9?9a+$v;$_33o 
z#~MGi({rf7*()yot~O=FBe)O)bXS`j@CDgxgltZy_+=HX_+RLVP;z8Uyotb7@_*)J4_A!RQaxB6t7UDvj&q66^mF#91-J1?{KZ6|bF46u4dfrm+ z{~PrDqXGDjx(E@#AOB%~1D~P*fq&4KDmO4H@PF#&jr#d7^vz%U{lEGqKS<~=pbFJD z^gKzVHK!vO5h*Fo5E)`679e3^Z=6F@&3rLs z{Gl9u|KjE7vVq@uv}nKWOfN7Vk2^B8t@O4N6K8yr2eG$X{;B>_SC zoy6|wnH2Hctt=xkNoMp3fe9bQ6FV_1x69)p(FYXjbs~FO+Jx{~j@#OSV+z!dbRP~WjHGH=A?X7QjoR)9Q zQ-%B%!(LpLX9+^`ICIPz#VZ#iD^%x_qGJQ;SkFH(8Zc<4Hk#{E^X;Z0I}wn}c~GJj z!}lrP{GJZKwl43-;I|Q!Y{p;d@GTSwv=gCslrX;WwcxEmL!A_|6&%bR60&DvRx>#& zI>z<897`yji0QjWLcaBM1UJ&~vudZwlY{7|_r3jl?Fac&8Z@=+yW-KgZ2?y2!MGd{ zy01obdIf{Kn-Z|W(JEW>A`H`l?TH`BW^Gn{+-1T;tW?yK0RM0Y;Cy-VG~RTI@Hl2_ zozeVM$mgFnhqd;ZDD7)(-1DjovP>SnTJ{ECJ|=g)&w|^P?7Pwzw>eQwBEr#J0OXj^FFm>FOin8r?hnus`TEk?BVY?ErJF&<=Ey zK}@JQ(~cB=Ha(Yg<<}J$(ND9BCR0Y4QF^*5nnW|N%yk>ixWM$DM|s`J)9N{>2dBr^ z5e1l2|R&P`G?nL<2VBL^lAalggp?Gn|#1b^QjT7it5-+T${Ag^*}@E zu1FXr2G)^?JC=F@YLwW75B|Q_Qw<8X?^$2el@~cqjb;bBG{*04mCH9VjE^(tQe`t* z(7j|1l6HQc>Q&OHe6pao$ypFqD(z(swPi)GATAM}uJ4X`FElwGKmW?!Gf=;(z?|np z3nl&yA0qUSWVVwV79lz>X8F`3E7?hmn72l%LTSE!@A^*d0glc?Y8T7?H1`sv3&r%R zFII@2gKOgBpHysbxaf)r)L}9XpOzSD5B&P~qr|K4d|<~7x-ugdw>q*LCTJd0%A`Hl zO>ctLBarAz@M?3D(}|=aQ!oOV;H#|69C=RfWMCu7X2mh{1y`c3Ex;> zZpB1Ff;R>2Z{q~PqM#eB`z;0%gb3cwMd$|WZs#Hd7Pv9H{61G01QEJ{{omHRF{prU zuLp(*2;ZIyhKT&K==?SpECT-J3A+`8{6g>FW1@n8%jM>1f;YwSt#P6^9=_jWkQ>L{ z?R_DF0s^;lzA>+f+cC1A;Lm`Ifs|0 zg@dz=C$6|SE>PPcz~ None: - assert result.sap_windows[0].glazing_type == "Double post or during 2022" + # SAP 10.2 Table U2 glazing-type code: 5 = double glazed (low-E + # argon). The Elmhurst Summary's "Double post or during 2022" + # label maps to code 5 via `_ELMHURST_GLAZING_LABEL_TO_SAP10` — + # the §5 daylight factor + §6 solar gains key off the integer + # not the string. 
+ assert result.sap_windows[0].glazing_type == 5 def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None: assert result.sap_windows[0].draught_proofed is True diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index d38e677c..69f09ccb 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -878,6 +878,72 @@ def test_all_seven_ashp_cohort_certs_extract_without_unmapped_label_raise() -> N EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) +def test_summary_3336_triple_glazed_windows_route_to_code_6() -> None: + # Arrange — cert 3336-2825-9400-0512-8292's Summary §11 lodges + # "Triple post or during 2022" on every window; dr87-0001-000888 + # confirms "Window, Triple glazed" on every line. The Elmhurst + # mapper must surface SAP 10.2 Table U2 code 6 so the §5 (66).. + # (67) daylight factor uses Table 6b col light g_L = 0.70 instead + # of the default DG g_L = 0.80 — the +0.0274 SAP regression that + # this slice closes is driven by the daylight-factor offset that + # the default-DG silently masked. + pages = _summary_pdf_to_textract_style_pages( + _FIXTURES / "Summary_000888.pdf" + ) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert — every window on cert 3336 is triple-glazed → code 6. + assert epc.sap_windows, "expected windows on cert 3336" + for w in epc.sap_windows: + assert w.glazing_type == 6 + + +def test_summary_000474_double_glazed_windows_route_to_code_3() -> None: + # Arrange — boiler-cohort cert (Summary_000474.pdf) lodges + # "Double between 2002 and 2021" / "Double with unknown install + # date" on every window. 
Both routes to SAP 10.2 Table U2 code 3 + # (DG air-filled post-2002) per the `_ELMHURST_GLAZING_LABEL_TO + # _SAP10` dict — same Table 6b col light g_L = 0.80 as the + # default, so the cascade SAP is unchanged for these certs, but + # the integer pin guards against future cascade consumers that + # key on the subcode (e.g. a U-value default lookup for absent + # `WindowTransmissionDetails`). + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_windows, "expected windows on cert 000474" + for w in epc.sap_windows: + assert w.glazing_type == 3, ( + f"expected DG post-2002 code 3, got {w.glazing_type!r}" + ) + + +def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None: + # Arrange — same strict-coverage gate as the cylinder-size helper + # (Slice S0380.15 + S0380.16): silently routing an unknown glazing + # variant to a SAP default int hid the +0.05 SAP regression on 13 + # triple-glazed certs until the cohort-2 first-attempt probe. After + # this slice, an unrecognised lodging surfaces immediately at + # extraction time. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + # Mutate the first window's glazing_type to an unmapped string. + site_notes.windows[0].glazing_type = "Quintuple glazed with helium" + + # Act / Assert + with pytest.raises(UnmappedElmhurstLabel) as excinfo: + EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + assert excinfo.value.field == "glazing_type" + assert excinfo.value.value == "Quintuple glazed with helium" + + def test_summary_2536_normal_cylinder_routes_to_code_2() -> None: # Arrange — cert 2536-2525-0600-0788-2292's Summary §15.1 lodges # "Cylinder Size: Normal". 
The dr87 worksheet lodges "Cylinder diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index fdcb2c8a..edb814e0 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3201,7 +3201,7 @@ def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow: glazing_gap=w.glazing_gap or "", orientation=_elmhurst_orientation_int(w.orientation), window_type="Window", - glazing_type=w.glazing_type, + glazing_type=_elmhurst_glazing_type_code(w.glazing_type), # SapWindow's width × height is consumed across §3 (windows_w_per_ # k), §5 (daylight factor), and §6 (solar gains) — all summed as # the area product. The Elmhurst Summary PDF lodges W and H to @@ -3458,6 +3458,70 @@ def _elmhurst_cylinder_insulation_code( return code +# Elmhurst Summary §11 "Windows" lodged glazing-type strings mapped to +# the SAP 10.2 Table U2 glazing-type enum that +# `domain/sap10_calculator/worksheet/internal_gains._G_LIGHT_BY_GLAZING_CODE` +# keys ({1: single (g_L=0.90), 2: DG pre-2002 (0.80), 3: DG post-2002 +# (0.80), 5: DG low-E argon (0.80), 6: triple (0.70), 7: secondary +# (0.80)}). Only "Triple" and "Single" materially affect the +# §5 (66)..(67) daylight factor (Table 6b col light: 0.70 / 0.90 vs +# the 0.80 default) for the Elmhurst path, because the worksheet-lodged +# U-value and g-value are passed through `WindowTransmissionDetails` +# directly — but the canonical SAP code is mapped for parity with the +# API path and forward-compatibility with any future cascade consumer +# that keys on the code. +# +# `_elmhurst_glazing_type_code` is an exact-match lookup that first strips a +# layout-noise prefix ("value value Proofed Shutters " or "Part value +# value Proofed Shutters ") and suffix (" Summary Information", +# " Alternative wall…") that the extractor occasionally folds into +# the glazing-type token before the cohort-2 dataset was first probed; +# fixing the upstream extractor is deferred to a future slice. 
+_ELMHURST_GLAZING_LABEL_TO_SAP10: Dict[str, int] = { + "Single": 1, + "Double pre 2002": 2, + "Double between 2002 and 2021": 3, + "Double with unknown install date": 3, + "Double with unknown 16 mm or install date more": 3, + "Double post or during 2022": 5, + "Triple post or during 2022": 6, + # One window in cert 2636 (Summary_000898.pdf) lodges the year- + # truncated form "Triple post or during" — the trailing " 2022 1" + # was consumed by an adjacent "Alternative wall" lodging in the + # PDF table cell the extractor joined into the glazing-type token. + # Treated as the same enum as the full form per worksheet + # "Triple glazed" lodging on cert 2636's dr87-0001-000898.pdf. + "Triple post or during": 6, + "Secondary": 7, +} + +_ELMHURST_GLAZING_LABEL_NOISE_PREFIX_RE: Final[re.Pattern[str]] = re.compile( + r"^(?:Part )?value value Proofed Shutters\s+" +) +_ELMHURST_GLAZING_LABEL_NOISE_SUFFIX_RE: Final[re.Pattern[str]] = re.compile( + r"\s+Summary Information$|\s+Alternative wall.*$" +) + + +def _elmhurst_glazing_type_code(label: Optional[str]) -> int: + """Map an Elmhurst §11 lodged glazing-type label to the SAP 10.2 + Table U2 integer code. 
Raises `UnmappedElmhurstLabel` when the + label is missing OR present but not in + `_ELMHURST_GLAZING_LABEL_TO_SAP10` (the same strict-coverage gate + Slice S0380.15 established for cylinder labels — silently routing + an unknown variant to a SAP-default int hid the triple-glazed Δ + +0.05 SAP regression for 13 cohort-2 certs until extraction was + audited end-to-end).""" + if label is None: + raise UnmappedElmhurstLabel("glazing_type", "") + cleaned = _ELMHURST_GLAZING_LABEL_NOISE_PREFIX_RE.sub("", label) + cleaned = _ELMHURST_GLAZING_LABEL_NOISE_SUFFIX_RE.sub("", cleaned).strip() + code = _ELMHURST_GLAZING_LABEL_TO_SAP10.get(cleaned) + if code is None: + raise UnmappedElmhurstLabel("glazing_type", label) + return code + + def _elmhurst_main_heating_category( mh: ElmhurstMainHeating, pcdb_index: Optional[int] ) -> Optional[int]: