From 3ba4947330cb5fd74a156d7d0c680136e154d5b6 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 12:49:00 +0000 Subject: [PATCH 01/24] remove foreign key constraints on sqlmodel classes --- backend/app/db/models/epc_property.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/app/db/models/epc_property.py b/backend/app/db/models/epc_property.py index 0bbf2add..f04741f8 100644 --- a/backend/app/db/models/epc_property.py +++ b/backend/app/db/models/epc_property.py @@ -18,8 +18,8 @@ class EpcPropertyModel(SQLModel, table=True): __tablename__ = "epc_property" id: Optional[int] = Field(default=None, primary_key=True) - property_id: int = Field(foreign_key="property.id", nullable=False) - portfolio_id: int = Field(foreign_key="portfolio.id", nullable=False) + property_id: Optional[int] = Field(default=None) + portfolio_id: Optional[int] = Field(default=None) # Identity / admin uprn: Optional[int] = Field(default=None) @@ -148,8 +148,8 @@ class EpcPropertyModel(SQLModel, table=True): def from_epc_property_data( cls, data: EpcPropertyData, - property_id: int, - portfolio_id: int, + property_id: Optional[int] = None, + portfolio_id: Optional[int] = None, ) -> EpcPropertyModel: es = data.sap_energy_source h = data.sap_heating From 1d4655c3c460f9a76d48aa4fe940319ea23af631 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 12:49:06 +0000 Subject: [PATCH 02/24] local runner --- local_runner.py | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 local_runner.py diff --git a/local_runner.py b/local_runner.py new file mode 100644 index 00000000..45f9e1ec --- /dev/null +++ b/local_runner.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +""" +Parse a local site-notes PDF and load the result into the database. + +Usage: + python local_runner.py +""" +import sys +from typing import List, Optional, Tuple + +from backend.app.db.connection import db_session +from backend.app.db.models.epc_property import ( + EpcBuildingPartModel, + EpcEnergyElementModel, + EpcFlatDetailsModel, + EpcFloorDimensionModel, + EpcMainHeatingDetailModel, + EpcPropertyEnergyPerformanceModel, + EpcPropertyModel, + EpcWindowModel, +) +from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor +from backend.documents_parser.pdf import pdf_to_text_list +from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from datatypes.epc.surveys.pashub_rdsap_site_notes import PasHubRdSapSiteNotes + + +def _parse_pdf(pdf_path: str) -> EpcPropertyData: + with open(pdf_path, "rb") as f: + pdf_bytes: bytes = f.read() + pages: List[str] = pdf_to_text_list(pdf_bytes) + site_notes: PasHubRdSapSiteNotes = PasHubRdSapSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + +def _insert_energy_elements( + session, + elements: List[EnergyElement], + element_type: str, + epc_property_id: int, +) -> None: + for el in elements: + session.add( + EpcEnergyElementModel.from_domain(el, element_type, epc_property_id) + ) + + +def _insert_optional_energy_element( + session, + el: Optional[EnergyElement], + element_type: str, + epc_property_id: int, +) -> None: + if el is not None: + session.add( + EpcEnergyElementModel.from_domain(el, element_type, epc_property_id) + ) + + +def run(pdf_path: str) -> None: + data: EpcPropertyData = _parse_pdf(pdf_path) + print("successfully mapped pdf") + + with db_session() as session: + epc_prop: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(data) + session.add(epc_prop) + session.flush() + assert epc_prop.id is not None + epc_property_id: int = epc_prop.id + + session.add( + EpcPropertyEnergyPerformanceModel.from_epc_property_data( + data, epc_property_id=epc_property_id + ) + ) + + for detail in data.sap_heating.main_heating_details: + session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id)) + + for part in data.sap_building_parts: + bp: EpcBuildingPartModel = EpcBuildingPartModel.from_domain( + part, epc_property_id + ) + session.add(bp) + session.flush() + assert bp.id is not None + for dim in part.sap_floor_dimensions: + session.add(EpcFloorDimensionModel.from_domain(dim, bp.id)) + + for window in data.sap_windows: + session.add(EpcWindowModel.from_domain(window, epc_property_id)) + + list_elements: List[Tuple[List[EnergyElement], str]] = [ + (data.roofs, "roof"), + (data.walls, "wall"), + (data.floors, "floor"), + (data.main_heating, "main_heating"), + ] + for elements, etype in list_elements: + _insert_energy_elements(session, elements, etype, epc_property_id) + + optional_elements: List[Tuple[Optional[EnergyElement], str]] = [ + (data.window, "window"), + (data.lighting, "lighting"), + (data.hot_water, "hot_water"), + (data.secondary_heating, "secondary_heating"), + (data.main_heating_controls, "main_heating_controls"), + ] + for el, etype in optional_elements: + _insert_optional_energy_element(session, el, etype, epc_property_id) + + if data.sap_flat_details is not None: + session.add( + EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id) + ) + + print(f"epc_property_id={epc_property_id}") + print(f"address: {data.address_line_1}, {data.post_town}, {data.postcode}") + + +if __name__ == "__main__": + run("backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf") From b3096b52ad925bd8d0cc631e67754043f007e2a5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 12:49:54 +0000 Subject: [PATCH 03/24] move local runner --- local_runner.py => backend/documents_parser/local_runner.py | 1 - 1 file changed, 1 deletion(-) rename local_runner.py => backend/documents_parser/local_runner.py (99%) diff --git a/local_runner.py b/backend/documents_parser/local_runner.py similarity index 99% rename from local_runner.py rename to backend/documents_parser/local_runner.py index 45f9e1ec..b3862043 100644 --- a/local_runner.py +++ b/backend/documents_parser/local_runner.py @@ -5,7 +5,6 @@ Parse a local site-notes PDF and load the result into the database. Usage: python local_runner.py """ -import sys from typing import List, Optional, Tuple from backend.app.db.connection import db_session From e15646c34126338f32650e7b45861ffb4845c470 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:01:51 +0000 Subject: [PATCH 04/24] rename example site notes to PasHub_ and add Elmhurst example --- backend/documents_parser/local_runner.py | 2 +- .../tests/fixtures/ElmhurstSiteNotes.pdf | Bin 0 -> 64317 bytes ...ampleSiteNotes.pdf => PasHubSiteNotes_1.pdf} | Bin ...pleSiteNotes_2.pdf => PasHubSiteNotes_2.pdf} | Bin ...pleSiteNotes_3.pdf => PasHubSiteNotes_3.pdf} | Bin ...pleSiteNotes_4.pdf => PasHubSiteNotes_4.pdf} | Bin ...pleSiteNotes_5.pdf => PasHubSiteNotes_5.pdf} | Bin ...pleSiteNotes_6.pdf => PasHubSiteNotes_6.pdf} | Bin ..._text.json => pashub_site_notes_1_text.json} | 0 ..._text.json => pashub_site_notes_2_text.json} | 0 ..._text.json => pashub_site_notes_3_text.json} | 0 ..._text.json => pashub_site_notes_4_text.json} | 0 ..._text.json => pashub_site_notes_5_text.json} | 0 ..._text.json => pashub_site_notes_6_text.json} | 0 .../documents_parser/tests/test_end_to_end.py | 12 ++++++------ .../documents_parser/tests/test_extractor.py | 12 ++++++------ backend/documents_parser/tests/test_pdf.py | 4 ++-- 17 files changed, 15 insertions(+), 15 deletions(-) create mode 100644 backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf rename backend/documents_parser/tests/fixtures/{ExampleSiteNotes.pdf => PasHubSiteNotes_1.pdf} (100%) rename backend/documents_parser/tests/fixtures/{ExampleSiteNotes_2.pdf => PasHubSiteNotes_2.pdf} (100%) rename backend/documents_parser/tests/fixtures/{ExampleSiteNotes_3.pdf => PasHubSiteNotes_3.pdf} (100%) rename backend/documents_parser/tests/fixtures/{ExampleSiteNotes_4.pdf => PasHubSiteNotes_4.pdf} (100%) rename backend/documents_parser/tests/fixtures/{ExampleSiteNotes_5.pdf => PasHubSiteNotes_5.pdf} (100%) rename backend/documents_parser/tests/fixtures/{ExampleSiteNotes_6.pdf => PasHubSiteNotes_6.pdf} (100%) rename backend/documents_parser/tests/fixtures/{site_notes_example_text.json => pashub_site_notes_1_text.json} (100%) rename backend/documents_parser/tests/fixtures/{site_notes_example_2_text.json => pashub_site_notes_2_text.json} (100%) rename backend/documents_parser/tests/fixtures/{site_notes_example_3_text.json => pashub_site_notes_3_text.json} (100%) rename backend/documents_parser/tests/fixtures/{site_notes_example_4_text.json => pashub_site_notes_4_text.json} (100%) rename backend/documents_parser/tests/fixtures/{site_notes_example_5_text.json => pashub_site_notes_5_text.json} (100%) rename backend/documents_parser/tests/fixtures/{site_notes_example_6_text.json => pashub_site_notes_6_text.json} (100%) diff --git a/backend/documents_parser/local_runner.py b/backend/documents_parser/local_runner.py index b3862043..a50786ea 100644 --- a/backend/documents_parser/local_runner.py +++ b/backend/documents_parser/local_runner.py @@ -119,4 +119,4 @@ def run(pdf_path: str) -> None: if __name__ == "__main__": - run("backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf") + run("backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf") diff --git a/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf b/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf new file mode 100644 index 0000000000000000000000000000000000000000..964c2ffbf4d76f281424e72e58d377a42be3643d GIT binary patch literal 64317 zcmeFa1yo#5w&2p^`=x7mt2a&R}b3od%wsTaqH!w0`7B_LRFfvh*5@i;(068g}IEvZX*xT8fK>EVNtY}~Z zk;B3)X<-dAab%XXHUOE3n;6*{n=s3p*qT8Eb8@lq2nsrZ98C;tkUj(*>LNO+NyR;U zLA@NT;QDsp7VL~+jj`@<5J5E9-Fe{F^Fd!&RJ01sn2692^h^{tgaVl;8-{47Me9Y* z)TCwcWOEi!Yg0viYTv{A8QX}G%k{@=-%{KP7+8>&pT3zhf+uAxE z=wj3Mpf2z}lQNnwY`p~z`-1IGA}6O-m9tMRS_flh)g z8%0*9>#p^-Uzn+g7tP-IxZj&j;Q;zDWovWH18g3_XtKn&Z*uG2>;)mV()06Rb zSz{|R>hKFz1A7;=)>~`KV3pgQ*FK9|T^An?+J42hsx6{etiDoA)>v{IXRklEg&Wan0+i2d2uF%ZgC=#C22HvO9~nnxAZCh2M?k4%75*+8%K1l?Ey(Me>g8(xMga zxxZ=0*_3+Jaqf65yC^GT{%WVml~eE~+?g`tmGwvTyllaK;b`;`yu9g0xS=vI_xSlB z^KV)LSuiZF<|+=Q*G;wdHu}jP_wv^!|CeX(`7MRakyIR42){6i7MHkioQ1OaXixS4 zyJunJ%liZ_Ykt(OZ6)I&hbbQ9Z_?;nELf`=oa~?3_N<4;a`K}0 zHr0Xa&DHyleTm~lR&A4fyfzGl6&#~?PuC@0^vkr--~J@4m-T6`+|AJ*NJt~}d@h9&NLHpjO=A9T=i4IH{k5V05~>tAExTDP zKzetVu3NBfYx24+M%W@Rsm)+OtBw4GdC$Yl^&^*QG@wH2K!Oqlwdu@#)AF1qX zaO7kK%#oYD@d8!8Y?v6|)8eRY#t&a5VVOO@B<6MP14_8_aI8Ig3$>^j+m%)}-JE#& z%fdN&MOmS^9ABf8`IXGMn$Nc1v8)o=r2m^#zfo?@j8*AY^0o zay&8ENc+` zS601Y1D+s#mB@CS*h+k*^;yMX-{`R}fkmZ2+)=pB8w3;)n$|-nI39#=D0qL99r=)_ zI;>)9+D^Z6y;*W|Z0cGQqLnc{LwMd*?cP;Ru;?{z)E_N)SJmyZjkbGRJN!s@4|Fg3 z)i16lhcoc1;#-tOJJze)!UD?pnCgDL+X6FH+}opHNd9AQmAp#Win0WnpdbM_6%Eu#BHA6Smqev+C2{F!2w{LjgV-~v6S{nnSr zN|bjlZ=R0Z&++wUZM9_z}$Hfo*VRHcZ!sNZ zn3ZSI;lu=6)bciD>YrsIf*+1u;(_!=UC&y_?>rHZsRKOnQjs;Z!6M#;-l|4U?^&ID#_Odcf-h3h>taj#oF;LQ9 zOTy}{|NXrs34Q>J1^(-^**va8Pf=gfU$+M%hH(Pwj*J-KxFyRJbh1rQRgq570vFBq zM`7CM6AhV+P3_j_w^`?M(SR$ITOMYcFek%ckmT{m;Ve z!FHon?JQ2Y^TlS8qw2n{Ua`DrLyAEU)b}X2!*C-7rrmgYXv4$d9Q>^;EbItxG=;?I zRgR6{A>=p(mY!I$R`uf!m%o2ID=*2-i^D_-j`Q3vKV6&ZN#Z0)S8yQ5jBh;aj}1u5 z_RS!3^EQ^o9qF4nA50SuZn`PWos%Mo>P~svSSagu0TeN2|15Ltr(4&UHOH|EnMAW3 zAIei-33}QeXjP7o_LGB`1{H8UjoaxbEEAuV=xEL{98|vosflX*n7J$aJ@}e(f-~!a zaSJ28Y5Wo*DyanI5y#!g|>K>Wov-aq6l@JP;Bo#7yE46BI z&AuSB(~iuT+tJC7gQyAj`Q z+LCdno8!((GvWiARTUdfHZjKb?f#DVN<@KH$1vu@>WWte`|*h2lU4bW(c>2f)8Ota zyHgsA`6jY_`ewUiz>jW?gHHx4^t=YEra@|C9S!2+KT6Qg&I}}6B90cF6IJ(DPC9a8 zA@d+)5M3YPFx@`C8!_9SDA(2~&9pa8yuaxWG;B4FW%HvuVA2l0 z9X@7zaT>PhgPN+u_sy*ONbpTX@#{v(SVpVQ&4yr|3;)H>jKHAFBcc;IU=%>`H}Yrg z*}+nQ#4{#-zh;eeMqp)Ot#L`%qLko!dFm1p3(D7^bXGGi(q~h){ zbBSmZ%ZJ#99$HPV&u;k#$dFIymQW)6uu~ILiz=hga8g>XCZ?j#c?{sJAj{Q54|njp zLjZTG z;@;C2C(#X(%@|!A_Dw23j?O|*TtgJDzKAW=THp4*-hkOEI6YBO_bjBP9w^y#zQ`@g zjPh~9*OL_*XQ2N5@(Rth(u?n$&|8}=T1%Il}0@G1DQOA=v>!@-=^*GI7+mV1ksu73wK}SM~yku z3s2$cjmpG4;}d0;Z811>>&ms+fV<7louBvEs-f{!i&jwXi(9DWv_n5SDmYYA;N?Rt z1CBV|IXZYG>#*$a=J%E3Epr2ZH)dmgRD2kd=&rqKSY=8RB95NdA?q|2xz)Q_6wY!U zON>;|5KI{_?83QWBQQ?RWIr5ud7ws+xq&aO3CN>Wk84q=4hi6+RdUHd(+wl+1J*L&}hicNz#q3vgy zb+0wyob@_+-tg()r0Gp9AYrR^ z6GLp;*|`ReJKuMNE~D;er6H1(G4p}K@8_3Ts`d#MMMw*qw9efo1y~cq+u0EoKMxxI zU_!uz570CsSa^ALQ^#c328y)ds) zYb6dAu2g$T#2PJ#lfVZ5wJ7TG1?t`wOtJ3<14m1v)$uGtL-PbL7jXRMJH+O-k#1-I z)Y&X&?hr4>d@T4zn6o<>cthiic-;S3Rt^MRugBt*?rZTUc$MSv}iMnRi6LGjX#z|bgNhJ{l?hivDDUmqyV z8BW8RqD3R$vyt$+A`T`0$|_rI^ftO_DdN|~#k#KkE?Qi8a^d&rqwCuPVn+0x{6gvD z)ceW?*J}H$P=JT2m$z7y0dpWn` zy^|wffJ4Y*elD&CIpQ%pqL5G3&)hu`$uYeF;+ii)>!&pz9jFEch-Ma1z;xwTAQAH?UDp{)%ujc7KHaJffXGW^bl0C<{A@ zZj6bh^i^k#is)K4KRK-#iD#7muRV57T{`j2F08f5V`RCplUWmW*U44_-BVMsi8vu# z&59VaOo&ablvw0vx?{CR_I8YnBFon`UyVO+X=+h;tmbNj?rY*U2f6&0=)k;`@0!3^ zZfmZ*0{gLId4oRhZ_PPZW^_zPjrYbx^MYxg5s5%S*a?7*6_b(Op|;=jM;C*<9lCQ_ z^zt5!AnjQMZ2Ij=vP?V)bFF2-qsFBeO;sjfY(6KzHlvr8aNfQF%!rJOxvE$Q8`N!T z=5#aq`2HlrP1KeibTu6l=q8vQba*XyF%-kA^;|*55Tmk*9LFEkL{w1yYrXzj=P#?c zFZyeVyOQifq*Yc`=WbndX?b$jjT=av^l@))%*i8|nff|H+$gDt;5JC#m!`#z8#-2? z2$v+hQCvfV(dm=4bs`;_5&e`!qSru|`O%9^X;Z^d^kKdJ$UcIC_)Xr~r)?HJVkrjj zjw9=>cc0B-ugHD6uu87zI!>ujZ9c`Fi7S$byTmWf+cMUqzEWXkkwnAEQ{krE%I-@# zZ=!lYC}0{zEw<)n!$(MYU!L*{&YCk4PF}Q!Wt2>AoNOtG6kqjB-JyuSKkuzDXYt`d z@NjfNA0qj=%a!WRV0)mkyK@6*M1fvg;8k4d{(CI2{4EdmD~U|-y}PsVviqlJvt@!* z>DGaw05wPWHJl5?2JE?(rIHkcu~p?^T6Wf+>b0*YVF$D`QWpZ=vVAx}(N#y1ICe2_ zLGsPnpYs$MKF%$|KGI2_4n8Imj+QwIVtoQ6-2xfFz4y#%t@G%n&MI7)h>*uRqro5a03n2|8Rrlp%Wr(x>N@=a`Ib^A>>g>`G4 zk|AUx)nT!Qee^GeR;nPn}F>*Ux&#{$Q-Z1^6ybKNc`9-j-fWea{zj#eZq6c!BcP*dU>#FY#w zC5k2clG0%p4luSx8)Bs4)xK;tr41PCq(4WQ%fCXazr2|RB$^=~3^n1FH~BSno5sHg z*j1lGh1lPL@F+|g2*TX`a|!6+PLJigTTFi;pM11Tw$KJGBB$+xR^ zS>UK$M(uZ$J%-we#oyLi#~7Du?F2s$=5WmIaVu21cv}M_FTOahm8sTnmOm1JnX-FU7tL z@;N>|vTy{5^@W_0``e1;jo^!ZGF*ucNp_1Lil3>XR;ea3nynVKx}g_j%0ud=-9woQ z-6gJRf0p2MByfu8{8?HUvutc0BhQWWgqB4wKYP*1aE0~VRfgJao-Fw%{EBA6 z#o?$Et;1&ZKGD&b%x-C~4RRL4E%)(gYZHMVe4e6Tp%lb=Z8F5Th=j#gIJz;r2>crZ zCuCvu@wqfw<1-|PZ~ISf;6Hh}IsP^GG{jT=&$*{r|ABj&n}d_(zqqFnKyRiKCo^qk z6BMdEn|MQArcodi&k=1_%&lueE0$X z3YJf#;#1yCV@2|7#OKdZUXJ62b6nj8h@m|UKC(MKdS2bb$9{SNhlEX|oGtA4_nG#9 z(A0eG@Dg@DU1TJ50-pO>O#uyL`gJ~COeAzPjbw2c7_mwlOPNLLMrt{ihIiP+7z>$A z0-O|d$Ds{vj~to4{+!rVO$`+jiMbWqtZZy-0_1DwxC=Yi4bm$KKZH^zt+-l7YbGvo zD?%zODp+`Uc;2xIvTS)DmiQyb{pAa?#PYMOU?}u z5s=iL*>-+jJ%d!sDb)oeCG8$E?RL%y6qnfCUSSC8U!a{JPcnVFPM z;G$Dd&^C|VC*7P#j+*fXP~5u_v>~ozP$KPtODSYaEzTiB1WQVFQ%T{q^GX?A_k_Vu zy{E5Rjv6b=>J?&4DPG(SQ=OSi$;wLR(g>u-Uc}9>ZMf!Y(c;}mn=oUPDx#hyz)A+&4Mm6g?+He1 zv;eqv`gH87UX~XMmQ93j6J0Z|>yL++V+b z$;ruGHD)(QZEkKRZEb0BHMW#TjchMBI3riC-W1t`9CWZhczy!jVqd>iqMNDoe=ars zS#N?ai-qyF7-qkCcGD)K14ShWKE$oTGa@Qt>A32AFS4SFpYVxB$@Sc1oU-SDh(p9I+ z(hD~@6-~_#xm#C_4E&U+oEP-TOv}gqEm9D@5f-)k76})$0-HwgCKNWxfq=lH%MrOY zeDs%nkas)UuPUi_ZNZNo$>+`6=UQ$4l&+TA0RFun9^Lgfa1>Hg-{yjDXF9m+4sm~! z{P^fJ9HscS$51T0;>E-`LpezKxRQR1`F5wU-nWP@NzJ zhfgn|m2pzx{tBc=vEngRzsL%LQ4I7zXbgh`xd|tKPR?%8FMSCdvjVcw-`2m~udm+> zJ*QYD1F0B}yWk?#w?qN1UKLF){G`Bm*(vDoy1XLqSC!;g3v3J3Pd;fl zmo^tT&KAbYkIa~Pl^DthP7b3vu@o)>QJdZ zlpY-&MVzB}(bdmh)?N-q$ggHAJLUyU`_ZE#!H4=%Q&Y>y$zp_^&AG?%zbtWdN}0|D z=MER~_D;-=)BSewvPKyL7>M}g*Gnu6&5Xsywk$0v3hMFj6y;GZBhSsx{UBrm%6O57 zOjNKi7+x$Z(=WMxxfh>|A&H1{?(!8==!*}NzEoHGob=#1qINMw2j6~sdsCeCo}HEB z?DP`e>A>20B>kO@nOq6DBzah-o8A?`t>BEhBX$?bZCGTNU0fjI3UYL@|2a86HZc_$ z@hK&%mG3gpv$I3U^CkjjXgE<(%9M!FhP#1&$^V=)-@T@(359&u+%gCIa}BCupyrQ6 z-Cd@;DCb`m6BFY???61d#a+TBzt6u!4-E8EN(nzG3EmYom0FiAB;lGNP)B~VBq4ss zu)8U2`bLhvhTY7jtW-c}@xa1GYmcDlN6-(_UQ=V~PfFJZUzCUr2}cu7IL@8VBTyq+ z?r-wI$s=39ZmPA|GT4(=#$kzLw_A&g_|6POD8zv&Em%@pD8RZSeE)X{&v> zKAfh`WzyR{+?D8ze5(+t@Z9I2L2Is0rmt7?JBNm6JIvD4-7OcpPm?9Bh+Qyo!O=tO zb4!%1u1&_q%R#)}=u5cJlCm5N8;hV6w5j3}C!5QcjlnkAB_WOQr~3WMW?s2e73sYw z#N_A<+9I~Z*Si}lZkywqVO3{q?YReo47EQe8c9BA0;zbuqyw(J42{e>?2Qi*JNkXA z%Di-Vi(}G1FgI{KsDzx#dxG^FDmCYa1r^iR2OOrpJKk zSsfmgUPU%8he7+8oBceb39-#s;aIiGd{=nohmfZ10Z>x$L&akjoT#$Ww|o!C-Luy8qky)QqGu3#9!bP(_c|ERyZyc89cj#B&f)u8kB zE(&G7Z|jdY&keHo)mGJ2-xjvIe|}G5-hC76bn&ZSrp6R(nRR91zQvt1@|zmCtXB2| z;hI!K9Y-^GFajm=+a513Z(O5tFa1m68B}^F>-GPdHDsh zQ6B;3w=9OWQ(SDrivIkjOiN!tS$9e9o94$DuQ7IZw**;RY+u53kB;=9M7 zGOesl1;`l!i_O6?*9!iYim_rBs+W^W*5;*N^CaA+O_%=N2zoc^>dmrEmoBeM!ZBlI z;cr*+N*PPrMMXn>``q9{(T1-*ZU^O+=$p{gSUKJd%_YtEtEW6adfh_B#HHv6+axMt zFCz=Y`u^M#j**eKiW6w!2XH}C_BP4SXGS#AXF&6eeUwe*Y5T1S_GeGHD}2yS%7f?(oLa( zA!&TpnAkzmv|#zjh{SXD4hiy(u3qUX6dk7NpEIywT9DrO2T1M)vS_^g#Nnj+jXoJH z9c7&gAZ1a@@_NbeYFXYlARp;?z!g)Qu}LgG=msl zL}6$#3fNwOQjKC*G_y$IEG#yYX@vf*cSjnxVXvZ$qWt*86zk|jh?o=wd0BB@isiOJZ(cv?Tlp>5(O0+Hl^{I|d!4o9WDqkU0K6ut@_8)8$=d2tL-V zW+4;E=w)$Wz*<;YvVLDd3pi1ueqnl&5?9fp9@Q-=!Vs5j0@ zmk*yx4My2&V*$cWPEO5kc?I>wG7^>-9FaJjotl-z4qtsVVyisl-)=pNm=w9$h6O`Z zz+Gb-J!dvqtysxhH={D=y_@-K@y4*a3IsQGZ}>I!GQ@qn1UBSY+5 zEJ&zmv4q9N#bXm=i(9`@XgK2Aii*qO!9SQzyu^SGZ`{iw4+S|9N;MxTJLsdk3q{JV zGNSH=F6I2y#64zLl(?qT)r--kU#)CM^XSSvCZgct;ts>XjuK_KGJ@&)=~ou8*06dN z>+J50;|Kt8)WAYhdM zdr*@C#%`Uv@ylx#>y-mN0me=Kbiv{w z6!EEVS>12B4&rKqP4{l3VlzJ%6qfC8u7*xafq0gWIjLCEb}cM^ySt0`o3OQ&H!UoL zojH!CMd%(z>PAMQ9&xtTncT}7g74g-eLZ&%?;X35cAZPE1h4p6>v-xMf{t>&7tM?> zh~AfeP7Aj>7)s-v<;9OcIq4Z6ygMkKc;4b;o)R}$^fk2-(fY+wMyDSq@y zPMfrmm(u~yj6~9NhH;)R#Q3;mx-oOb?8)Sb783ldjGU39$YkRP`+IxBT+&i@=p%vO zuQ&2GDn?mgbiHj*8U1-_WoW6ReN}b4;?m>NQ|>M**YmrOT=+uDq8c3GB44y49>~fl zw&_xIQw!%<6cvkQv*?xB?Ejb$-VZoDySjK=;JzS4`pPd~G)`HroyjXHF-cU~ zNn1O1O0@)WPoU~Dk9L0<1Ls{O@Wj?VPU8&880R1nr8s7(!spSb4qUKRz@^TeV^71# zIn*;T(7(_wz5Jtd^d&BeMp2S3J$MhZcugTSEhTaJ%}F!9)5JuFDk&+^1rsA}{_OY% z6|f4tZiX%s2(O8=50f~0`(PtNi?*0+*wM)$9-!(L95Z3>Ug0K<=5h(KKGDw6F_v+$ zv1cX@((>27x3`Oajy%XCwWxNOw}*QPUrkGc;Dn`0!fe;n(iJ4}doWvrF%6Y|?`SVr z^@tn%u|F&f1zzkkK-_e(z1Lj4JQPo_nS&5sLB8l`QCw`?6h~2k zTwGKUvy))Heb%5YDLNy?_|^Ad@$6?Zo*<9@Wu&N4(79QOYqVQNx?Ua^y!)q$>gqri z3?@4 zsHmvLYfJFzMU(~rbstrriNDl5Gbrxy1TP%iCmnSDweNXGg^kvwgu#`*u`z`JBdUt5 z1aL7C$O{)VYqGt+%t8N%)BCHu*}5)~qB-wT>h^VAMvEL0-^a5v$6z(7q_ASTLdGi5 zq1s)&=uE!5q)msQJn{m4-Tu~E2TB@#`<>%sz$=`%v#`EM#xsZ2d*#C$KNwSL?6Wgt{c$;nBuFfbWjKE8U({zzkW(hIYn?xfcp zk)QQ_)F(1%^coc8z*qa&ci#6qio??RlvVM~5^H@) zsrdMv%TC|Eg(8M3UPdt~4g-mSJY7?BQ~|L$d8951k>I-MZ(pzkV+juqPHCI(;(v^< zt#$SGPE9o;3(@!Y3oHtpJA*Y8qOyvP|)iaT@6iX&|Kr&vKa^?6E<5-^=HM zL@`A#3EmG*wXJ2N?W1j&dp8XR?(ZE0_KA+ZbJ^E$Fo?~`^l>v!k>6TDZtE0D;Y3z- zs>m%bw*`T)sTCuX)YLV0cXxm-MfxXj$*3qiW`^bo8HoZkuhaEX^lmHp+w^s;OOqPl ziGx5}zkj>SIfZLo&Lv$KkmX3i2d73vgoTCq`89fPO_o~Ppq70)c`2BMj;o-ch>C`W zi-(g&kk+ZwO7pRIc+je*E?C;R(bi(2pnHq*oQ+S_Ps&Tw!Bw`*A z2DE+{<6-Ny$S$F7_2^1G@~>{n%MNcTuEhLF{z3d%LV(|zD=3@6^D_~DHt2zi381Q= z>vI?(&CfhN&@nO6!_C2?R;5DBk6?qn-1<7Zq(0QbQ@b&a-xJP$lXSA6$pG#fZT?9v z-|_YyD|kSzv~||^?sM0?4NR4w1md36{%BHuNTSK;__$%7-OT*2D@_iEN{8ukpu|LZ zj8+I!z_0BWu~Ij^k^9^$j9_q~FcNK3J=gqT;5mzi9J-XYdu+FgLU@uh+MbcAw7 zOVf$qNQ?J%(WeS~CoJ~gyH}7$Se|k(v9ygmW5{C+BKR2z^YQg}o}kp#7>lXeE{Qy= zcbDX&-!a~-nG{Qw-N~9di4P^4mSzz<-s^arO_JT-X8eVudtYoId%0R40r^MR%iyKaDneHt?sw?$yJ{EBn16GU$p3E+8&3vNA9iEx9>A?lIQaJGf$7NH3oyp|+l6^G zT3GOmb?qI%fQ;--H-UhFfFfGdna0jt{jc%()ix9HV$?UzHUh6dF8HsKYaG!S18}}^ z=FE&#{-*BnU-BI|OPgj`@|ixJ=$lng3N1C6p3|D8S=bdtto>0E_}$ICP$4ll5O7-Q z@P;b{G6-79(f^LggE;>7blJQ)lKTSd7S*~gIY>?CzAqepPW8mgxq9lY0bgKK|p#e7#H#i$mz&a&`lK} zdvz$UBv;3wJv~DDTogxUSx{V;>bC|3I$gFG#9M}hjHD7Dud1q|-*SYyvysfs8=KzY z;U3+ReUW6F(v=kcDLVXHZH0W3He}sU)xqRWehqr}?L79r5wxUZx zO|`jM&baWY<<+LSnVr(Ncw#j8VFY-11YKnx5mPiQ@kF60yu-s?xXl2-h_uP#y`OMYbT~l#v=y`DU+0O1RynuCCslyP&0lq|>d157A#N)D`6wa>8lAQt& z6BFx(4}P=MB^KULyuvMd)rDh0X$87zI zoJ>%_3m_l!ktstFT-X|x?|_|LV^vjqxoOE+2`&cWrpe~Z^b*a|KF=*Q9Od?s8byGV z*=C;i_sZPFQ~Jk)5E>N?Od=wp6&y`Gq>``weQgf5E?C;A{MORPj@c$&EfX~}&b|GE z{s+{zxJ&G04{d}PcP%WQQW#nWtgMtJfh493jEu9hqvJAME-ST;LkdH+tFF9P?|1?z z{X;V|%DZX}AuBt5c5$NJZ!JmFMvsp6aES}{lE%>4lQeLX0mS7b)5OFCE>*5g1UQRr z>k5jB!^49R7ZY+&JH)SL9s6d{M8)z;^mv>Ku9XeRo6Z!!hi#0@ekI|2VG)W;kco*& zr`@b@mY8h)xGrfI#5~&Y&44^z4vY^Hnp)b}3Fs}qm&iE5rja&sajA`8o}C*X@3x7* z<4NGB>Zax)RE!=cq0IZAEkOR=l4S5B0(ojy;NLIaa{g0vPDp~2+9^g*&--g1Z9h$Y!Q?#g0e+Wwg}1=LD?cGTLfi` z{%>T9xc)Ww^gqoOvHt`2G?XoZvPDp~2+9^g*&--g1Z9h$Y!Q?#g0e+Wwg}1=LD?cG zTLfi`pllJ8ErPN|P__ul7D3q}C|d+&i=b=~lr4g?MNqZ~$`<{%XN#U<8~r=t7IFN8 z#ak#_1Z9h$Y!NhW5j1X*6EtoSG;R?zZV@zY5j1WQG;R?zZV@zY(ck>S|I;BfZV@zY z5j1WQG;R?zZV@zY5j1WQG;Y!V({YP<{x$dXKg||#{sZ?klr4g?MNqZ~$`(P{A}Ct~ zWs9I}5tJ>0vPDp~2+9^g*&--g1Z9h$Y!Q?#g0e+Wwg}1=LD?cGTLfi`pllJ8ErPN| zP__ul7X7zpi+KOF#oK>6ZV}f%SiFU@MNqZ~$`(P{A}Ct~Ws7p5Y!Q?#g0e+Wwg}1= zLD?cGTLfi`5RdyG%gTYE>-AW?(tR!d1kYV4TLfi`pllJ8ErPN|P__ul7X3fX7O}GY zYyRnf8ZP4g2mWa&Tm*%Spl}fsE`q{EP`C&R7eV17C|m@Ei=c236fT0oMNqg13Kv1) zA}Cx0g^QqY5fmxv9kWJuimn9GE19Sn3;n} zAvrNSYdc3(djlgAW^of23nLR1DN$xo3y_ntiKCdEjlG?%i7kkfhgs3U#)Mgh=O3)y za&qws3Oa!tO$=<1J_Jd`jVNJ>VD~&y-dHu-TCyLziPImoh?Y*#L39f z!X9Mj$ozEp$EYjfXklQ@AZllA%q(ebVCF>1&i2PeQBgZLZAK1mUQ$MGZVpmb4h|Ml zRu&d6T|vP=-#Z+C-vYC$vmwac-h^4r(b+`pDf34ok}S-k22LhVjWYjLgA%qzcE%RA zX3PK!TM=6)i@zL8S~xm^#LNvGpL!#2@TWX08ylpgG05CWn~j@?^l4}3nkvoqMXx$-?qSxu=GBARYLlEadzj_5M`uFZCdGp7OjL9DkAh zB=66icuG0AATJ0a>*)plrSp(B{+$1#%|E>%Y)^fF>`#4oN}t;QbGbjKf5>^d{?yl} zdXW56AD_}c?Ye*H!++`yDeIGJsr;uNaQ?j>{AcOE(X@YB$+-Sr`2UxZvHp#cG5}3TWN2w( z1cF?bfxIz)T$W)a{iB@VAKRY?;s5;Ac1a6skO^dDw>AKoh?_u0Rug7<6I(NoIizu3 zj=!>=GqmO%=XqXvPXkt1i^D7y+~$+ZdNrt)oSTR>%lm#BlVBjmeJ_wkpg0`6I+(cI z9w;5~}^yN5Kl^C6yNMO^t+p+O9Vg=j&^FM3&~fi`deNq{kT zXfF?v8=>SamL^|ih6yeVnKWV7;jvhyg#C3Q@nGGz?pBo7?!=>{VOTQeF+T{eqYD%4 z7(UXo3YZ)4yjJ|t+X?d8&Z04hliz}2`tZXY<*3VnlqaZdwz1K)M#WU2Jj~rf(Lh3;Hgp7p$_DWBp6jN(g&@0nh@KfCFI6_k~keJ-03pJsZYdwZqX zzrmT=&6;jX3m5x7Y}THK;TDt1NMKqLPjzF951zE!fZ;^@P`EX{ef~Jz$N+h6fT2-1 zvszUnfMss;(3XK@p}Col;pjk*1HP%GZ?Q+)kkl^>&em54tCIwasnf*;03efYfAB9rT?5q2~BB^~LUtXHrwkjK}y8`01izR))p!EM~@_q-&Nz~$;4*p9? zyy%M7v-jO0Kb~Kb{fNK9v`0aeSyOKpe1UeN3Nyztcy znNfD*%5cn!mnWYd($8(a6R`x#6)c|bJJycUr->Tff9Z?JTzyT1#Z4KkN-_MvM3=3d z{dP}Zs*4=Zh0@VWRMyZ;I{oznho#@bdpwi`&6-Z-D#6AhyB=;HI)uR0?3}kE>H>^r zy_CE^Rx#odz3;=I+lc!cOZJDoZp<*ThsN5S#NHOvdXji_NlG8 z5t4Q!s2gk~sMm}&bf5B6=)EahttCoEa=Pn?c_$;N?ydoLNLZCId=&;nq}pUZ1My_9 z-d}Hdobv^TUA?n^&jsfqf`-zumZaG@ZAp%Ws_1L-pxpO$Or*Rtc+>{UvqJ!}D3D#Lwh_}_xblM*6p;tdkC&IF^KbA}8R)YL^z ztR?*BUzkaJXVkYP%c0}R9Ffarl>N|1R)+VQuUcth$xOvupd=fQY?<{9?YQ9NXG3_E z9AXZ|7mLmZUr)qF1Rto$o)=0}H2GrsQ@zboJ|OYNwOovThHo3Ao8hg_&vl{gcWmi} z+09p^x&}~<7nrh}dO5t+XKv;XmjE)gEFI;A_X5${UQY28dQOBG&tM}bCXc)(z|0h% zP$I!(qcb(bvWfVdt4NZ}#}I;Qot9V!JFwVD9ImM*bX(3832YtfsSn>bSVEhwVyR4f z(DWX$ybzdHK1!gc&_>^W&DGDxRd&F6ubS74!%Q0(QE1JrE8Kg~g%m;r6e(&B^>dUu?{0CJzFdpan z$fl>nH%NO-szrnyw3`-frC3)*;Zf+KhT`RcN!`)L&(&rTS7S&p*VC@9qb2E? zG5S#ua}{`yVI)nA{d`LqP;AF|sN(yFgI3)=33gpNbC>U@ZwKM(>M;@y%X$m^Nm8uu z9dXY!&B9F%eYN8K5^~%sDO$mZ$SR%)4J~v0RMRG#jJiBJsSXSqnH{ag{2^SEKiUF1 z);VJz($_`Vve7m(*X#O$UQ^G7_|evSsq)r;*D&GMJo=H}@kWz<6K3&eiL4#k_;$F> zO}*%=AZ77}YWf9*j$@PAQfSi3b2GMCvG$nr7x6F&msr8;BOe>5XHTA)V0QwujjIx0 z;LWIiW1dW;ROSel+3C09-kJo!bIzMmDF>2P;E|Ti@zZ`!S~d^ytT&21H~}QuypEs> zlP6+-JeTBLs&n!xzWlwmsp4w%kUy9FiBSSYuI9I2gJg;~aOQ>5IEW_VH6GDa?ZVP} z_Z2H1Nnemhzr9YxRVB?=D&Je956NR9KIB) zzuGF->=r+AUwcNb4XefSuqVw4UQONwq2bf7VnD-6Dl_|X=9WIByFjeoa>HjCLhQIlVp8pMFg6+xL z_}7dHh-LpD#soX*pG=J>YvNDF#2<41BSQHqrhe=;VxFrJKw{0tW=6MM3k zRdz1k7An)uR{0sj=Rt9#;iwWt6t8>kzdV@WC5XRBqWm~K!=7Z3E)c#b5RQC5LebXi z)y0$h$$)~5INu^E_xH^qUl`NRjp;t2c#zP`F7nW#l~BGI8w6XaNp!6480pT-<0?!fyJWba%xxsMf-$L9Fjb;1}Jyg_yrko_7W?2jOj|lh5E)?S6+5t7IN0lOw|*cVk_R(#CVQPkrKj3nzA*-avm&%<;p{aN+< z0hVnM#%LVIcQ_nrankaw)%`|?s2AL(fXhvFtQ46sGFfr>|0vZeqoQiNHUiS!Fv?IP zDKIn4FoZONfPjEB(%l_Wl7fJQq>6-ubf?q^A`MFS&|QKci15MltnYgt@muR%>wWJZ z=f}0~`<%V+bI!f5eeGQ@BJCOawsZ;e*LNlL6VB*JqGP0q_P=Yrqg1w1mUGQcHcpNQQeo%8GN z(acKc`c5aH1Mn^Mz{~5+(CQhWEf4|iYJ$!bmq zgLPnhbHqG2rXbStN5=3^%UQKmmQ7B)Fx?hBE_Z09+2Hb{^+uW6;x19Op5nL+dR?Rw z9w*SoFJnrB)a~5Kq=!t5Mu!oI^tk8+PI%pCWG0&se{Pe>7(9t*$)i9wKy*Yp=hgpa zU;MPAjSV&dn6oy!BDP&5l@5))(`_q6l_^;)vLDbL9ap7*p<$%bP6holufV zoBB}$KeH)#ZO|OZrqz;iZ<{ZtAXA0zkJpzPzhk34hfATSB~sgb*yqO|N(%q#EhA)3 z5jtoxtkzWwkcQumVN;c=?Sq(nPR4{BiUK4X@rJ(J$@Kc-4IOeID${yGZ7g$S03%k} zHdR)eFCkTS5(g^tM#J{q;2g30V{tet&TNgYo9$u}y&XdUmjwx4ap|{Q+HJstibIpo z#rfib@7>SVhaEm>S1Ybn+xwXm);;^i79_24$`dgAzMWI3RZ^e{A6-VuqTLSPQCYuh z!l=%{#p)#@kmzZO{|RWxqJS4&3ssaZm6r({ma);Olz2@X(H)crhcLdqg+LZ|^g4PA{>pn_AQZ!`rD)?xy>!!3k@ z2h9P}u6qdD<5}R$G)*v)l!B`w@ktbsQqK_)tR09G2q?u=fZ8sB0*TC44+_QC3<{+$ z#U{K}g8jK?j04qmMhB_`-_FD)L@40X>||4fiKM6OH)|fPX6!t`s-|%nFwWi1vS2Ai7)%dmQ8=UCRYI*0W*d7BHmV zR1ou)R#G2Bc2+a=RL}|~I~fqOR1ss7=dO3jH{V`UEpeZ18vR1hr#RA@XRhkwW3`$j zOSzOpHZUW6m>KvWh_IOF%LD2h!`w9Xq-7?R49pmdc4xWu#G)?>E5d`6Jub2AZ^Vzy zGz(7TjDYI#HK|OK2!Cb0NBp1MGpp-kV$q3*q_w; z?gV@|Tks-5`l(T&w_VLXA^wYC%HASusCI)R0Y^vK$g8i+^i#D)8W*}Yqe+z+$*MJR z0wD?7P33tX>Ki4zt0lzsXF^&`s+7_uKkF=|+-Bxe8vl65ZXz5DA&)df3s4fTR@q}a z=Pp@UI1aNm6q)@(+iTvgH0NY%l0m=gm4!XK)nma}f#bfk;KcnyUyMyC;qtY2t_oo^ z7iaEG^)Rq!VHiO<%R4W%IgO3dYe1jbt1c_r-Xh{6yo`P{+Q5U5=e7*AM zc=5#J=mLu?6~p*X(g3+udHZij!{3UTv5=qX-;xFx z81_%npflw#g}fpSy^4V}PRldP8Y;3|ozBnORqs|KRSyG`G%0N33wiPpr`hM`mhPEh zNu0`*KTt={UHkj}!cM0NZ`J_~52?K{vm^&&7A1*>&b~E~Ix6kf&aN8xdf}aMY!!0g zy(Y!^fa|F;Av}>h zLOiXeLj0fyc72}StP-dEsmBTIKNV!Xnm9Im6wvcqpZA3aC_e-qf0`Z_%~w-D6FA8e zeA7t6b}M#~bod33IeU(F4=hhuH-h(&XNf*!{62JaN3j^9Iv*w0By#c^nJK;Xy(w^9-33&qoBVRK-Fc2u+-YYuf?+cMW}B${ zV395P#80!NJBj6>XX+%c-W0kw2brNNZ<3!kd2#g~2+55mu$1IDj0y*LS$ulO8q|Lb z;-Pr)VxCEu!os%iPIi!%e3DUkFT&T^Ye|v7OFhY2XPqA5Lc@3ChkE6ss36^iV%{Oy z8wb6j(m4l-HS*Opq9&Mao>p|HeT|c=YqgNF1wD}?aF z(XnA0B8%ZHRdT9iM9Nw3#Wtg|T4P?ThO8%+MpjMUigK&pF7Rx40zZm)SL?YYge&6u zlRB{p8tR}vaiF3h_Qt)iNVK3LD2Bl@ppFD3f-~9aikv~!4V;L+DjyK2qhQXV9)hD6mxR_p+Xo^>W=?GbaF2|U(ImW)- zQmeSaY~7@(KC?!FfvCXN)TeCA?T_SfJU7qCm7c55(!}JzZ_E=V**M)gGaRh{Rot57 zXVJsnPWApJolm+6vBIProVgB(BwZ0f%gE%nvH+9r1l1W-hh)7C?%o0T{ zp476IS>YoKL(O?qc=XfVb+wUl>{;UFfb5;n-bt}sQCRSz-MufP4SpdaDAkGmQ*t57 zer{p;qv37)!Wnx~;=D?fT|NH&57xiX!&T2J9fqFB?Q$;XK2Lm`J5J|tcm!IhFLA+U z1{B1Tu&GpRt?mn;BpPOV?N%9=c}6RWc-`g=-fz&KeLpN~oL*}`?}IR++ET0m-Pwrk z?(eD4Ie7qWG!Cz>9S`&LY`dQl*;aD{D)Nl5jmNOWUI#@sqI-+L2p{-W)LmhAGt|e^ zet$saz*~3(p^;79xrf<7eP)wXC#_$sNR(62K53NBi*B3rv)+%I_|P68(2aQNlB!D# z41-`^9NFak3c%tF-}-+d+O@jme-qLEmd*c@hz4i6%Em7v+GV-$?_^pY!Sq*~%Oxhn z^jBNJr3`;H{x>(idf2}Ub3%xJrpCouO0H7~qNYtPbV_e_i5TZP9lQ0*v@JN`!#0Q( znRVBL^9ytS``Li13m3_*+_e0u1_e1>J*vGu3^9q}mvX*kT3aeo;lB3|%;L?xab$lT zg|+4!o*%t!w6n!_R=8vQDmFQ6-3XzbpF=Af8}H`zRt6A51_V2nmp26L-W>Ek56RXc zI%ksxoZ-8QZ;{6;Yyf3ao(u^E^(IChAEl8>5*>unB2jEa>e#@X#=6~55PY`HxxR9FP%Aly$Ktf43a zW*s8cS2uu84sn~ACQvarO-p%Dn80>eG9_`MNVDsAm^*yJlH26lzK}r=yW8`-?rFAF} z281WQ2})fZ=U?HnrFv1Cl5GvQBf+Xezvv}Ka#d<3#NYJx@A5kZ%8n34GE0tWk1m$I z^fhGrAOlw@lgdBQBFf{Q0O!c%l6^q7B;0+1QX33#_HcXeKC!-^6|NsMN3@73srW`t zJWQ1T*2~zs`IHM^ghSs>NT4MfBVs5iv()jQLP!JdV8RA_UDZobGrn#o>M#|19mr zfXk|!Rh~{#@=MiiZB3-0Y%p1$R%%1DJUu6xD2vCFzqAL$GQJTa!|D=oA6}=cnv|n4 zpVV_+8%De!y$v#>#MliH6iES=4@!o}yqs^t%Df2XR@Ey0ic=Cs*;NFbV{q{9v%j%B z5x@BSV+)Xm(18EPxD(b5q2Sa&akCMtl$swO&EAU@9f?~yO-(ZHW<#iF`47u68Wx_T zwl!jTpo_UV=$qs;^1!>7z%@2^6}p>A?Q_w@iztr|Mz(U{Lo8luVHX7 z+>Lj{Qp?%=3;DZXYN8EDoVhoZvDjk n>O0_5b9B6He!L1V1Y%+Yu5QLIZhw6xaHt>*OaKDOsv!RdDOGIs literal 0 HcmV?d00001 diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_1.pdf similarity index 100% rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes.pdf rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_1.pdf diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_2.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_2.pdf similarity index 100% rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_2.pdf rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_2.pdf diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_3.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_3.pdf similarity index 100% rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_3.pdf rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_3.pdf diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_4.pdf similarity index 100% rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_4.pdf diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_5.pdf similarity index 100% rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_5.pdf diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf similarity index 100% rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_1_text.json similarity index 100% rename from backend/documents_parser/tests/fixtures/site_notes_example_text.json rename to backend/documents_parser/tests/fixtures/pashub_site_notes_1_text.json diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_2_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_2_text.json similarity index 100% rename from backend/documents_parser/tests/fixtures/site_notes_example_2_text.json rename to backend/documents_parser/tests/fixtures/pashub_site_notes_2_text.json diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_3_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_3_text.json similarity index 100% rename from backend/documents_parser/tests/fixtures/site_notes_example_3_text.json rename to backend/documents_parser/tests/fixtures/pashub_site_notes_3_text.json diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_4_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_4_text.json similarity index 100% rename from backend/documents_parser/tests/fixtures/site_notes_example_4_text.json rename to backend/documents_parser/tests/fixtures/pashub_site_notes_4_text.json diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_5_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_5_text.json similarity index 100% rename from backend/documents_parser/tests/fixtures/site_notes_example_5_text.json rename to backend/documents_parser/tests/fixtures/pashub_site_notes_5_text.json diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_6_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_6_text.json similarity index 100% rename from backend/documents_parser/tests/fixtures/site_notes_example_6_text.json rename to backend/documents_parser/tests/fixtures/pashub_site_notes_6_text.json diff --git a/backend/documents_parser/tests/test_end_to_end.py b/backend/documents_parser/tests/test_end_to_end.py index 84e611c6..5278c002 100644 --- a/backend/documents_parser/tests/test_end_to_end.py +++ b/backend/documents_parser/tests/test_end_to_end.py @@ -20,9 +20,9 @@ from datatypes.epc.domain.epc_property_data import ( ) from datatypes.epc.domain.mapper import EpcPropertyDataMapper -PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf") +PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf") PDF_PATH_2 = os.path.join( - os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_2.pdf" + os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_2.pdf" ) @@ -302,7 +302,7 @@ class TestPdfToEpcPropertyDataFixture2: PDF_PATH_3 = os.path.join( - os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_3.pdf" + os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_3.pdf" ) @@ -339,7 +339,7 @@ class TestPdfToEpcPropertyDataFixture3: PDF_PATH_4 = os.path.join( - os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_4.pdf" + os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_4.pdf" ) @@ -369,7 +369,7 @@ class TestPdfToEpcPropertyDataFixture4: PDF_PATH_5 = os.path.join( - os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_5.pdf" + os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_5.pdf" ) @@ -401,7 +401,7 @@ class TestPdfToEpcPropertyDataFixture5: PDF_PATH_6 = os.path.join( - os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_6.pdf" + os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_6.pdf" ) diff --git a/backend/documents_parser/tests/test_extractor.py b/backend/documents_parser/tests/test_extractor.py index 66cc4271..65ec8c23 100644 --- a/backend/documents_parser/tests/test_extractor.py +++ b/backend/documents_parser/tests/test_extractor.py @@ -37,32 +37,32 @@ FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures") def load_text_fixture() -> list[str]: - with open(os.path.join(FIXTURES, "site_notes_example_text.json")) as f: + with open(os.path.join(FIXTURES, "pashub_site_notes_1_text.json")) as f: return json.load(f) def load_text_fixture_2() -> list[str]: - with open(os.path.join(FIXTURES, "site_notes_example_2_text.json")) as f: + with open(os.path.join(FIXTURES, "pashub_site_notes_2_text.json")) as f: return json.load(f) def load_text_fixture_3() -> list[str]: - with open(os.path.join(FIXTURES, "site_notes_example_3_text.json")) as f: + with open(os.path.join(FIXTURES, "pashub_site_notes_3_text.json")) as f: return json.load(f) def load_text_fixture_4() -> list[str]: - with open(os.path.join(FIXTURES, "site_notes_example_4_text.json")) as f: + with open(os.path.join(FIXTURES, "pashub_site_notes_4_text.json")) as f: return json.load(f) def load_text_fixture_5() -> list[str]: - with open(os.path.join(FIXTURES, "site_notes_example_5_text.json")) as f: + with open(os.path.join(FIXTURES, "pashub_site_notes_5_text.json")) as f: return json.load(f) def load_text_fixture_6() -> list[str]: - with open(os.path.join(FIXTURES, "site_notes_example_6_text.json")) as f: + with open(os.path.join(FIXTURES, "pashub_site_notes_6_text.json")) as f: return json.load(f) diff --git a/backend/documents_parser/tests/test_pdf.py b/backend/documents_parser/tests/test_pdf.py index 3a6dd2fb..d7492659 100644 --- a/backend/documents_parser/tests/test_pdf.py +++ b/backend/documents_parser/tests/test_pdf.py @@ -5,8 +5,8 @@ import pytest from backend.documents_parser.pdf import pdf_to_text_list -PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf") -FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "site_notes_example_text.json") +PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf") +FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "pashub_site_notes_1_text.json") @pytest.fixture From 540ee2c3c1579863d529caa558fa23734738c395 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:07:20 +0000 Subject: [PATCH 05/24] Elmhurst site notes dataclasses --- datatypes/epc/surveys/elmhurst_site_notes.py | 235 +++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 datatypes/epc/surveys/elmhurst_site_notes.py diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py new file mode 100644 index 00000000..d1fabc73 --- /dev/null +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -0,0 +1,235 @@ +from dataclasses import dataclass +from datetime import date +from typing import List, Optional + + +@dataclass +class SurveyorInfo: + surveyor_code: str + name: str + title: str + tel_number: str + survey_reference: str + my_reference: Optional[str] = None + + +@dataclass +class PropertyDetails: + rdsap_version: str + reference_number: str + lodgement_required: bool + regs_region: str + epc_language: str + postcode: str + region: str + street: str + town: str + tenure: str + transaction_type: str + inspection_date: date + process_date: date + epc_exists: bool + uprn: Optional[str] = None + house_name: Optional[str] = None + house_number: Optional[str] = None + locality: Optional[str] = None + county: Optional[str] = None + + +@dataclass +class FloorDimension: + name: str # e.g. "Lowest Floor" + area_m2: float + room_height_m: float + heat_loss_perimeter_m: float + party_wall_length_m: float + + +@dataclass +class BuildingPartDimensions: + dimension_type: str # e.g. "Internal" + floors: List[FloorDimension] + + +@dataclass +class WallDetails: + wall_type: str # e.g. "CA Cavity" + insulation: str # e.g. "F Filled Cavity" + thickness_unknown: bool + u_value_known: bool + party_wall_type: str # e.g. "U Unable to determine" + thickness_mm: Optional[int] = None + + +@dataclass +class RoofDetails: + roof_type: str # e.g. "PA Pitched (slates/tiles), access to loft" + insulation: str # e.g. "J Joists" + u_value_known: bool + insulation_thickness_mm: Optional[int] = None + + +@dataclass +class FloorDetails: + location: str # e.g. "G Ground floor" + floor_type: str # e.g. "N Suspended, not timber" + insulation: str # e.g. "A As built" + u_value_known: bool + default_u_value: Optional[float] = None + + +@dataclass +class Window: + width_m: float + height_m: float + area_m2: float + glazing_type: str + frame_factor: float + building_part: str + location: str + orientation: str + data_source: str + u_value: float + g_value: float + draught_proofed: bool + permanent_shutters: str # e.g. "None" + frame_type: Optional[str] = None + glazing_gap: Optional[str] = None + + +@dataclass +class VentilationAndCooling: + open_chimneys_count: int + open_flues_count: int + open_chimneys_closed_fire_count: int + solid_fuel_boiler_flues_count: int + other_heater_flues_count: int + blocked_chimneys_count: int + extract_fans_count: int + passive_vents_count: int + flueless_gas_fires_count: int + fixed_space_cooling: bool + draught_lobby: str # e.g. "Not present" + mechanical_ventilation: bool + pressure_test_method: str # e.g. "Not available" + + +@dataclass +class Lighting: + total_bulbs: int + led_cfl_count_known: bool + led_count: int + cfl_count: int + incandescent_count: int + + +@dataclass +class MainHeating: + heat_emitter: str # e.g. "Radiators" + fuel_type: str # e.g. "Mains gas" + flue_type: str # e.g. "Balanced" + fan_assisted_flue: bool + design_flow_temperature: str # e.g. "Unknown" + heating_controls_ees: str # e.g. "CBE" + heating_controls_sap: str # e.g. "SAP code 2106, Programmer, room thermostat and TRVs" + percentage_of_heat: int + pcdf_boiler_reference: Optional[str] = None # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%" + heat_pump_age: Optional[str] = None + + +@dataclass +class Meters: + electricity_meter_type: str # e.g. "Single" + main_gas: bool + electricity_smart_meter: bool + gas_smart_meter: bool + + +@dataclass +class WaterHeating: + water_heating_code: str # e.g. "HWP" + water_heating_sap_code: int + water_heating_fuel_type: str + hot_water_cylinder_present: bool + + +@dataclass +class Shower: + shower_number: int + outlet_type: str + connected: str # e.g. "None" + + +@dataclass +class BathsAndShowers: + number_of_baths: int + number_of_baths_connected: int + showers: List[Shower] + + +@dataclass +class Renewables: + solar_water_heating: bool + wwhrs_present: bool + flue_gas_heat_recovery_present: bool + photovoltaic_panel: str # e.g. "None" + export_capable_meter: bool + wind_turbine_present: bool + wind_turbines_terrain_type: str + hydro_electricity_generated_kwh: float + + +@dataclass +class ElmhurstSiteNotes: + surveyor_info: SurveyorInfo + property_details: PropertyDetails + + # Section 1.0 + property_type: str # e.g. "B Bungalow" + attachment: str # e.g. "E End-Terrace" + + # Section 2.0 + number_of_storeys: int + habitable_rooms: int + heated_habitable_rooms: int + + # Section 3.0 + construction_age_band: str # e.g. "D 1950-1966" + + # Section 4.0 + dimensions: BuildingPartDimensions + + # Section 5.0 + has_conservatory: bool + + # Sections 7.0–9.0 + walls: WallDetails + roof: RoofDetails + floor: FloorDetails + + # Section 10.0 + door_count: int + insulated_door_count: int + + # Section 11.0 + windows: List[Window] + draught_proofing_percent: int + + # Section 12.0 + ventilation: VentilationAndCooling + + # Section 13.0 + lighting: Lighting + + # Section 14.0–14.2 + main_heating: MainHeating + meters: Meters + + # Section 15.0 + water_heating: WaterHeating + + # Section 1x.0 + baths_and_showers: BathsAndShowers + + # Sections 16.0–22.0 + renewables: Renewables From a8579db4d98b47a76cac229af1006014541d57fb Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:09:30 +0000 Subject: [PATCH 06/24] elmhurst site notes fixture --- .../tests/fixtures/elmhurst_site_notes_1_text.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json diff --git a/backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json b/backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json new file mode 100644 index 00000000..3e21bc51 --- /dev/null +++ b/backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json @@ -0,0 +1,6 @@ +[ + "Summary Information\nSurveyor:\nP960-0001\nName:\nRichard Matthew Ratcliff\nTitle: Mr.\nTel Number: 07760 443 469\nSurvey Reference:\n001573\nMy Reference:\nCurrent SAP rating:\nC 69\nPotential SAP rating: C 77\nEmissions (t/year):\n1.683 tonnes\nCurrent EI rating:\nC 76\nPotential EI rating:\nB 81\nFuel Bill:\n\u00a3896\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nP960-0001-001573\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB10 1XX\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n19\nStreet:\nQueens Road\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nB Bungalow\nE End-Terrace\n2.0 Number of\nStoreys:\n1\nHabitable Rooms:\n2\nHeated Habitable Rooms:\n2\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\nLowest Floor:\n44.89\n2.24\n20.10\n6.70\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nU Unable to determine\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n", + "Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n270 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nN Suspended, not timber\nInsulation\nA As built\nDefault U-value\n0.69\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n0\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.30\n1.10\n1.43\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n1.80\n1.00\n1.80\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n0.80\n0.56\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n1.30\n0.91\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\nDraught Proofing\n100 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n0\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n8\nNumber of LED and CFL Known\nYes\nNumber of LED lights\n4\nNumber of CFL lights\n4\nTotal number of Low Energy\n8\nTotal number of incandescents\n0\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n", + "Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n17742 Potterton, Promax 33 Combi ErP, 88.30%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n0\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nElectric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nSuburban\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n", + "Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (Already installed)\nLow energy lighting (Already installed)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (Not applicable)\nDouble glazed windows (Already installed)\nInsulated doors (Already installed)\nSolar photovoltaic panels (Recommended)\nWind turbine (Not applicable)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n" +] \ No newline at end of file From 1a53a8d83ee90beae1772775b18a3c785dbfed2c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:13:24 +0000 Subject: [PATCH 07/24] =?UTF-8?q?Extract=20Elmhurst=20site=20notes=20to=20?= =?UTF-8?q?dataclass=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_elmhurst_extractor.py | 433 ++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 backend/documents_parser/tests/test_elmhurst_extractor.py diff --git a/backend/documents_parser/tests/test_elmhurst_extractor.py b/backend/documents_parser/tests/test_elmhurst_extractor.py new file mode 100644 index 00000000..99670ee8 --- /dev/null +++ b/backend/documents_parser/tests/test_elmhurst_extractor.py @@ -0,0 +1,433 @@ +import json +import os +from datetime import date + +import pytest + +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor +from datatypes.epc.surveys.elmhurst_site_notes import ( + BathsAndShowers, + BuildingPartDimensions, + ElmhurstSiteNotes, + FloorDetails, + FloorDimension, + Lighting, + MainHeating, + Meters, + PropertyDetails, + Renewables, + RoofDetails, + Shower, + SurveyorInfo, + VentilationAndCooling, + WallDetails, + WaterHeating, + Window, +) + +FIXTURE_PATH = os.path.join( + os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_1_text.json" +) + + +@pytest.fixture(scope="module") +def result() -> ElmhurstSiteNotes: + with open(FIXTURE_PATH) as f: + pages = json.load(f) + return ElmhurstSiteNotesExtractor(pages).extract() + + +class TestSurveyorInfo: + def test_surveyor_code(self, result: ElmhurstSiteNotes) -> None: + assert result.surveyor_info.surveyor_code == "P960-0001" + + def test_name(self, result: ElmhurstSiteNotes) -> None: + assert result.surveyor_info.name == "Richard Matthew Ratcliff" + + def test_title(self, result: ElmhurstSiteNotes) -> None: + assert result.surveyor_info.title == "Mr." + + def test_tel_number(self, result: ElmhurstSiteNotes) -> None: + assert result.surveyor_info.tel_number == "07760 443 469" + + def test_survey_reference(self, result: ElmhurstSiteNotes) -> None: + assert result.surveyor_info.survey_reference == "001573" + + def test_my_reference_none(self, result: ElmhurstSiteNotes) -> None: + assert result.surveyor_info.my_reference is None + + +class TestPropertyDetails: + def test_rdsap_version(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.rdsap_version == "RdSAP10" + + def test_reference_number(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.reference_number == "P960-0001-001573" + + def test_lodgement_required(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.lodgement_required is False + + def test_regs_region(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.regs_region == "England" + + def test_epc_language(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.epc_language == "English" + + def test_uprn_none(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.uprn is None + + def test_postcode(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.postcode == "BB10 1XX" + + def test_region(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.region == "West Pennines" + + def test_house_name_none(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.house_name is None + + def test_house_number(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.house_number == "19" + + def test_street(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.street == "Queens Road" + + def test_locality_none(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.locality is None + + def test_town(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.town == "BURNLEY" + + def test_county_none(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.county is None + + def test_tenure(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.tenure == "Rented (social)" + + def test_transaction_type(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.transaction_type == "Grant scheme" + + def test_inspection_date(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.inspection_date == date(2026, 3, 6) + + def test_process_date(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.process_date == date(2026, 3, 6) + + def test_epc_exists(self, result: ElmhurstSiteNotes) -> None: + assert result.property_details.epc_exists is False + + +class TestPropertyDescription: + def test_property_type(self, result: ElmhurstSiteNotes) -> None: + assert result.property_type == "B Bungalow" + + def test_attachment(self, result: ElmhurstSiteNotes) -> None: + assert result.attachment == "E End-Terrace" + + def test_number_of_storeys(self, result: ElmhurstSiteNotes) -> None: + assert result.number_of_storeys == 1 + + def test_habitable_rooms(self, result: ElmhurstSiteNotes) -> None: + assert result.habitable_rooms == 2 + + def test_heated_habitable_rooms(self, result: ElmhurstSiteNotes) -> None: + assert result.heated_habitable_rooms == 2 + + def test_construction_age_band(self, result: ElmhurstSiteNotes) -> None: + assert result.construction_age_band == "D 1950-1966" + + def test_has_conservatory(self, result: ElmhurstSiteNotes) -> None: + assert result.has_conservatory is False + + +class TestDimensions: + def test_dimension_type(self, result: ElmhurstSiteNotes) -> None: + assert result.dimensions.dimension_type == "Internal" + + def test_floor_count(self, result: ElmhurstSiteNotes) -> None: + assert len(result.dimensions.floors) == 1 + + def test_floor_name(self, result: ElmhurstSiteNotes) -> None: + assert result.dimensions.floors[0].name == "Lowest Floor" + + def test_floor_area(self, result: ElmhurstSiteNotes) -> None: + assert result.dimensions.floors[0].area_m2 == 44.89 + + def test_floor_room_height(self, result: ElmhurstSiteNotes) -> None: + assert result.dimensions.floors[0].room_height_m == 2.24 + + def test_floor_heat_loss_perimeter(self, result: ElmhurstSiteNotes) -> None: + assert result.dimensions.floors[0].heat_loss_perimeter_m == 20.10 + + def test_floor_party_wall_length(self, result: ElmhurstSiteNotes) -> None: + assert result.dimensions.floors[0].party_wall_length_m == 6.70 + + +class TestWalls: + def test_wall_type(self, result: ElmhurstSiteNotes) -> None: + assert result.walls.wall_type == "CA Cavity" + + def test_insulation(self, result: ElmhurstSiteNotes) -> None: + assert result.walls.insulation == "F Filled Cavity" + + def test_thickness_unknown(self, result: ElmhurstSiteNotes) -> None: + assert result.walls.thickness_unknown is False + + def test_thickness_mm(self, result: ElmhurstSiteNotes) -> None: + assert result.walls.thickness_mm == 300 + + def test_u_value_known(self, result: ElmhurstSiteNotes) -> None: + assert result.walls.u_value_known is False + + def test_party_wall_type(self, result: ElmhurstSiteNotes) -> None: + assert result.walls.party_wall_type == "U Unable to determine" + + +class TestRoof: + def test_roof_type(self, result: ElmhurstSiteNotes) -> None: + assert result.roof.roof_type == "PA Pitched (slates/tiles), access to loft" + + def test_insulation(self, result: ElmhurstSiteNotes) -> None: + assert result.roof.insulation == "J Joists" + + def test_insulation_thickness_mm(self, result: ElmhurstSiteNotes) -> None: + assert result.roof.insulation_thickness_mm == 270 + + def test_u_value_known(self, result: ElmhurstSiteNotes) -> None: + assert result.roof.u_value_known is False + + +class TestFloor: + def test_location(self, result: ElmhurstSiteNotes) -> None: + assert result.floor.location == "G Ground floor" + + def test_floor_type(self, result: ElmhurstSiteNotes) -> None: + assert result.floor.floor_type == "N Suspended, not timber" + + def test_insulation(self, result: ElmhurstSiteNotes) -> None: + assert result.floor.insulation == "A As built" + + def test_default_u_value(self, result: ElmhurstSiteNotes) -> None: + assert result.floor.default_u_value == 0.69 + + def test_u_value_known(self, result: ElmhurstSiteNotes) -> None: + assert result.floor.u_value_known is False + + +class TestDoors: + def test_door_count(self, result: ElmhurstSiteNotes) -> None: + assert result.door_count == 0 + + def test_insulated_door_count(self, result: ElmhurstSiteNotes) -> None: + assert result.insulated_door_count == 0 + + +class TestWindows: + def test_window_count(self, result: ElmhurstSiteNotes) -> None: + assert len(result.windows) == 4 + + def test_draught_proofing_percent(self, result: ElmhurstSiteNotes) -> None: + assert result.draught_proofing_percent == 100 + + def test_first_window_dimensions(self, result: ElmhurstSiteNotes) -> None: + w = result.windows[0] + assert w.width_m == 1.30 + assert w.height_m == 1.10 + assert w.area_m2 == 1.43 + + def test_first_window_glazing(self, result: ElmhurstSiteNotes) -> None: + w = result.windows[0] + assert w.glazing_type == "Double post or during 2022" + assert w.frame_factor == 0.70 + + def test_first_window_location(self, result: ElmhurstSiteNotes) -> None: + w = result.windows[0] + assert w.building_part == "Main" + assert w.location == "External wall" + assert w.orientation == "North" + + def test_first_window_performance(self, result: ElmhurstSiteNotes) -> None: + w = result.windows[0] + assert w.data_source == "Manufacturer" + assert w.u_value == 1.40 + assert w.g_value == 0.72 + assert w.draught_proofed is True + assert w.permanent_shutters == "None" + + def test_third_window_orientation(self, result: ElmhurstSiteNotes) -> None: + assert result.windows[2].orientation == "South" + + def test_fourth_window_dimensions(self, result: ElmhurstSiteNotes) -> None: + w = result.windows[3] + assert w.width_m == 0.70 + assert w.height_m == 1.30 + assert w.area_m2 == 0.91 + + +class TestVentilation: + def test_open_chimneys(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.open_chimneys_count == 0 + + def test_open_flues(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.open_flues_count == 0 + + def test_open_chimneys_closed_fire(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.open_chimneys_closed_fire_count == 0 + + def test_solid_fuel_boiler_flues(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.solid_fuel_boiler_flues_count == 0 + + def test_other_heater_flues(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.other_heater_flues_count == 0 + + def test_blocked_chimneys(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.blocked_chimneys_count == 0 + + def test_extract_fans(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.extract_fans_count == 2 + + def test_passive_vents(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.passive_vents_count == 0 + + def test_flueless_gas_fires(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.flueless_gas_fires_count == 0 + + def test_fixed_space_cooling(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.fixed_space_cooling is False + + def test_draught_lobby(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.draught_lobby == "Not present" + + def test_mechanical_ventilation(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.mechanical_ventilation is False + + def test_pressure_test_method(self, result: ElmhurstSiteNotes) -> None: + assert result.ventilation.pressure_test_method == "Not available" + + +class TestLighting: + def test_total_bulbs(self, result: ElmhurstSiteNotes) -> None: + assert result.lighting.total_bulbs == 8 + + def test_led_cfl_count_known(self, result: ElmhurstSiteNotes) -> None: + assert result.lighting.led_cfl_count_known is True + + def test_led_count(self, result: ElmhurstSiteNotes) -> None: + assert result.lighting.led_count == 4 + + def test_cfl_count(self, result: ElmhurstSiteNotes) -> None: + assert result.lighting.cfl_count == 4 + + def test_incandescent_count(self, result: ElmhurstSiteNotes) -> None: + assert result.lighting.incandescent_count == 0 + + +class TestMainHeating: + def test_pcdf_boiler_reference(self, result: ElmhurstSiteNotes) -> None: + assert ( + result.main_heating.pcdf_boiler_reference + == "17742 Potterton, Promax 33 Combi ErP, 88.30%" + ) + + def test_heat_emitter(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.heat_emitter == "Radiators" + + def test_heat_pump_age(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.heat_pump_age == "Unknown" + + def test_fuel_type(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.fuel_type == "Mains gas" + + def test_flue_type(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.flue_type == "Balanced" + + def test_fan_assisted_flue(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.fan_assisted_flue is True + + def test_design_flow_temperature(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.design_flow_temperature == "Unknown" + + def test_heating_controls_ees(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.heating_controls_ees == "CBE" + + def test_heating_controls_sap(self, result: ElmhurstSiteNotes) -> None: + assert ( + result.main_heating.heating_controls_sap + == "SAP code 2106, Programmer, room thermostat and TRVs" + ) + + def test_percentage_of_heat(self, result: ElmhurstSiteNotes) -> None: + assert result.main_heating.percentage_of_heat == 100 + + +class TestMeters: + def test_electricity_meter_type(self, result: ElmhurstSiteNotes) -> None: + assert result.meters.electricity_meter_type == "Single" + + def test_main_gas(self, result: ElmhurstSiteNotes) -> None: + assert result.meters.main_gas is True + + def test_electricity_smart_meter(self, result: ElmhurstSiteNotes) -> None: + assert result.meters.electricity_smart_meter is False + + def test_gas_smart_meter(self, result: ElmhurstSiteNotes) -> None: + assert result.meters.gas_smart_meter is False + + +class TestWaterHeating: + def test_water_heating_code(self, result: ElmhurstSiteNotes) -> None: + assert result.water_heating.water_heating_code == "HWP" + + def test_water_heating_sap_code(self, result: ElmhurstSiteNotes) -> None: + assert result.water_heating.water_heating_sap_code == 901 + + def test_water_heating_fuel_type(self, result: ElmhurstSiteNotes) -> None: + assert result.water_heating.water_heating_fuel_type == "Mains gas" + + def test_hot_water_cylinder_present(self, result: ElmhurstSiteNotes) -> None: + assert result.water_heating.hot_water_cylinder_present is False + + +class TestBathsAndShowers: + def test_number_of_baths(self, result: ElmhurstSiteNotes) -> None: + assert result.baths_and_showers.number_of_baths == 0 + + def test_number_of_baths_connected(self, result: ElmhurstSiteNotes) -> None: + assert result.baths_and_showers.number_of_baths_connected == 0 + + def test_shower_count(self, result: ElmhurstSiteNotes) -> None: + assert len(result.baths_and_showers.showers) == 1 + + def test_shower_number(self, result: ElmhurstSiteNotes) -> None: + assert result.baths_and_showers.showers[0].shower_number == 1 + + def test_shower_outlet_type(self, result: ElmhurstSiteNotes) -> None: + assert result.baths_and_showers.showers[0].outlet_type == "Electric shower" + + def test_shower_connected(self, result: ElmhurstSiteNotes) -> None: + assert result.baths_and_showers.showers[0].connected == "None" + + +class TestRenewables: + def test_solar_water_heating(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.solar_water_heating is False + + def test_wwhrs_present(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.wwhrs_present is False + + def test_flue_gas_heat_recovery_present(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.flue_gas_heat_recovery_present is False + + def test_photovoltaic_panel(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.photovoltaic_panel == "None" + + def test_export_capable_meter(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.export_capable_meter is False + + def test_wind_turbine_present(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.wind_turbine_present is False + + def test_wind_turbines_terrain_type(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.wind_turbines_terrain_type == "Suburban" + + def test_hydro_electricity_generated_kwh(self, result: ElmhurstSiteNotes) -> None: + assert result.renewables.hydro_electricity_generated_kwh == 0.0 From f61add95441454e88703f2eb7a187ad0e0f9004a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:32:08 +0000 Subject: [PATCH 08/24] =?UTF-8?q?Extract=20Elmhurst=20site=20notes=20to=20?= =?UTF-8?q?dataclass=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../documents_parser/elmhurst_extractor.py | 418 ++++++++++++++++++ .../tests/test_elmhurst_extractor.py | 2 +- 2 files changed, 419 insertions(+), 1 deletion(-) create mode 100644 backend/documents_parser/elmhurst_extractor.py diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py new file mode 100644 index 00000000..919c0f9a --- /dev/null +++ b/backend/documents_parser/elmhurst_extractor.py @@ -0,0 +1,418 @@ +import re +from datetime import date, datetime +from typing import List, Optional + +from datatypes.epc.surveys.elmhurst_site_notes import ( + BathsAndShowers, + BuildingPartDimensions, + ElmhurstSiteNotes, + FloorDetails, + FloorDimension, + Lighting, + MainHeating, + Meters, + PropertyDetails, + Renewables, + RoofDetails, + Shower, + SurveyorInfo, + VentilationAndCooling, + WallDetails, + WaterHeating, + Window, +) + + +class ElmhurstSiteNotesExtractor: + def __init__(self, pages: List[str]) -> None: + self._text = "\n".join(pages) + self._lines = [l.strip() for l in self._text.splitlines() if l.strip()] + + # --- generic helpers --- + + def _next_val(self, label: str) -> Optional[str]: + lc = label.rstrip(":") + ":" + lb = label.rstrip(":") + for i, line in enumerate(self._lines): + if line.startswith(lc) and len(line) > len(lc): + return line[len(lc):].strip() or None + if line == lc or line == lb: + for j in range(i + 1, min(i + 4, len(self._lines))): + v = self._lines[j] + if v.endswith(":") or v.startswith("©"): + return None + if v: + return v + return None + return None + + def _str_val(self, label: str) -> str: + v = self._next_val(label) + return " ".join(v.split()) if v else "" + + def _opt_str(self, label: str) -> Optional[str]: + v = self._next_val(label) + return " ".join(v.split()) if v else None + + def _bool_val(self, label: str) -> bool: + v = self._next_val(label) + return v is not None and v.lower() == "yes" + + def _int_val(self, label: str) -> int: + v = self._next_val(label) + try: + return int(v.split()[0]) if v else 0 + except (ValueError, IndexError): + return 0 + + def _date_val(self, label: str) -> date: + v = self._next_val(label) + if not v: + raise ValueError(f"Missing date for label: {label}") + return datetime.strptime(v.strip(), "%d/%m/%Y").date() + + def _between(self, start: str, end: str) -> str: + try: + s = self._text.index(start) + len(start) + e = self._text.index(end, s) + return self._text[s:e] + except ValueError: + return "" + + def _section_lines(self, start: str, end: str) -> List[str]: + text = self._between(start, end) + return [l.strip() for l in text.splitlines() if l.strip()] + + def _local_val(self, lines: List[str], label: str) -> Optional[str]: + lb = label.rstrip(":") + lc = lb + ":" + for i, line in enumerate(lines): + if line.startswith(lc) and len(line) > len(lc): + return line[len(lc):].strip() or None + if line == lc or line == lb: + for j in range(i + 1, min(i + 4, len(lines))): + v = lines[j] + if v.endswith(":") or v.startswith("©"): + return None + if v: + return v + return None + return None + + def _local_str(self, lines: List[str], label: str) -> str: + v = self._local_val(lines, label) + return " ".join(v.split()) if v else "" + + def _local_bool(self, lines: List[str], label: str) -> bool: + v = self._local_val(lines, label) + return v is not None and v.lower() == "yes" + + # --- section extractors --- + + def _extract_surveyor_info(self) -> SurveyorInfo: + return SurveyorInfo( + surveyor_code=self._str_val("Surveyor"), + name=self._str_val("Name"), + title=self._str_val("Title"), + tel_number=self._str_val("Tel Number"), + survey_reference=self._str_val("Survey Reference"), + my_reference=self._opt_str("My Reference"), + ) + + def _extract_property_details(self) -> PropertyDetails: + epc_m = re.search( + r"Check for the existence of\nan EPC:\n(Yes|No)", self._text + ) + epc_exists = epc_m.group(1).lower() == "yes" if epc_m else False + + return PropertyDetails( + rdsap_version=self._str_val("RdSAP version"), + reference_number=self._str_val("Reference Number"), + lodgement_required=self._bool_val("Lodgement Required"), + regs_region=self._str_val("Regs Region"), + epc_language=self._str_val("EPC Language"), + postcode=self._str_val("Postcode"), + region=self._str_val("Region"), + street=self._str_val("Street"), + town=self._str_val("Town"), + tenure=self._str_val("Property Tenure"), + transaction_type=self._str_val("Transaction Type"), + inspection_date=self._date_val("Inspection Date"), + process_date=self._date_val("Process date"), + epc_exists=epc_exists, + uprn=self._opt_str("UPRN"), + house_name=self._opt_str("House Name"), + house_number=self._opt_str("House No"), + locality=self._opt_str("Locality"), + county=self._opt_str("County"), + ) + + def _extract_attachment(self) -> str: + m = re.search(r"1\.0 Property type:\n[^\n]+\n([^\n]+)", self._text) + return " ".join(m.group(1).strip().split()) if m else "" + + def _extract_dimensions(self) -> BuildingPartDimensions: + dim_type = self._str_val("Dimension type") + section = self._between("4.0 Dimensions:", "5.0 Conservatory:") + floor_matches = re.findall( + r"([A-Za-z ]+Floor):\n([\d.]+)\n([\d.]+)\n([\d.]+)\n([\d.]+)", + section, + ) + floors = [ + FloorDimension( + name=name.strip(), + area_m2=float(area), + room_height_m=float(height), + heat_loss_perimeter_m=float(hlp), + party_wall_length_m=float(pwl), + ) + for name, area, height, hlp, pwl in floor_matches + ] + return BuildingPartDimensions(dimension_type=dim_type, floors=floors) + + def _extract_walls(self) -> WallDetails: + lines = self._section_lines("7.0 Walls:", "8.0 Roofs:") + thickness_raw = self._local_val(lines, "Wall Thickness") + thickness_mm = ( + int(thickness_raw.split()[0]) if thickness_raw else None + ) + return WallDetails( + wall_type=self._local_str(lines, "Type"), + insulation=self._local_str(lines, "Insulation"), + thickness_unknown=self._local_bool(lines, "Wall Thickness Unknown"), + u_value_known=self._local_bool(lines, "U-value Known"), + party_wall_type=self._local_str(lines, "Party Wall Type"), + thickness_mm=thickness_mm, + ) + + def _extract_roof(self) -> RoofDetails: + lines = self._section_lines("8.0 Roofs:", "8.1 Rooms in Roof:") + thickness_raw = self._local_val(lines, "Insulation Thickness") + thickness_mm = ( + int(thickness_raw.split()[0]) if thickness_raw else None + ) + return RoofDetails( + roof_type=self._local_str(lines, "Type"), + insulation=self._local_str(lines, "Insulation"), + u_value_known=self._local_bool(lines, "U-value Known"), + insulation_thickness_mm=thickness_mm, + ) + + def _extract_floor(self) -> FloorDetails: + lines = self._section_lines("9.0 Floors:", "10.0 Doors:") + u_val_raw = self._local_val(lines, "Default U-value") + default_u = float(u_val_raw) if u_val_raw else None + return FloorDetails( + location=self._local_str(lines, "Location"), + floor_type=self._local_str(lines, "Type"), + insulation=self._local_str(lines, "Insulation"), + u_value_known=self._local_bool(lines, "U-value Known"), + default_u_value=default_u, + ) + + def _extract_windows(self) -> List[Window]: + m = re.search( + r"Permanent\s+Shutters\n(.*?)Draught Proofing", + self._text, + re.DOTALL, + ) + if not m: + return [] + tokens = [t.strip() for t in m.group(1).splitlines() if t.strip()] + windows: List[Window] = [] + i = 0 + while i + 12 < len(tokens): + try: + width_m = float(tokens[i]) + height_m = float(tokens[i + 1]) + area_m2 = float(tokens[i + 2]) + except (ValueError, IndexError): + i += 1 + continue + i += 3 + # Collect glazing type until frame_factor (0 < v ≤ 1.0) + glazing_parts: List[str] = [] + while i < len(tokens): + try: + v = float(tokens[i]) + if 0.0 < v <= 1.0: + break + glazing_parts.append(tokens[i]) + except ValueError: + glazing_parts.append(tokens[i]) + i += 1 + glazing_type = " ".join(glazing_parts).strip() + if i >= len(tokens): + break + frame_factor = float(tokens[i]); i += 1 + building_part = tokens[i]; i += 1 + location = tokens[i]; i += 1 + orientation = tokens[i]; i += 1 + data_source = tokens[i]; i += 1 + u_value = float(tokens[i]); i += 1 + g_value = float(tokens[i]); i += 1 + draught_proofed = tokens[i].lower() == "yes"; i += 1 + permanent_shutters = tokens[i]; i += 1 + windows.append( + Window( + width_m=width_m, + height_m=height_m, + area_m2=area_m2, + glazing_type=glazing_type, + frame_factor=frame_factor, + building_part=building_part, + location=location, + orientation=orientation, + data_source=data_source, + u_value=u_value, + g_value=g_value, + draught_proofed=draught_proofed, + permanent_shutters=permanent_shutters, + ) + ) + return windows + + def _extract_ventilation(self) -> VentilationAndCooling: + return VentilationAndCooling( + open_chimneys_count=self._int_val("No. of open chimneys"), + open_flues_count=self._int_val("No. of open flues"), + open_chimneys_closed_fire_count=self._int_val( + "No. of open chimneys/open flues attached to closed fire" + ), + solid_fuel_boiler_flues_count=self._int_val( + "No. of flues attached to solid fuel boiler" + ), + other_heater_flues_count=self._int_val( + "No. of open flues attached to other heater" + ), + blocked_chimneys_count=self._int_val("No. of blocked chimneys"), + extract_fans_count=self._int_val("No. of intermittent extract fans"), + passive_vents_count=self._int_val("No. of passive vents"), + flueless_gas_fires_count=self._int_val("No. of flueless gas fires"), + fixed_space_cooling=self._bool_val("Fixed Space Cooling"), + draught_lobby=self._str_val("Draught Lobby"), + mechanical_ventilation=self._bool_val("Mechanical Ventilation"), + pressure_test_method=self._str_val("Test Method"), + ) + + def _extract_lighting(self) -> Lighting: + return Lighting( + total_bulbs=self._int_val("Total number of bulbs"), + led_cfl_count_known=self._bool_val("Number of LED and CFL Known"), + led_count=self._int_val("Number of LED lights"), + cfl_count=self._int_val("Number of CFL lights"), + incandescent_count=self._int_val("Total number of incandescents"), + ) + + def _extract_main_heating(self) -> MainHeating: + lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2") + pct_raw = self._local_val(lines, "Percentage of Heat") + pct = int(pct_raw.split()[0]) if pct_raw else 0 + return MainHeating( + heat_emitter=self._local_str(lines, "Heat Emitter"), + fuel_type=self._local_str(lines, "Fuel Type"), + flue_type=self._local_str(lines, "Flue Type"), + fan_assisted_flue=self._local_bool(lines, "Fan Assisted Flue"), + design_flow_temperature=self._local_str(lines, "Design flow temperature"), + heating_controls_ees=self._local_str(lines, "Main Heating Controls EES"), + heating_controls_sap=self._local_str(lines, "Main Heating Controls Sap"), + percentage_of_heat=pct, + pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"), + heat_pump_age=self._local_val(lines, "Heat pump age"), + ) + + def _extract_meters(self) -> Meters: + return Meters( + electricity_meter_type=self._str_val("Electricity meter type"), + main_gas=self._bool_val("Main gas"), + electricity_smart_meter=self._bool_val("Electricity Smart Meter Present"), + gas_smart_meter=self._bool_val("Gas Smart Meter Present"), + ) + + def _extract_water_heating(self) -> WaterHeating: + return WaterHeating( + water_heating_code=self._str_val("Water Heating Code"), + water_heating_sap_code=self._int_val("Water Heating SapCode"), + water_heating_fuel_type=self._str_val("Water Heating Fuel Type"), + hot_water_cylinder_present=self._bool_val("Hot Water Cylinder Present"), + ) + + def _extract_baths_and_showers(self) -> BathsAndShowers: + n_baths = self._int_val("Total Number of Baths") + n_connected = self._int_val("Number of Baths Connected") + try: + idx = self._lines.index("Connected") + except ValueError: + return BathsAndShowers( + number_of_baths=n_baths, + number_of_baths_connected=n_connected, + showers=[], + ) + showers: List[Shower] = [] + j = idx + 1 + while j + 2 <= len(self._lines) - 1: + num_line = self._lines[j] + if not num_line.isdigit(): + break + showers.append( + Shower( + shower_number=int(num_line), + outlet_type=self._lines[j + 1], + connected=self._lines[j + 2], + ) + ) + j += 3 + return BathsAndShowers( + number_of_baths=n_baths, + number_of_baths_connected=n_connected, + showers=showers, + ) + + def _extract_renewables(self) -> Renewables: + fghrs_lines = self._section_lines( + "18.0 Flue Gas Heat Recovery System", "19.0 Photovoltaic Panel" + ) + fghrs = self._local_bool(fghrs_lines, "Present") + + terrain = self._str_val("Terrain Type") + hydro_raw = self._next_val("Electricity generated [kWh/year]") + hydro = float(hydro_raw) if hydro_raw else 0.0 + + return Renewables( + solar_water_heating=self._bool_val("Solar Water Heating"), + wwhrs_present=self._bool_val("Is WWHRS present in the property?"), + flue_gas_heat_recovery_present=fghrs, + photovoltaic_panel=self._str_val("Photovoltaic Panel"), + export_capable_meter=self._bool_val("Export capable meter"), + wind_turbine_present=self._bool_val("Wind turbine present?"), + wind_turbines_terrain_type=terrain, + hydro_electricity_generated_kwh=hydro, + ) + + def extract(self) -> ElmhurstSiteNotes: + return ElmhurstSiteNotes( + surveyor_info=self._extract_surveyor_info(), + property_details=self._extract_property_details(), + property_type=self._str_val("1.0 Property type"), + attachment=self._extract_attachment(), + number_of_storeys=self._int_val("Storeys"), + habitable_rooms=self._int_val("Habitable Rooms"), + heated_habitable_rooms=self._int_val("Heated Habitable Rooms"), + construction_age_band=self._str_val("Main Property"), + dimensions=self._extract_dimensions(), + has_conservatory=self._bool_val("Is there a conservatory?"), + walls=self._extract_walls(), + roof=self._extract_roof(), + floor=self._extract_floor(), + door_count=self._int_val("Total Number of Doors"), + insulated_door_count=self._int_val("Number of Insulated Doors"), + windows=self._extract_windows(), + draught_proofing_percent=self._int_val("Draught Proofing"), + ventilation=self._extract_ventilation(), + lighting=self._extract_lighting(), + main_heating=self._extract_main_heating(), + meters=self._extract_meters(), + water_heating=self._extract_water_heating(), + baths_and_showers=self._extract_baths_and_showers(), + renewables=self._extract_renewables(), + ) diff --git a/backend/documents_parser/tests/test_elmhurst_extractor.py b/backend/documents_parser/tests/test_elmhurst_extractor.py index 99670ee8..f79f8704 100644 --- a/backend/documents_parser/tests/test_elmhurst_extractor.py +++ b/backend/documents_parser/tests/test_elmhurst_extractor.py @@ -198,7 +198,7 @@ class TestRoof: class TestFloor: def test_location(self, result: ElmhurstSiteNotes) -> None: - assert result.floor.location == "G Ground floor" + assert result.floor.location == "G Ground floor" def test_floor_type(self, result: ElmhurstSiteNotes) -> None: assert result.floor.floor_type == "N Suspended, not timber" From 15ae46ec9230d7dc7a8c1280236a80295f52d15d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:37:21 +0000 Subject: [PATCH 09/24] =?UTF-8?q?Map=20Elmhurst=20site=20notes=20to=20EpcP?= =?UTF-8?q?ropertyData=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_elmhurst_end_to_end.py | 297 ++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 backend/documents_parser/tests/test_elmhurst_end_to_end.py diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py new file mode 100644 index 00000000..8e45e24d --- /dev/null +++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py @@ -0,0 +1,297 @@ +import json +import os +from datetime import date + +import pytest + +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor +from datatypes.epc.domain.epc_property_data import ( + EpcPropertyData, + MainHeatingDetail, + SapBuildingPart, + SapEnergySource, + SapFloorDimension, + SapHeating, + SapVentilation, + SapWindow, + ShowerOutlet, + ShowerOutlets, +) +from datatypes.epc.domain.mapper import EpcPropertyDataMapper + +FIXTURE_PATH = os.path.join( + os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_1_text.json" +) + + +@pytest.fixture(scope="module") +def result() -> EpcPropertyData: + with open(FIXTURE_PATH) as f: + pages = json.load(f) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + +class TestAddress: + def test_address_line_1(self, result: EpcPropertyData) -> None: + assert result.address_line_1 == "19, Queens Road" + + def test_post_town(self, result: EpcPropertyData) -> None: + assert result.post_town == "BURNLEY" + + def test_postcode(self, result: EpcPropertyData) -> None: + assert result.postcode == "BB10 1XX" + + +class TestInspectionInfo: + def test_inspection_date(self, result: EpcPropertyData) -> None: + assert result.inspection_date == date(2026, 3, 6) + + def test_tenure(self, result: EpcPropertyData) -> None: + assert result.tenure == "Rented (social)" + + def test_transaction_type(self, result: EpcPropertyData) -> None: + assert result.transaction_type == "Grant scheme" + + def test_report_reference(self, result: EpcPropertyData) -> None: + assert result.report_reference == "P960-0001-001573" + + +class TestPropertyDescription: + def test_property_type(self, result: EpcPropertyData) -> None: + assert result.property_type == "Bungalow" + + def test_built_form(self, result: EpcPropertyData) -> None: + assert result.built_form == "End-Terrace" + + def test_dwelling_type(self, result: EpcPropertyData) -> None: + assert result.dwelling_type == "End-Terrace bungalow" + + def test_number_of_storeys(self, result: EpcPropertyData) -> None: + assert result.number_of_storeys == 1 + + def test_has_conservatory(self, result: EpcPropertyData) -> None: + assert result.has_conservatory is False + + def test_total_floor_area(self, result: EpcPropertyData) -> None: + assert result.total_floor_area_m2 == 44.89 + + +class TestCounts: + def test_habitable_rooms_count(self, result: EpcPropertyData) -> None: + assert result.habitable_rooms_count == 2 + + def test_heated_rooms_count(self, result: EpcPropertyData) -> None: + assert result.heated_rooms_count == 2 + + def test_door_count(self, result: EpcPropertyData) -> None: + assert result.door_count == 0 + + def test_insulated_door_count(self, result: EpcPropertyData) -> None: + assert result.insulated_door_count == 0 + + def test_open_chimneys_count(self, result: EpcPropertyData) -> None: + assert result.open_chimneys_count == 0 + + def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None: + assert result.blocked_chimneys_count == 0 + + +class TestLighting: + def test_led_count(self, result: EpcPropertyData) -> None: + assert result.led_fixed_lighting_bulbs_count == 4 + + def test_cfl_count(self, result: EpcPropertyData) -> None: + assert result.cfl_fixed_lighting_bulbs_count == 4 + + def test_incandescent_count(self, result: EpcPropertyData) -> None: + assert result.incandescent_fixed_lighting_bulbs_count == 0 + + +class TestFlags: + def test_solar_water_heating(self, result: EpcPropertyData) -> None: + assert result.solar_water_heating is False + + def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None: + assert result.has_hot_water_cylinder is False + + def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None: + assert result.has_fixed_air_conditioning is False + + def test_hydro(self, result: EpcPropertyData) -> None: + assert result.hydro is False + + def test_photovoltaic_array(self, result: EpcPropertyData) -> None: + assert result.photovoltaic_array is False + + +class TestBuildingPart: + def test_single_building_part(self, result: EpcPropertyData) -> None: + assert len(result.sap_building_parts) == 1 + + def test_identifier(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].identifier == "main" + + def test_construction_age_band(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].construction_age_band == "1950-1966" + + def test_wall_construction(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].wall_construction == "Cavity" + + def test_wall_insulation_type(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].wall_insulation_type == "Filled Cavity" + + def test_wall_thickness_measured(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].wall_thickness_measured is True + + def test_wall_thickness_mm(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].wall_thickness_mm == 300 + + def test_roof_insulation_location(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].roof_insulation_location == "Joists" + + def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].roof_insulation_thickness == 270 + + def test_floor_type(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].floor_type == "Ground floor" + + def test_floor_construction_type(self, result: EpcPropertyData) -> None: + assert ( + result.sap_building_parts[0].floor_construction_type + == "Suspended, not timber" + ) + + def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].floor_insulation_type_str == "As built" + + def test_floor_u_value_known(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].floor_u_value_known is False + + def test_single_floor_dimension(self, result: EpcPropertyData) -> None: + assert len(result.sap_building_parts[0].sap_floor_dimensions) == 1 + + def test_floor_dimension_area(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].sap_floor_dimensions[0].total_floor_area_m2 == 44.89 + + def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].sap_floor_dimensions[0].room_height_m == 2.24 + + def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None: + assert ( + result.sap_building_parts[0].sap_floor_dimensions[0].heat_loss_perimeter_m + == 20.10 + ) + + def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None: + assert ( + result.sap_building_parts[0].sap_floor_dimensions[0].party_wall_length_m + == 6.70 + ) + + +class TestWindows: + def test_window_count(self, result: EpcPropertyData) -> None: + assert len(result.sap_windows) == 4 + + def test_first_window_width(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].window_width == 1.30 + + def test_first_window_height(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].window_height == 1.10 + + def test_first_window_orientation(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].orientation == "North" + + def test_first_window_glazing_type(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].glazing_type == "Double post or during 2022" + + def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].draught_proofed is True + + def test_third_window_orientation(self, result: EpcPropertyData) -> None: + assert result.sap_windows[2].orientation == "South" + + +class TestHeating: + def test_single_heating_detail(self, result: EpcPropertyData) -> None: + assert len(result.sap_heating.main_heating_details) == 1 + + def test_fuel_type(self, result: EpcPropertyData) -> None: + assert result.sap_heating.main_heating_details[0].main_fuel_type == "Mains gas" + + def test_heat_emitter_type(self, result: EpcPropertyData) -> None: + assert ( + result.sap_heating.main_heating_details[0].heat_emitter_type == "Radiators" + ) + + def test_emitter_temperature(self, result: EpcPropertyData) -> None: + assert ( + result.sap_heating.main_heating_details[0].emitter_temperature == "Unknown" + ) + + def test_fan_flue_present(self, result: EpcPropertyData) -> None: + assert result.sap_heating.main_heating_details[0].fan_flue_present is True + + def test_has_fghrs(self, result: EpcPropertyData) -> None: + assert result.sap_heating.main_heating_details[0].has_fghrs is False + + def test_main_heating_control(self, result: EpcPropertyData) -> None: + assert ( + result.sap_heating.main_heating_details[0].main_heating_control + == "Programmer, room thermostat and TRVs" + ) + + def test_shower_outlet_type(self, result: EpcPropertyData) -> None: + assert result.sap_heating.shower_outlets is not None + assert ( + result.sap_heating.shower_outlets.shower_outlet.shower_outlet_type + == "Electric shower" + ) + + def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None: + assert result.sap_heating.cylinder_size is None + + def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None: + assert result.sap_heating.has_fixed_air_conditioning is False + + +class TestEnergySource: + def test_mains_gas(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.mains_gas is True + + def test_meter_type(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.meter_type == "Single" + + def test_electricity_smart_meter(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.electricity_smart_meter_present is False + + def test_gas_smart_meter(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.gas_smart_meter_present is False + + def test_wind_turbines_count(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.wind_turbines_count == 0 + + def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.wind_turbines_terrain_type == "Suburban" + + def test_pv_battery_count(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.pv_battery_count == 0 + + +class TestVentilation: + def test_draught_lobby(self, result: EpcPropertyData) -> None: + assert result.sap_ventilation is not None + assert result.sap_ventilation.draught_lobby is False + + def test_pressure_test(self, result: EpcPropertyData) -> None: + assert result.sap_ventilation is not None + assert result.sap_ventilation.pressure_test == "Not available" + + def test_extract_fans_count(self, result: EpcPropertyData) -> None: + assert result.sap_ventilation is not None + assert result.sap_ventilation.extract_fans_count == 2 + + def test_open_flues_count(self, result: EpcPropertyData) -> None: + assert result.sap_ventilation is not None + assert result.sap_ventilation.open_flues_count == 0 From 1105491141ab83c2451a620ae8bcb5d1e242e367 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 13:52:02 +0000 Subject: [PATCH 10/24] =?UTF-8?q?Map=20Elmhurst=20site=20notes=20to=20EpcP?= =?UTF-8?q?ropertyData=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- datatypes/epc/domain/mapper.py | 170 +++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 1afade5c..554bdda6 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -51,6 +51,11 @@ from datatypes.epc.schema.rdsap_schema_21_0_1 import ( RdSapSchema21_0_1, EnergyElement as EnergyElement_21_0_1, ) +from datatypes.epc.surveys.elmhurst_site_notes import ( + ElmhurstSiteNotes, + VentilationAndCooling as ElmhurstVentilation, + Window as ElmhurstWindow, +) from datatypes.epc.surveys.pashub_rdsap_site_notes import ( BuildingConstruction, BuildingMeasurements, @@ -200,6 +205,67 @@ class EpcPropertyDataMapper: sap_ventilation=_map_sap_ventilation(ventilation), ) + @staticmethod + def from_elmhurst_site_notes(survey: ElmhurstSiteNotes) -> EpcPropertyData: + pd = survey.property_details + built_form = _strip_code(survey.attachment) + property_type = _strip_code(survey.property_type) + + prefix = pd.house_number or pd.house_name or "" + address_line_1 = f"{prefix}, {pd.street}" if prefix else pd.street + + return EpcPropertyData( + dwelling_type=f"{built_form} {property_type.lower()}", + inspection_date=pd.inspection_date, + tenure=pd.tenure, + transaction_type=pd.transaction_type, + address_line_1=address_line_1, + post_town=pd.town, + postcode=pd.postcode, + report_reference=pd.reference_number, + roofs=[], + walls=[], + floors=[], + main_heating=[], + door_count=survey.door_count, + sap_heating=_map_elmhurst_sap_heating(survey), + sap_windows=[_map_elmhurst_window(w) for w in survey.windows], + sap_energy_source=SapEnergySource( + mains_gas=survey.meters.main_gas, + meter_type=survey.meters.electricity_meter_type, + pv_battery_count=0, + wind_turbines_count=1 if survey.renewables.wind_turbine_present else 0, + gas_smart_meter_present=survey.meters.gas_smart_meter, + is_dwelling_export_capable=survey.renewables.export_capable_meter, + wind_turbines_terrain_type=survey.renewables.wind_turbines_terrain_type, + electricity_smart_meter_present=survey.meters.electricity_smart_meter, + ), + sap_building_parts=[_map_elmhurst_building_part(survey)], + solar_water_heating=survey.renewables.solar_water_heating, + has_hot_water_cylinder=survey.water_heating.hot_water_cylinder_present, + has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling, + wet_rooms_count=0, + extensions_count=0, + heated_rooms_count=survey.heated_habitable_rooms, + open_chimneys_count=survey.ventilation.open_chimneys_count, + habitable_rooms_count=survey.habitable_rooms, + insulated_door_count=survey.insulated_door_count, + cfl_fixed_lighting_bulbs_count=survey.lighting.cfl_count, + led_fixed_lighting_bulbs_count=survey.lighting.led_count, + incandescent_fixed_lighting_bulbs_count=survey.lighting.incandescent_count, + total_floor_area_m2=round( + sum(f.area_m2 for f in survey.dimensions.floors), 2 + ), + built_form=built_form, + property_type=property_type, + has_conservatory=survey.has_conservatory, + blocked_chimneys_count=survey.ventilation.blocked_chimneys_count, + number_of_storeys=survey.number_of_storeys, + hydro=survey.renewables.hydro_electricity_generated_kwh > 0, + photovoltaic_array=survey.renewables.photovoltaic_panel != "None", + sap_ventilation=_map_elmhurst_ventilation(survey.ventilation), + ) + @staticmethod def from_rdsap_schema_17_0(schema: RdSapSchema17_0) -> EpcPropertyData: es = schema.sap_energy_source @@ -1453,6 +1519,12 @@ class EpcPropertyDataMapper: # --------------------------------------------------------------------------- +def _strip_code(value: str) -> str: + """Strip leading uppercase code from Elmhurst coded strings, e.g. 'CA Cavity' → 'Cavity'.""" + parts = value.split(" ", 1) + return parts[1] if len(parts) > 1 else value + + def _extract_age_band(age_range: str) -> str: """Return the letter code from a site-notes age range, e.g. 'I: 1996 - 2002' → 'I'.""" return age_range.split(":")[0].strip() @@ -1617,3 +1689,101 @@ def _map_sap_ventilation(ventilation: Ventilation) -> SapVentilation: flueless_gas_fires_count=ventilation.number_of_flueless_gas_fires, ventilation_in_pcdf_database=ventilation.ventilation_in_pcdf_database, ) + + +def _map_elmhurst_building_part(survey: ElmhurstSiteNotes) -> SapBuildingPart: + dims = survey.dimensions + floor_dims = [ + SapFloorDimension( + room_height_m=f.room_height_m, + total_floor_area_m2=f.area_m2, + party_wall_length_m=f.party_wall_length_m, + heat_loss_perimeter_m=f.heat_loss_perimeter_m, + floor=i, + ) + for i, f in enumerate(dims.floors) + ] + return SapBuildingPart( + identifier="main", + construction_age_band=_strip_code(survey.construction_age_band), + wall_construction=_strip_code(survey.walls.wall_type), + wall_insulation_type=_strip_code(survey.walls.insulation), + wall_thickness_measured=not survey.walls.thickness_unknown, + party_wall_construction=_strip_code(survey.walls.party_wall_type), + sap_floor_dimensions=floor_dims, + wall_thickness_mm=survey.walls.thickness_mm, + roof_insulation_location=_strip_code(survey.roof.insulation), + roof_insulation_thickness=survey.roof.insulation_thickness_mm, + floor_type=_strip_code(survey.floor.location), + floor_construction_type=_strip_code(survey.floor.floor_type), + floor_insulation_type_str=_strip_code(survey.floor.insulation), + floor_u_value_known=survey.floor.u_value_known, + ) + + +def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow: + return SapWindow( + pvc_frame=w.frame_type or "", + glazing_gap=w.glazing_gap or "", + orientation=w.orientation, + window_type="Window", + glazing_type=w.glazing_type, + window_width=w.width_m, + window_height=w.height_m, + draught_proofed=w.draught_proofed, + window_location=w.building_part, + window_wall_type=w.location, + permanent_shutters_present=w.permanent_shutters, + ) + + +def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: + mh = survey.main_heating + sap_control = mh.heating_controls_sap + control = ( + sap_control.split(", ", 1)[1] + if sap_control.startswith("SAP code") and ", " in sap_control + else sap_control + ) + shower_outlets = ( + ShowerOutlets( + shower_outlet=ShowerOutlet( + shower_outlet_type=survey.baths_and_showers.showers[0].outlet_type + ) + ) + if survey.baths_and_showers.showers + else None + ) + return SapHeating( + instantaneous_wwhrs=InstantaneousWwhrs(), + main_heating_details=[ + MainHeatingDetail( + has_fghrs=survey.renewables.flue_gas_heat_recovery_present, + main_fuel_type=mh.fuel_type, + heat_emitter_type=mh.heat_emitter, + emitter_temperature=mh.design_flow_temperature, + fan_flue_present=mh.fan_assisted_flue, + main_heating_control=control, + central_heating_pump_age_str=mh.heat_pump_age, + ) + ], + has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling, + shower_outlets=shower_outlets, + cylinder_size=None if not survey.water_heating.hot_water_cylinder_present else survey.water_heating.water_heating_code, + ) + + +def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation: + return SapVentilation( + ventilation_type=None, + draught_lobby=v.draught_lobby != "Not present", + pressure_test=v.pressure_test_method, + open_flues_count=v.open_flues_count, + closed_flues_count=v.open_chimneys_closed_fire_count, + boiler_flues_count=v.solid_fuel_boiler_flues_count, + other_flues_count=v.other_heater_flues_count, + extract_fans_count=v.extract_fans_count, + passive_vents_count=v.passive_vents_count, + flueless_gas_fires_count=v.flueless_gas_fires_count, + ventilation_in_pcdf_database=None, + ) From 20ef8cd4893ad06b8ed0905a60f5e845c3c2a4ff Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 14:01:36 +0000 Subject: [PATCH 11/24] update local runner to work for elmhurst --- backend/documents_parser/local_runner.py | 21 +++++++++++++++------ backend/documents_parser/pdf.py | 5 +++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/backend/documents_parser/local_runner.py b/backend/documents_parser/local_runner.py index a50786ea..89dc7cdb 100644 --- a/backend/documents_parser/local_runner.py +++ b/backend/documents_parser/local_runner.py @@ -18,19 +18,27 @@ from backend.app.db.models.epc_property import ( EpcPropertyModel, EpcWindowModel, ) +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor -from backend.documents_parser.pdf import pdf_to_text_list +from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData from datatypes.epc.domain.mapper import EpcPropertyDataMapper -from datatypes.epc.surveys.pashub_rdsap_site_notes import PasHubRdSapSiteNotes def _parse_pdf(pdf_path: str) -> EpcPropertyData: with open(pdf_path, "rb") as f: pdf_bytes: bytes = f.read() - pages: List[str] = pdf_to_text_list(pdf_bytes) - site_notes: PasHubRdSapSiteNotes = PasHubRdSapSiteNotesExtractor(pages).extract() - return EpcPropertyDataMapper.from_site_notes(site_notes) + + pages: List[str] = pdf_to_pages(pdf_bytes) + full_text: str = "\n".join(pages) + + if "Elmhurst Energy Systems" in full_text: + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + tokens: List[str] = pdf_to_text_list(pdf_bytes) + pashub_notes = PasHubRdSapSiteNotesExtractor(tokens).extract() + return EpcPropertyDataMapper.from_site_notes(pashub_notes) def _insert_energy_elements( @@ -119,4 +127,5 @@ def run(pdf_path: str) -> None: if __name__ == "__main__": - run("backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf") + # run("backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf") + run("backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf") diff --git a/backend/documents_parser/pdf.py b/backend/documents_parser/pdf.py index dfa07300..53e209ad 100644 --- a/backend/documents_parser/pdf.py +++ b/backend/documents_parser/pdf.py @@ -10,3 +10,8 @@ def pdf_to_text_list(pdf_bytes: bytes) -> List[str]: for line in page.get_text().split("\n"): tokens.append(line) return tokens + + +def pdf_to_pages(pdf_bytes: bytes) -> List[str]: + with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc: + return [page.get_text() for page in doc] From b36c8b884c477f429a1cc3ce83312515aef0dab1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 24 Apr 2026 15:33:59 +0000 Subject: [PATCH 12/24] =?UTF-8?q?map=20remaining=20Elmhurst=20fields=20to?= =?UTF-8?q?=20EpcPropertyData=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_elmhurst_end_to_end.py | 14 ++++++++++++++ datatypes/epc/domain/mapper.py | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py index 8e45e24d..18742356 100644 --- a/backend/documents_parser/tests/test_elmhurst_end_to_end.py +++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py @@ -255,6 +255,9 @@ class TestHeating: def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None: assert result.sap_heating.has_fixed_air_conditioning is False + def test_water_heating_code(self, result: EpcPropertyData) -> None: + assert result.sap_heating.water_heating_code == 901 + class TestEnergySource: def test_mains_gas(self, result: EpcPropertyData) -> None: @@ -295,3 +298,14 @@ class TestVentilation: def test_open_flues_count(self, result: EpcPropertyData) -> None: assert result.sap_ventilation is not None assert result.sap_ventilation.open_flues_count == 0 + + +class TestDraughtproofingAndWater: + def test_percent_draughtproofed(self, result: EpcPropertyData) -> None: + assert result.percent_draughtproofed == 100 + + def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None: + assert result.waste_water_heat_recovery == "None" + + def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None: + assert result.any_unheated_rooms is False diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 554bdda6..ab1bb0aa 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -264,6 +264,9 @@ class EpcPropertyDataMapper: hydro=survey.renewables.hydro_electricity_generated_kwh > 0, photovoltaic_array=survey.renewables.photovoltaic_panel != "None", sap_ventilation=_map_elmhurst_ventilation(survey.ventilation), + percent_draughtproofed=survey.draught_proofing_percent, + waste_water_heat_recovery="None" if not survey.renewables.wwhrs_present else "Present", + any_unheated_rooms=survey.heated_habitable_rooms < survey.habitable_rooms, ) @staticmethod @@ -1770,6 +1773,7 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling, shower_outlets=shower_outlets, cylinder_size=None if not survey.water_heating.hot_water_cylinder_present else survey.water_heating.water_heating_code, + water_heating_code=survey.water_heating.water_heating_sap_code, ) From 6cc73b6ebf3d3088bdfc40973d541ecda9035617 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 11:06:58 +0000 Subject: [PATCH 13/24] remove unused import --- datatypes/epc/domain/mapper.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ab1bb0aa..9af386da 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -62,7 +62,6 @@ from datatypes.epc.surveys.pashub_rdsap_site_notes import ( ExtensionConstruction, ExtensionMeasurements, ExtensionRoofSpace, - FloorConstruction, FloorMeasurement, HeatingAndHotWater, PasHubRdSapSiteNotes, @@ -265,7 +264,9 @@ class EpcPropertyDataMapper: photovoltaic_array=survey.renewables.photovoltaic_panel != "None", sap_ventilation=_map_elmhurst_ventilation(survey.ventilation), percent_draughtproofed=survey.draught_proofing_percent, - waste_water_heat_recovery="None" if not survey.renewables.wwhrs_present else "Present", + waste_water_heat_recovery=( + "None" if not survey.renewables.wwhrs_present else "Present" + ), any_unheated_rooms=survey.heated_habitable_rooms < survey.habitable_rooms, ) @@ -1649,7 +1650,11 @@ def _map_sap_heating( fuel_type = ( _raw_fuel if _raw_fuel - else ("Electricity" if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES else _raw_fuel) + else ( + "Electricity" + if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES + else _raw_fuel + ) ) return SapHeating( @@ -1671,7 +1676,11 @@ def _map_sap_heating( secondary_fuel_type=secondary_fuel_type, secondary_heating_type=heating.secondary_heating.secondary_system, shower_outlets=shower_outlets, - cylinder_size=heating.water_heating.cylinder_size if heating.water_heating.cylinder_size != "No Cylinder" else None, + cylinder_size=( + heating.water_heating.cylinder_size + if heating.water_heating.cylinder_size != "No Cylinder" + else None + ), cylinder_insulation_type=heating.water_heating.insulation_type, cylinder_insulation_thickness_mm=heating.water_heating.insulation_thickness_mm, immersion_heating_type=heating.water_heating.immersion_type, @@ -1772,7 +1781,11 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: ], has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling, shower_outlets=shower_outlets, - cylinder_size=None if not survey.water_heating.hot_water_cylinder_present else survey.water_heating.water_heating_code, + cylinder_size=( + None + if not survey.water_heating.hot_water_cylinder_present + else survey.water_heating.water_heating_code + ), water_heating_code=survey.water_heating.water_heating_sap_code, ) From afedbd236546f795dd69fdaff56cc3a7804889af Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 12:09:35 +0000 Subject: [PATCH 14/24] add energy fields to ElmhurstSiteNotes --- datatypes/epc/surveys/elmhurst_site_notes.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index d1fabc73..0234a29c 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -184,6 +184,13 @@ class ElmhurstSiteNotes: surveyor_info: SurveyorInfo property_details: PropertyDetails + # Summary Information + current_sap_rating: int + potential_sap_rating: int + current_ei_rating: int + potential_ei_rating: int + co2_emissions_current_t: float + # Section 1.0 property_type: str # e.g. "B Bungalow" attachment: str # e.g. "E End-Terrace" From 444eaa0c06af9c52e04e12b90afa77fa4dcca715 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 12:10:40 +0000 Subject: [PATCH 15/24] =?UTF-8?q?extract=20energy=20fields=20from=20elmhur?= =?UTF-8?q?st=20site=20notes=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_elmhurst_extractor.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/backend/documents_parser/tests/test_elmhurst_extractor.py b/backend/documents_parser/tests/test_elmhurst_extractor.py index f79f8704..c44f8e6b 100644 --- a/backend/documents_parser/tests/test_elmhurst_extractor.py +++ b/backend/documents_parser/tests/test_elmhurst_extractor.py @@ -431,3 +431,20 @@ class TestRenewables: def test_hydro_electricity_generated_kwh(self, result: ElmhurstSiteNotes) -> None: assert result.renewables.hydro_electricity_generated_kwh == 0.0 + + +class TestEnergyPerformance: + def test_current_sap_rating(self, result: ElmhurstSiteNotes) -> None: + assert result.current_sap_rating == 69 + + def test_potential_sap_rating(self, result: ElmhurstSiteNotes) -> None: + assert result.potential_sap_rating == 77 + + def test_current_ei_rating(self, result: ElmhurstSiteNotes) -> None: + assert result.current_ei_rating == 76 + + def test_potential_ei_rating(self, result: ElmhurstSiteNotes) -> None: + assert result.potential_ei_rating == 81 + + def test_co2_emissions_current_t(self, result: ElmhurstSiteNotes) -> None: + assert result.co2_emissions_current_t == 1.683 From 7a68fbcae97fa492c0d319f138009fcc48971cd7 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 12:11:53 +0000 Subject: [PATCH 16/24] =?UTF-8?q?extract=20energy=20fields=20from=20elmhur?= =?UTF-8?q?st=20site=20notes=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/documents_parser/elmhurst_extractor.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 919c0f9a..3063e358 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -368,6 +368,13 @@ class ElmhurstSiteNotesExtractor: showers=showers, ) + def _rating_val(self, label: str) -> int: + v = self._next_val(label) + try: + return int(v.split()[-1]) if v else 0 + except (ValueError, IndexError): + return 0 + def _extract_renewables(self) -> Renewables: fghrs_lines = self._section_lines( "18.0 Flue Gas Heat Recovery System", "19.0 Photovoltaic Panel" @@ -390,9 +397,17 @@ class ElmhurstSiteNotesExtractor: ) def extract(self) -> ElmhurstSiteNotes: + emissions_raw = self._next_val("Emissions (t/year)") + co2 = float(emissions_raw.split()[0]) if emissions_raw else 0.0 + return ElmhurstSiteNotes( surveyor_info=self._extract_surveyor_info(), property_details=self._extract_property_details(), + current_sap_rating=self._rating_val("Current SAP rating"), + potential_sap_rating=self._rating_val("Potential SAP rating"), + current_ei_rating=self._rating_val("Current EI rating"), + potential_ei_rating=self._rating_val("Potential EI rating"), + co2_emissions_current_t=co2, property_type=self._str_val("1.0 Property type"), attachment=self._extract_attachment(), number_of_storeys=self._int_val("Storeys"), From 00821c5c23df56397813c6ffb4abcce7e0c5a0e6 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 12:15:28 +0000 Subject: [PATCH 17/24] =?UTF-8?q?map=20elmhurst=20energy=20fields=20to=20e?= =?UTF-8?q?pc=20property=20data=20class=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_elmhurst_end_to_end.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py index 18742356..c434e833 100644 --- a/backend/documents_parser/tests/test_elmhurst_end_to_end.py +++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py @@ -5,18 +5,7 @@ from datetime import date import pytest from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor -from datatypes.epc.domain.epc_property_data import ( - EpcPropertyData, - MainHeatingDetail, - SapBuildingPart, - SapEnergySource, - SapFloorDimension, - SapHeating, - SapVentilation, - SapWindow, - ShowerOutlet, - ShowerOutlets, -) +from datatypes.epc.domain.epc_property_data import EpcPropertyData from datatypes.epc.domain.mapper import EpcPropertyDataMapper FIXTURE_PATH = os.path.join( @@ -309,3 +298,20 @@ class TestDraughtproofingAndWater: def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None: assert result.any_unheated_rooms is False + + +class TestEnergyPerformance: + def test_energy_rating_current(self, result: EpcPropertyData) -> None: + assert result.energy_rating_current == 69 + + def test_energy_rating_potential(self, result: EpcPropertyData) -> None: + assert result.energy_rating_potential == 77 + + def test_environmental_impact_current(self, result: EpcPropertyData) -> None: + assert result.environmental_impact_current == 76 + + def test_environmental_impact_potential(self, result: EpcPropertyData) -> None: + assert result.environmental_impact_potential == 81 + + def test_co2_emissions_current(self, result: EpcPropertyData) -> None: + assert result.co2_emissions_current == 1.683 From 865ee3eadae1b494f1d5beccb00e3dc636256d27 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 12:16:26 +0000 Subject: [PATCH 18/24] =?UTF-8?q?map=20elmhurst=20energy=20fields=20to=20e?= =?UTF-8?q?pc=20property=20data=20class=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- datatypes/epc/domain/mapper.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 9af386da..5720d323 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -268,6 +268,11 @@ class EpcPropertyDataMapper: "None" if not survey.renewables.wwhrs_present else "Present" ), any_unheated_rooms=survey.heated_habitable_rooms < survey.habitable_rooms, + energy_rating_current=survey.current_sap_rating, + energy_rating_potential=survey.potential_sap_rating, + environmental_impact_current=survey.current_ei_rating, + environmental_impact_potential=survey.potential_ei_rating, + co2_emissions_current=survey.co2_emissions_current_t, ) @staticmethod From 5940977bb26a7964134cbea92a2782e766f820c1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 14:11:33 +0000 Subject: [PATCH 19/24] tweak window transmission data source type --- backend/app/db/models/epc_property.py | 2 +- datatypes/epc/domain/epc_property_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/db/models/epc_property.py b/backend/app/db/models/epc_property.py index f04741f8..49901644 100644 --- a/backend/app/db/models/epc_property.py +++ b/backend/app/db/models/epc_property.py @@ -607,7 +607,7 @@ class EpcWindowModel(SQLModel, table=True): frame_factor: Optional[float] = Field(default=None) permanent_shutters_insulated: Optional[str] = Field(default=None) transmission_u_value: Optional[float] = Field(default=None) - transmission_data_source: Optional[int] = Field(default=None) + transmission_data_source: Optional[str] = Field(default=None) transmission_solar_transmittance: Optional[float] = Field(default=None) @classmethod diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index 8f949b0f..6e6f73a7 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -89,7 +89,7 @@ class SapVentilation: @dataclass class WindowTransmissionDetails: u_value: float - data_source: int + data_source: Union[int, str] solar_transmittance: float From 9571ed608c0f45460c324d5fe581a8c5d8b57d44 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 14:13:02 +0000 Subject: [PATCH 20/24] =?UTF-8?q?map=20elmhurst=20window=20transmission=20?= =?UTF-8?q?details=20to=20epc=20property=20data=20class=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_elmhurst_end_to_end.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py index c434e833..53c81164 100644 --- a/backend/documents_parser/tests/test_elmhurst_end_to_end.py +++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py @@ -201,6 +201,21 @@ class TestWindows: def test_third_window_orientation(self, result: EpcPropertyData) -> None: assert result.sap_windows[2].orientation == "South" + def test_frame_factor(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].frame_factor == 0.7 + + def test_transmission_u_value(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].window_transmission_details is not None + assert result.sap_windows[0].window_transmission_details.u_value == 1.4 + + def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].window_transmission_details is not None + assert result.sap_windows[0].window_transmission_details.solar_transmittance == 0.72 + + def test_transmission_data_source(self, result: EpcPropertyData) -> None: + assert result.sap_windows[0].window_transmission_details is not None + assert result.sap_windows[0].window_transmission_details.data_source == "Manufacturer" + class TestHeating: def test_single_heating_detail(self, result: EpcPropertyData) -> None: From 268576e345b362af06fb8f26d5d8c65dc2f41b2a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 14:16:59 +0000 Subject: [PATCH 21/24] =?UTF-8?q?map=20elmhurst=20window=20transmission=20?= =?UTF-8?q?details=20to=20epc=20property=20data=20class=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- datatypes/epc/domain/mapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 5720d323..ab931305 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1751,6 +1751,12 @@ def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow: window_location=w.building_part, window_wall_type=w.location, permanent_shutters_present=w.permanent_shutters, + frame_factor=w.frame_factor, + window_transmission_details=WindowTransmissionDetails( + u_value=w.u_value, + solar_transmittance=w.g_value, + data_source=w.data_source, + ), ) From 8f94bb54355613ac2585183588ae1b6e9f6c1d97 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 15:50:25 +0000 Subject: [PATCH 22/24] =?UTF-8?q?extract=20window=20frame=20details=20from?= =?UTF-8?q?=20elmhurst=20site=20notes=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/fixtures/ElmhurstSiteNotes_2.pdf | Bin 0 -> 65517 bytes .../fixtures/elmhurst_site_notes_2_text.json | 6 ++ .../tests/test_elmhurst_extractor.py | 65 ++++++++++++++++++ datatypes/epc/surveys/elmhurst_site_notes.py | 44 ++++++------ 4 files changed, 95 insertions(+), 20 deletions(-) create mode 100644 backend/documents_parser/tests/fixtures/ElmhurstSiteNotes_2.pdf create mode 100644 backend/documents_parser/tests/fixtures/elmhurst_site_notes_2_text.json diff --git a/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes_2.pdf b/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes_2.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f856591797ce5274403935ffa9dc94f9f4f101e2 GIT binary patch literal 65517 zcmeFa1yo#Jw(pw+cZWdm1b2tv?h>qm6~Wy-Sn%K$+zA?_aF^ij!QI_8=q_u&?Y;CNKz-{4XQqZ#km&42?VP@%Q zWX~jFspn`UW@KPvXv8FEWNiX2n1hp*TR_0U(cVbU3h`sWp$>wBs$|ULca*EaGS12a z*FYz9OZ0WSgD|4O?#=_(o{yh|L_{i34T%W#9bbsx2E9Wf%7h}?Y0-R@H8p8oIN6*5 z)ZA27o7(^E`GWOQJOD~?vZ(aOwVJvyQBq%y-(S5tR=XAW9^E!CO;p%y8p%;x3B!UC zM79|}GV*`Vm4u9|vst>jTqw5_nQU)Y*o*2A;9d*Ww zl73;%MVm8J)@hjc4Z@OT+6-!Ku{Cn@T(<&Dt=g_^&dtHO{buzJ-0oNd&7a<6&{}M^ zF754khE_XqV;3AA#Pgg_@7J!M)}@ztI?4?@XJ;CETI>%^PpvNY>}_qG4s@|d!dYE*FWcZ0t@%$B>4(8kA!iS2#B}q}rUg5PUS*rIta%1T$0u;u^mgWcp1*AOnKN z)mX)$@VKqg+D1Fs<68dF$V0ODK}$G;Ai(0IA(w+HQf` zt;w6VC?T_)_%^)(%{KBAroGQ5E?+o}BLQWS2jUdSC{1T>n@&&A@@kz~I|LbW#9=Lt z2lI4FE>CyvmnB#*6p3jXz{UMH$RM##pTDxR*N*bDsLjuoKsQBUN%(js^K0OI8 z$}da98a$@g*8ON{Hs)UJd_Va7D@^9Qt6bw2v}RLG?z|nzQuii5MZsmF_j0~;9AC#d zQPYWiT>G9iW?5y77T=^l^z%0k-RB+`7pN}oi6QC&IM-6ZPvuFu6u?ie@S)pd^=g+n zt{zxKkV2ex2$WR|=ik*!wgp2;)ye^bbOk$C75rAfA=|@-+m7$wm_($zH*!0D zvM<1jn`+bh(Zm5N-QN_Ft+j07&%B~*ebxBovqpjexGSFdmV^LWJJo4kRVuG|!wNh> z{2`ucKd}{ushO;7w{P%N`;A#SPt0DZ*3%IvEI6%;MsPd`+mQF+HZ%M&M`c*q*tnf; zF;J%{Uc^h^2u4ed&_5tWtaNaMbDvL9K zSymZg){cdFm!C%w8&%b>dzWXTf_ry#j_8X=X-mMAH$>;m^b>muohV>-HeHp>246B* zZ|ICxe@Vr(8h9ud%%i|PKyVOF#ow=MWjS6v;z#Q3ye+)byj!A^(^J%ZI>0c>@CI1y znz}0x8VnH98xU3Tx?tOo*dlsZ^nzxO?kA}pz@I5(&P`500Ojd2?zh4tk^89|W9^>G zkff8@(D@AU)nD`n(JEcKQ|7@MZ@-u5d8#cXLjhX#tkWRO-c$)%y~VJXW>T6(gAo;I zQO((qu78n^0D3%jjs?;ci0)Ri?87%KMJRCg!A-Q)2&$`A{yv?!g+*j{)&ZPutBv!T zCd6n;=}vM8mq8z9j|G$gzX2U|%6ont>)OvX?Pa7Jz4=N;Smng?YM`jUhJ?lQQ{4x1 z5_~^qGyKG_p;{iURF|1y1U^Cn1`b6AkGM zP3@K!cNrJ5k$`LDJ8o02co#cXp_*Z~Vu|mTzl)8XYOZR$OQvdTe9uB`K{lfm?aU6@ z^Mxi7qiWtR9??9gLkaWX`w*tXsxnNy{a6Ih$*Nq@=<%zAX;AmI%_+6nd=ptN zU9(LB;Agk`!B@Q%Iv%}M;{a8%js~&upG9bAXL{n!VMmKjaVq;OCmmVQ;BgQ<2p&In z9SZFq9>ggY^yIa}8E;?QkC<#vlxnFLr`sCFJ>2&D>$e(4v-;2;Flq(f4Ii_ARTB01UV~B07-;MgRnUBPDCi4i@vroiXzH zG^?jF0L$}h42wb*B?UexooR?)?vBgc`^n!-B%RU5M-_*!yHaFoUt!P;7Umsj`b2uf zw$15N0)_X{^A9Xf@N11|m|m7!=w7)fJZ*QkUQ9cQ|2mZkELC{xso@YhG$<1oquL{r zwMRIKs+;7B6WK#^t_xbeVj+i?tB~@Lcjo@cxQ+R-<9zm4Sj-SO;q0M9h#j{})SkYQ z+nK)Y4|C{%D7&rp#BN|8*kqtnIWRCbbER0zjEQjnW~!mk(Cu=apcX*fd;01mvO%I5 zy^GzpN%`l|SrD>Iko@&`(WM&8yWZCuP+NJYC(3H>`7~4mMVn5S*#+qlUJm%WGJ@mu zRKMY`QC(WD1w#k?yew)a`}d00<4~_WcrOS&waB8U+Bt4AGY~$wcBR+JzU`BMb8p}X zcSt-?El9m^Xc{j!;C>iL=hjE#yveVew!vdB(u@~CW3+sXg{M0z9rc1& zgh{4F@6fd?+iC;mE;oCA{_|EfwYO@dyi#AxLJfxv+R;(op{hI&FG>k;#NpoF?sI}R z^ZssbUn$-)7w~suCdL45zWUaCvos#PR$t zoLg1`!-RCU!vXjMRq6~}anSdv^|k2biy~;QsLopYBbH?$iPzwn{fK*ruJGYhb_k zeoyE;>ULHfBta21?=SRWeu=qapI}j#G`~sn!gZ3LB`&m`4Q?@cQ2!?@oX>SKARQE( ziLp&&x#DU>f-omcN-%Ftf=_4d>Cs&2Zk=?R-FJp)-egM%_h8`awYrBE3GPzRMuKph z3?DBM)A4HfIzc=65jem@O3d}u+`RmUp$9)UP`uL+Z1v@71S*{nk3nlW4i>Itdr{aL zjUxwv75-}xl;ca3y)CFh?+tqPmPU)?S^9?N2_8=1_-!57=CzV)XZzaOENf~PE6a2& z@J5KEI{|o0?SydL|5Q>6bi7%Q#w*^}QZJF&}LCtV2xTkH)2HgN)CrR;&h5jyk>r-XKyP7UWDDNN~4Lz*H*!auN* z@VFohC7frJEH-)?+_n_(>EL4BRMm+T=AT^pJbCGO_c$^kcu#((@N#V-%_WFz*gg6t zs#*30hshD!%-U0%6k+ON7~X}dR=W`(GEEk{M1_0pVRTHrlB3)ke2nrE3FVDtKwJop0LE2?f`N1cfs{j?hxO5WV1G*KD;P2&}jHstO~`n0H~Pa^3D_} zAW`u3y>J_nHQ%oAHdWJ|siV_}I$Ug*`=<}n@IDKiXXLq9UKrJI{upuC5nCIpicJKm zY2QcPqqd`ms5w#@G8Ip}Pocr;FN~sSU`$j;9@Cq%OwntX39T z%j6@cF(Gk}@IBvSz+h!-1`yT0^lUSz#jOMcB%*nGIE0ojg z^Q>&nx;CL@JZgL}B$^jUNk$-Y48Z;d*jOR;?aC5FRhPW-b9Y$i((`qp!TEw(^}`bMa=h4YjL|0Y(u0K z78Mt+U2`cpvNw$zh@EsXZ*EP=!YCIy58lHG@RAfj0e%2btogy1mHbdi{}Y*gN7kIcHzDnRSUJ=|Ma8EO(xLR*SvD z52-@R*&^#W#ey}t@9vFU5QW{u&pGZ&SmOJNg_wln^e0b+nzGBguV_7q>H)!kX=K&t z>e~%3L8W~;itjjUPKY=;k)O>Yq_bmWiXBPuRnFAx3h4TC-U@LP9v%b^N9Oe*kY6}o ztLzN6`y0ABH8_sQ(`oTz#uV>=zyitLabshOr-L5coD7%UzP^|(5ui-9^cMlB+QY8l zTp~1J&$TQSCBluZDh<=HvGi1}{Wu9ZpqY`pApO$PuRlqsr8?t94>t7#yCb~)S zgJP=(*~H=~>W*UnP9SU&^XOngL*E<0(ma?8nyoFb`}t4f^;y{gj?F=)6{Jo`^s-uZ3+c!fBVBC zGpfT0arMuALj!ewuB(71o8tcnjQ+mjVWTnVjYmu*&KOE|l_jpEm`#bzfnXlEqjf9p z0h%0=KV_x>wL3QJ1%=|59mX}ec9kwO9M!9c{f?rX zsz)^5SMeCP;Zqc!iiPdQ0XQ+~dlYgXNSI8Z!!$I<0hs>j6#`6rmZ7twk~aBhs~vQ1 zf&)+58N)>zEzO1R=6D{!0zo$6g-T4n#k_(Ss2-cz z`Et(@e^QUiqPneW8@ab-0Hm3KVQYaAmROn`M!I0vg1+DY6@MGN==VWh`{yYOyPs%Z z&?&jEwP?-=zQ|YomB^q3*Vv)hnF=c9Dk6i~DiMoYIswKU#BQ2BD;l*EY*DK~DR5WE+G; z7k#@V@C`YZqoBL|k^+HVZ2&D?j<#1@6MmO@q|Rx1WMZlNOvUqtz3a*=0fH zWo69V+}!V31z6eBJK2VJCI+?HZs-}w+vM4PvJ{5#tOLWoto(@^ki$sf+PZ1wedz zdfUZC)eJ(Zo~f&;lSrrOIlRS##cU>A8{Iow`dnH560Pkij~<2Tg!ZMMGc$>uz(t3E zfNgG@uR2-b?A7B9jxp~?P=`2=9ph*YoQuJ)q{1vR1dxPeH>D(AJCCHnO-~5y)CaoS z<%qG8j9x*8#KOhh5S5w9#EgstPIZ4e>_yz%nuZ(B7EPXwlnE0C*%IOovByH;5KZh} zJ5Z1wk#UdMkf+g6HHi7Hgkn!8M3Y^jwk=sS^*bRsOji^J09qo@W+*b)cTXT}qXodZ z)2D4y0bdk1Rgy5C%ThC1Kj9*zH?8O7q$Z(V;t-&XM|EYeDZ+%WEL{*cmm4=`z+K7S zG;uAg=OWy&Ed$F|dvpRvaG;@><7k10u!C3#gO{{W7$_JI*mk*9r#>Jb92)9-Q`Gn& zYX;|bx@CR&#v4b3#0}s&ZGJhX-0({lda208`PP{FfB+d0+OxTNeSQ6`$(OE)jjZ4N zy1Mw*ZR|^B)8|%`3e#LaV$Qh8cV|N`x7O4LxVBnavm6``4-c7BJ39r))p<507l>}0 zI8P2~*Ku{nI#Y(57a9Qg=Jnrul(~WvG;`Ce@zoEtF90oNWwzVHo?Ho|+2`lyva+(* zjhW36o12^QTU(l(jV+}SBiqXjPDtgew*|J2cG}n<-M<3wuy5Wf($19ozLcC!)}5fu zU}m^0gxW8h-Ly*UKvoWb4RUR84~qy}I zX=!Fwf2xy^kjT-|BNJ>dJIG*jA2hsOilzf+7 z*3kHvy>;D4&qsm6aY>iJxP0u}A_>-wkcdrXI849_bPBe_FTXv}914@KWM!+Z93{&t%WKeVFUrfKJV6W$onAsM z;h@Al_oqX);5Jsf%m{!|@c#_g7y{$yDwL3%klFI77#=ug0c54StAD#+U%wfASPv5d z=>ITK%J07CsH{KkjEh*`5&^izESOyQ^$s1rQ^4+ZX<5#Bg~V73bPMG#UMU#oHfI=) z7KW=YOc*)jF|tD-4#X>pGx?frTxuW<9CAQQDe+qdPhOU`NH*9=j412Ph+ewQ6m;fxSilewNK-p8TAe)2B8vDX$XZ5Mg&;;A+nZ%Sh zoH$lxT)^^b8azw@J03xiPPDj?BN2Q z-if(!+TYF|mdIlOJz<~Rdhvy!nX%~TmZc>H0bOqHf*i_aq`CRIpM0AI@@=ho_qW9rk`UN(bg?Yj*j`q&Bzb41WCZ@u}z9we0 z@?QD7cXkN6--baA4aX@+8WS;CaW&8_`Cf43x>YwdA(QW#nrC4rSEDHSYy6DU*=4+s za5^`em>3s)@5rrF*dgrYvu0n424aWs;u?K-WaMytG2m;qxj|89FC+0;O7Te^NUwz*NSdlr6H=)lRs@ zrG>=`aWT|4931Qu{wPD3nwD1dqc~@FX3x}oT6J2AHTIO2k3H;YgO8V2OZDT;;WSk? zqwenEu6SqoTlsMLmtKzznsa^9eZ3kVfOdPG(t;$*oYwP$7s$O+6N^wtFJ#=^Kd)l< zaUcM5^Kw>fY!q#(CrnP`^~}4i!{x&iPh9!1L(JXXT(hzJG?-Hg*aYGh>_2NJw?tU$ zSfy>i590MkUcm$xm1LP&nFS=GP8Ak8SY5$423lnn1vSE+e(F~;@yMnuOYKD_CP$;! z61FD3+1*%i-5lQxsW@9}&psHWulY66Nb*qwNXh*@6>#mLZ(!14Yj}vz(eGVR;-Sq` z7?t{wse%1bIp|c*9rQ`1+BcY)jnhYG??9FLA@BS5j_&@swqatgaBh)qI&`R>)!|X8 zRV2ewDAX_6nJFFugMcUSXZ`inm57K`glgpvz0TLW$P~HW ztv}zq)XUsgT~$+go8Rh|{DH)@`!?F)^1NTV+8AV>ac$wgv+?;%C{R%_cm3nS*GdA`E^k!Mzjp<-AYa| zLvg!^NU(RGD@-uz@XhDDK{-XbCR9}x_BTUw@$>y^i7$^{w@@;2D%e9ei3rw0hGfw}Sfez6Gx5n2`V%MKp9sVo3gGwb`0!S*tG6_J+ zteW8gPmejTtYbyQ2VEgS)^@awbB4-3LNQhU8t@R>egqR-ezwhT9d+CDU`&Pjp2Smq z`&u|@l~$dGf(NFXBs#OPh!;qr(LNyls!dGhfpp16mTmXK{RFZ7H7Jr^ls7CtxDXj+ zD^H>NZdfF}K>jQwI-PNZ?yYA>3YUJbg0zC%_{0>;=tPjH!$isv3my_iM!WX{N}0O#oJak9$QW+#V*>eo-QBesE@Ad@Om3q#puA< z>K#+)y9s?b$!WaOnMr^jMf zaZF<~5F8)Vhd-kkHid+JttzkD+dq8E zi_s+VMM+szF5V{$(0?~y+g$`6kODu$vNpAAsKv#}43obYc|-ChNFv8;tC(&P#!LKm z{%{+o%FbOYWzXv*oh~)J=zAN^dpi4oqf`^^nGq8E-v?GHAR*eE>okF<>eUP+0%_e0 zc64Yn3v-rA2i4Y4{+~tnuds9c0bh@OUyW!bYf!J@m}%4dou1m2Aw|4#iobgNTC6w9 zS`!Tra&T~HcFoDFFO(KHzhn=`;po&TC${_Hr5;`JS?=xDi?B)I+ihqNSORY9+h|#{ z395yPo;qpeSsz?YUyC({)Rw`yswuq)Bz;H58!++H=ejn3))Nni(K|B4#>tF`f*MU& zSXekVF}Aq%8=0CtwymJB6c+T8@x((EX!pjgB>Yf-1Fl%(iK2rpvO8b6IA9q8kM^qE6~Aap@7(y36<8sd)9*EK=&MdftLB-cwLeZW)?!ZXW*ABKF=Gd_5KP&o0j#mh7?X0YH#QaOU|w%RwIdwJNGr7Iyz8bC z$}%e`6isUnl-<_V(GlPOIU%&~cX)Pv`8LmOL68*FCs!m!Nw%HQBR(!(M9M)+D|$+$ z2w{)E;wpz`e;FO;eL3*N+AT)?4ABthAP%`OYN^cY$)FaLx0T1K#+7AD&A>6#GcnM= z&@Q$7vvU+47g@a^UWX2}hf%mDpOlgqxBTX$8Q)=IqCFVN7Jaj~&yCJs_^ z*Xr8aMUul0a!Adp?B;D@;9;w1sNo#2R7jX?np(O7#D5QFsxzdZ(Cr=V1*#lzfxh&I zgdoF;CIiHLC&_2jG?hi@C);A{@iK0<<~?j@ zHbcwJW)W(MubP!PHJ~edr1Uzm3VJLsPa2LyF!FC_MwChUTZPtUXv-=#xYBp5i^h-E1oB z4jsY19mx^a6in+w(d6khA@*y!cA)2)v&UzJ(;KD$I85+A`k56L8aBneD?=(QD2v+p zX1aaWpd}$PBgyc?`(W|xR|1{@_ou6H5rcpWlOmT$*R)jK98OrbuVqzL{>{Lv)cx)E#GM6(P}&KVH*LnYd&ZS7G~P?a?9?zDwvDG`OkZ zGe~?`(=BOLre3@Tiz3R)(em>|3Z9dTOHiOc=05hl=fjS|uv9KZMQpS9T3>t;K7Qx2 zLuF+!La@SB1f#+*kQm6_H8n@+7oC+u>MS1)s*SAtjwKLHcyMq^(|jNMb9`;BtG9P* zsu4+$uD_puk^jQUF_^*&8v2#6lr8*F_f+`N-tN|_>G8^%993WlB_$>KTXKmm2W<^C zwYpz{zsxNx8f^tklMTHdfiG3nHDzQNW5NA2;u+#VVl|%mG8^dg2I)gDuQTGiDLM(z zeqfSy4J%C_O~c&#X%KLK@4&xLWc0oBzPg=WbXK~Tt7)R#)(TQvr*I+%l8QrFc4?`# zqa!wzLYShen)>eU4zQ)*(+Nxh3Np8ezUjBLIDYEask(`}cjbI-pR_HD;~QX!101)0 z|8|pg2-Uori@($(%aVW%ObQDN2?_D>Y4qHhEH<}7DfxN=FOY$TD=)8rf{KcZhm%5( z(y84__8R?*P3KCT67{c%sY1~5r#Ww>mER7JFH<}n8_`n(19Lh%hZ2jm)fL=TlL~bP zT8WX~-{j?5s=~{{*g55Xa*%Dd?(W>#-=Wd&gvK*4)(@F}=XqkT%Knyth-p9w(E4$V zo3+<0vxutob64DvZ&g!HW@t-cImR#Yk76&r`T4B5IA+qjClm2yIzDnT0#xL6ybi;p z_?V^#IwnSXxY)T>E0l@(;HA_Ue4DQgW@RPT;pdLAD`Z$G8Q__X zxqV1LQvI1HX((dotNVM3r!B+npM_Q*Nx&nWW5UnT!xN*-htyw7NYHE6-D`B43HsAz zB@FbrHYH8Pgds@SF(caQ$|2jN>*gdce%7zM{g5bG_>p=wgXz*UMdF2j-;%Tbka-?0 zsL|r^)yq+3!eCTnH)8nfXrp@ruha`+)CvC+`Lh|lWzyC-UHpE@WWd;mF)}<~vc6s! zpDPL5AoLeJ!756GogXd3d>I%qYDKVUK`+|1{DVXaVM^*qBfimu*f#SuuaS%{+MPm^ zR;^%h<=~36hxu|JUrCNE+^l;|G-B?T4d5x7KX682G!%*;ogp1qnWbAFSv%sn-~j>< zXuG=l@bwZ5Wsn`+zb$kVvAzK17|@9&J5rfw%S#L2^e;^@(w!d-uV%hfprNJz*6R92 z_k~=+8KO{m$5h`S99AebQ&MW@66LtN9{;C0ewMbuKA%<1ePvP;fF97!&f3U!!i#@v z-Md<2zw7;u#|7A~S*qaWW38`m27SO)Wm#GQ_pI*_tV0EJ(>^$=e!H;L3y509u>|DR zse~4Pa}QXImoxXZhGQ}Rfpf4EKxY#U;D$$ZDqa?Ow|BOQMtg?UvPXwZyh z%{@SmjOIC?z_n%r18>@+| zv|4WV_I{*=TYbsMh~a~Jco?5Y1%IIEd|>xh(uU}`i#h=8F2bGKcV-3pU=o<`8OC$c zzr&7TC8zgL8oMP$NANN6*7er5Jjm@0?YG&`dmhuGlzKPq>G}r-D&FW<`WDX$zCU=% zZ{Qry?f>y&1YfzcZ5UXAEJIMwBgf!0)G*f?xgf3+9J55CNG2mYb*YQ7!ftC@crscx zi$7aBJUgo~`m_C)c17{)Cf^~wY3eN;B{hxQ!aO?`R(mlSvCGTLHrzJm_U+r>H!&r= zq2cuiYcMyr)wP1~P`=A=)p5lq){*e$bop(M;P5w>FJ1QVw(!s?lJHYjbly=>Zf=$` zEPQRj+%z?@QLKz5Mui=QgN23DQSuTtM#U0~6O6z+JluuZ^aG4YnQUxtnlQ6|0v0pS zf2g{cxxT+|DvS=k2&_8W+1-WZw=5~P8v;APmx$9(EW`@9o%iEI*>spQ69J;4qTR58 zZo*07#l_=6KeX zXUCn=Jskv5E30D=5fQE6Xy73h{pjy&v$J-_(n8_0lsdN0H1cSfsGf1^?H}|#pt{3d zVk3KOBSgP%VRn~9*VJQSp(yevF{Wo=n4KLRm*#X{sj(lDAF5e(;kka#?MLAooSs(N zRih7H+3B;15$S$wPMR`$bhL*{oVOQ0hQ=1Jj++1=E+v^JCMIyMaA_jIS!`RES5O!p z9t69XkOJDlel5%BH;YEf=HDa7W0Y|%tVrH;Ci*;XqhIwa3grq3zq@iYGBWD4nH9ey3IsZ=!kbk!%8TbT8o|NJL_lvh2|Jvg1Kg|}g{)5F^2wMbUi~c{wjUa3h zge`)wMG&?K!WKc;A_!XqVT&Ma5ri#*utgBI2*MUY*dhp91YwIHY!QSlg0MyZe`Jd| z|26mYKg|}g{R8(jge`)wMG&?K!WKc;A_!XqVT&Ma5ri#*utgBI2*MUY*dhp91YwIH zY!QSlg0Mvpwg|!&LD(V)TLfW?AZ!tYErPH`5Vi=y7X6QBi@5%^#oK?HEn@!%i?WB1qgK2T0r^NZcYw+#*QaB1qgKNZcYw+#*QaqQCiu|L0Rk+#*QaB1qgK zNZcYw+#*QaB1qgKNZg|TtK$}N|7-5)f0`}g_y_K32wMbUiy&+fge`)wMG&?K!WKc; zA_!XqVT&Ma5ri#*utgBI2*MUY*dhp91YwIHY!QSlg0Mvpwg|!&LD(V)TLfW?AZ!tY zE&3nN7V-RRi?{zYTg3Sf7H=VJ5ri#*utgBI2*MUY*rIF*TLfW?AZ!tYErPH`5Vi=y z7D3n|gya6Fl2V}K&3ZIm@xCVCH}_o#TLfW?AZ!tYErPH`5Vi=y7X4q%7O^n@YyRnf z8ZP4c2mWaYTm*rOAaD@`E`q>C5V!~e7eU}62wVh#iy&|j1TKQWMG&|M0vAExA_!ar zfr}t;5d2h9OQ$ArorPfOxDH};(G=1w}WuE@CF;$1$T(41ckP(J25 zRD@~%Oq2nfD*E7FWJzLa_HFU0p>oHn63YE;=~g@W`u1Dv^otM{5<@6%lChyG2Ct`} z*%n}^NO#W~W4!J8H?6Qc((MEu!5riyuESb8t$RBqm>WvQZWQNvBb3IOf|UH`U&}tS zFN?3Z-z7Ih;lOKq7`b8WnFw?m z@F`L4CAf6_1WMiOXwI8B{G>IMfe}kKsTm~;m@P;^sW*Jcn{yi(!0^p-#BAkvp^y4f z@?QN8x=^#8%mVKwF#&JHY(Gj{nZ$x`xkxiKA~w4OC|}r^Z_D`(DbIuTMI@Y9e^21O zxhF>MsB*!f5p-Uh>HtfsK-Lk-1T>z!DRTQhpJ@bpn3V~i$Vp{|>ER05)L-d0ka&A!t*^{7BHRFhUn!%`}u%iQjk zEobI#VZz^yWaPRj1&|iTQ6RFAQr@i^DTIDMvLw~7?W})1ir3EY3Hrd|9guEs5aFnQ zeS!KLFQn{x0ny0X@J|{7AKB=eBeJmmTa?WMQ8q-`|7K;g{acjH!VHl%MB4vmX|w-V zX-n8xJBk@O7}%THI@;JXJ)izzPzc+b=~>c?*jO4eNm%NcIFPcj{&7=8#Ku*Nft`zo zl!1$jos@;0otcz{nVC~ZK;X~A0>|Iiz@*}&@91W0#H4EPWTg6h@JA&Q%uFJB4o1(F zGW}JB;?@Q>hGy0#OaL=$VQUAoznn{$**iFjn(Em!=N}t4HxDT%Cl@IT4+|+98{2_n%{;@qDbAru4E^x!1uRUkxU}pX!-E&3U;0FAW7JU7We1A&! zmwezn&&NFM?0+fyxx7C&;(5=`3I2n?Wj+6ae`!3pjz1s&QRkoD5Z325fVby1Jnx_D z|8u%O@Bb+0`TlcTpYwr_pWFDn|I?=PhaUb@J76t)=0A|BcfAX_5U+ zJkUQR^uG}q%ioBM=`VWvZ|aMa2coZkzrMg)20x&j%zx~kAF$_tgZ)4Ic~eSN5}%|4P|^^KJi1dL~9pN_zIs^RZ`}SeZ%Q$k0shxu<+?F9#0? zBP$oUcy4wUMsRP$tplFhSv!JLJAgZ-Khi53IoLSa8yGq8@%`~&Jnz8$>T^%3%p}1= z%JM9Ca08e=D(agX890LPOM~B-KWo9W*5!N>81XgRyBpi(Soy`RSW z=Wm8fE5=B^>weE%?5sNG8Fy)349}jOIp|M#o11w2d#JV1JT&S=(YR{nsNu=;-OT=1 z-gFYv?z9(IKS@|OD&k-KxbXn_pPKT)kWDnL^FFmFjaoLmmZl)2@l+bQCZgb>9~R_S z-FNzCGb7*7d*-Ctt#Rvin=y9j2+I7~Uw%<)zGH##J9O6Yw=U|Mm>Jnl!vP5fJ-C@X#Otj>@#$)U*<(h5QNLNd;EZTv!}U%NX)c)DT?n6D zYwGQ^g+Y~$tOQz5$=iE{N254DXA1YaIs+bP*@m21xZpZ{AhRJ6^U9SaylX<75Rqgb z&4J88v}0R)o5ffP-b>K5-3OsS>X}Hz&EMT(zavNdXzT9YwW;C6#Azfa``U}vUk}4g zb5Yg5kmeqOfuCvoqOj!tBIBjZi$y5-n_S;P=7`u0=*qV@Pta(7qQdQyccf<1s4&4R zT4-*188gRc2eTWSfjUX7A+c*^rOiRjnIph!2lozLIqPGBzQ*HGUlr~Lumd4DDA~nA z^Ic|4R<{E z=*$kA3bv(!vG~fw#|Hr;?b5-6;I{pQIJ%BO@Ad5ByDyf)7#o*7FM)|l4yGY*y_9Jt ztuwGnSyjpci$5fY=T4@POI(vZrgqtiR=}i)yS4bJ#~a1PVX2H)$9oc{{Mr~m2@QFb zYBGV@)fZz9YmKY&&KUOOu#^G?@E%_JWS7F4E!1D|B&-qIl!|(ZI@74 z{KC+b20i1uRo6Aj=fEH#MC)ioLqRCt{J?>%O+jvW^Iz95cBgtvXIrE*ip~<(8zpq> zy^zE`y;ug;x6e$!e+kQPR`<8%-tYYOODqT)c`Tr6QEN5k*z&1=57wtWYay2V_7E+M zKhep_cQ(G$A_aJ)`6j|UM*A-G#V*Aj31S*7i`wI zX?OHmShJt&^2+RH%aeN5chRD+lwqaYj99S`onaU&o#MiaxOrme+`}<$w6ODaekxsv z&CpSbBxD4STF4su?MS{Dx6ywi|6(q;?d6w?hO)6Me-^{Z&+1}*@Z*PTSx~4seheRP zcz52IHlDG|P^W5U7*D2SoYii#6;{2?TrJpUm5$%+kXjXrC1OT6T}9A9TVb-tLV1-n zFd9{vgkAc!?u!0I*8<6peDBUubEdhh$@Eo(4S@c3q7}|WM`|EtMnerB8p8%e!iUNJ#d_7UVLch!*T8YI|cb6!jRZ{wu;v^v>v39yJ z#%d2XK=Va)kqdTfsO4fMB65dk&$Xox`4APQPJD+b;2+W6z-= z*(y9vfp%LX5_ms+S!qc`q65<0b>c^{U$6Z5znu_I1nEK@dQ2^f?(!^sTCLdfxt2sq zG6Zcw;fi1?RH2K=>|pHTiu}Z4@Q;#Y_~n(2(a`tp^~~k1O=#qobaq=PJV1uERoOkf zA{Vbud%&#$92KQ^aHa(hEB#K9PZC_c`f-`R_Fr z9g@{--xvbWB&&D*RE+}Cf-HTg;`nyn1Vj-{p}vC!r7U!51Iq?>DDv#=#_q@&er(*B zm<@i7(w#}*mdIDo5ny%>>%sA_4l?>(+Y@!m>+9Tz565UCx&qdVy;E1488#o|7|pMV;aLE! zp=Hi}uru?ACvOh@Gp;1F3fzmMm8SZJyc=F@K8}>eUjFDOd(h>oj0%$fknGe(dHTMF z37J!z|%ZoQDsuVAPl}*}hXt2!c zLggW3O!B1xBNx+l4p)LN1r^z{{@%65`fN9VeQe-y{*T$x|AB?U`s{yJX8Knw46wQX z9~K51>7NXszp^m?DEB{__`kO>SXsGQ{>sA0P0@0g=fv$xO_09 z5Zh2#>zq8B)H2FyWG5QU$Yj^FY}&d>yweRefQh=(^;v>gz_ZUl{emO^U)!Yr!kIB%Y+T^=n!6ij#_tcZ#nRmu~GAl$kvU%E!3_{fLt){Dv_Mf37tdmPxYw=Qp6Bh?5m4l^Z zkXAL`(DbdYE}~68=tpxX9*A-jrHD`AI9;v}c~fq4F}+zEjyeZz_6sITk^iq!tvV{I z_3etJLr6G;Gz<*_Q!s#pbV&>_w1}W0-Hl2}gES~1Dbmf*EinozA>Bx~fRf*M?^@rz za__gkwSK?z*O`6ZHScqtb=JH0+RqL_mps}WAdK%-pbanHsV&MM)kR7zsvq6>(*DEn zUG4bmx4}N8az!AvT^NdMqhKX(|1|3CK-TD?$#nB(7});ec$XBDH`r)vfU2xWca486 z*I0d;`Q&78XLc_6P0(=QKkf)d+{QdIxaVeLvIS~U>SyP>#^hQM zMMxZeu4yKe@!jp$e{Ln2r;5-(a2+26rAs7Dc%$y6y8zRO||Q-f8Xe<0)u3jv11v(vHy*a~&7VJgVNCzTo&S zX>)z&giAd=2xPUUJe@T5G<+?c(FW)P8KtgL&KZ$){A>}P8l&@N_yx&fPPH7sGcY^% ziGxdwdV@oEeBXc-gb6e?yWx9CuA@IQ$OEkxkL6phtM7Yd_Ee|ctCGRk4=%=cdum%f z24!+1Z!9Uz(&TW^bxLG#R;T-g;y8WMKpRdd&ZJHoCw*k!-1`;^PEJZr>hzW!yv|Sh z(R**}LL%;-noC+&ocm4VH$AzPJ(R|}GbvK0*tgwoE7uw$!#l98OUCsAkO$eIgR}Va z3C^s0&bWG_SsS9b)GQVY3>3sjd5i}-0G4+EUk?a^0(r1M2jk4V5-Xq{(8iE zVX57<87a=5t5xTCcjfx!w5rS05;w>a3wCZ=IyR!7HWTwCrrBt=o(B?gVEn4owP;28 z%j&Ovs~*biq;{oJdp!2?#m;22s6KUjTMc*x-LvQv`2DS*J10~ zfM|6L1=w_sQvgSke{MEOI$!YftE;pw5|$BO_}S&+Eo*Yj9o$jLMDH>{G1~D<+-ifpnCoqq5xD*_I$yh64#v)bEv;RU+k5VHvF-(WFmolZJyg^h4a7p1k5MSv%(CYm{Sf@kt);Ex86L<6+kdC&yQ0h6mhkUzMBO7DF zS6a8X3f8r?B@{)i^UPpz!Wtw2hCmIKegG@;!f(K49D=kdTJ5v zWUcY!Wb#6Gq^7Gj)fe{yn$bhh`^d(Wx8=*!m6m~V%dtTQcG;XZJ=y(w?&gjmQz?r< ziP7pZRSR?P?fo|zblWZmnBzmYi^vBX>hqjq<%G1f$qBd9OjODT$7%=s(@X$S;tggk zOM{+zK;-C+H=G(u206I6?2$H3NzDmOA~y2%4av3{_(M?!8Bd}L|u&gF5!pIR(KRb;l>%>Rbgs<1jF zxL1FQyQBu?V}G)j=u%EYkJy>R*(=a=o;+@lka}R?+C6Pz{AJtPJCA!3V0HA+$3UuL zpVxjXfLzdoUG=2JNIE@eY`rc&@1@uDU7jY-RR+4jlRlOFsfuw4XUpo!pvCELG28EM zw|xMUbY;tx`Y8jlelUv*((E_mXzM-bDajbWb+1Y@x@=7bdCuMyG#rJ$GqT`rqieKV6^yM9)E4{;-h*m0>Ku94xG#^mje?*Uanx)^oxzA<@6< zIfDtO2{^g$SJZ~;qj5SEL2F^USPXmjDyW2y_rOKW_YH@VxjH34N&Yj}fndX@Amdkg zA)E_@6XZt>8@@@$5<7Is%t@s9q9pz?A|xGJMEe`ZO~m%Yvqx>qG9G?Jlda5Qt^2R9 zdvy~=Ypf2pIo0yWtr8zddS=E zA(YPAqlWVNT1$c^u3VB9_2HI#_LbN_lbx37mS04T0rmRr#(^Z2M~*4^r*Frk0(v)G zSsp$kY2k`GC2E^u4TyG(?MRb*=F=s}5=d~x7Km=V>Vfw3Cf49p&VBLDSVdsJZliNn zk%S2@<3THm6R}X*3v3L{(irRo2F`u5lp39Ue;@c#>nf*#cA~bCiGR=C1$oLO6&iS| z&(PweO(kh3pZLenowuh5RD-HrA-93P@|2w=pz`c}R20!o4VtnrTbJ*OGU~~8xiD5B zL$_4~PI=iJ@!2cE{yB9mkUS5KSx>RYwbqO;DqXFGpy@s~gQYUHVKS8z+ngv&aSc1C z&>`7@Z*7YFTarNfge1eR#f!QeEs_RZDxPIJx8YH2CO)3@1$sW z6WV_#yPugave(?=u1U_f@0EGtwQ|2K+n&a)2(R-6WTgj6RWXu^7dZCvcwT%Vgv~`U zW;xPOODF9>A!hl-NAC9TeKnriTLTL9k}kvE23zrdHJ>NmdH0X?*PMm*O&1!wz1u+B zCJa0}<0TF*;GfOzq|2?Prc*W*w}|cYd>2<&uJiE9n<_2v<1D$yX;nsHnviLdIn_8^ z4t6K%G}@F&3UA$BBEreN$9zSb6|EjMbytEKQ1DZk+i^|d;r8D+gn-)^X>j3tjC4W2 z7jEyt`gr&4)M@4i&b`P_Mmb^Y!MEMlZM)1$s@V5O#m@Nn_Zi29m}G}=-)v{%EUb$+op9y zp-z@U%ux7u>j9qwS6qr0s!fd24)|0lskzjU1rC|)DEaLV!!+;8`?q)FJ}5ZHhHn#l z_%yy&C_^Yf&4SxKnJ;wF%?T&>iZ-OLYy*vDodc{5786e0iuNZmf1Uvaz)=`i$5FMc z>Tv@Sy!Ut@o<{`{Nc7A@jmO%ldt@GZPpHUWYx61`%wG8SZ`dPhHi(TRS@Wcus03cA znLYZ>Xzw!qd0A}id*g~&-hMuhSNTYWMJF+xb<+o!hs85-XJ)gT!X((GJqmQ*Y9nMw?q zO1-v3n;5@It4Y^yu6fxuqz@c#t__t?wMPKy5gKKX$-WMw5307g1gv}ovjm{j{#G!kLsboBKA5qJYIn^IRb2w=Ch3vx% z9Hf(f=)X|er2+DPLuG&33;zj~!CC$soG4@Gy1%>Gk0hkYje z2Vee9b-(ug|EvoH0`fPeD@I}5uuS)BHbR%N!0jWIm{>ft{0 zWR3YiP@F|xNiv2$rEl&I8_{GP1nqoH5>Zb44KNjY{W-BRwfYyRDr>3aWTyj1sz!fY z99bNT+I(n++%6_mS*kpP19g{Nnv&~mpL?`qXP3e?al+uJ1;JDmB_0|F#dD9HXE@!3 zeBCC0sWQ(bmn=4j@6VqTc{GZZmrf_{55sLO-SKsr4~-!-onqEe zWO&4wY!y*uA8`@cls0YqoP5txe>flZB#^ocdY!r7;REmT=th=Hh(n}7l^pWg&leE6 zmv6UKl1WxjvCu;8j&U22LKKl`3%Y{{VHcM5Cg_UpeXD5P_gfWC_RmLjO5m+@3M=I^ z#)MK8$8;56IT8}2i&tXCylc2hc~(P~vPQHW@K@lUv`G3|z{{EPz2xdzD;{KAuCrqjrLa6nIFeZ{wqbIKM4hLQeo8%5&bqgNJ{WNdjpsr75t&J=(bqtbiYw965nq<%N9+7A^r;`!ciy3wa zB$Y3zj%X0n6w>i$DjXLgJ}hGj9Z}F*c%wb>^WHtBnd32fvnfU!oBM*I?IT3TYIB=! zGlVK?gifY$c9~5F)2J5D#%70(3~3EzlfhaPxo+oo+)^T3>ps<{?+{{gpJCP;7Ge~c zjAd!AZz``uiq^fb`y#j*RpProLlGL(Y|guU2NrwIZ)-uGlM;jwNMcbC4_PpiyCsu) zs2a%~-k$F;TXA7)?vK_)Xf={PbDvCM{vZhdWco}l`&<@3^CF0@v`>MTwDNohynSlg zCQz8nGke=am?7rJQR!0b(hbS4Y5_DE9mXBoSw%oKdY=w$9cI0gZD+KtT6FE5HA;K# z65%Ta7P0M{A5>Wy(+_)33HeDetN%RaJe({a0*fCTe=vv$HpFuYgF;3Afx*DoH@&nD_Rm^ia3S#JzQEux=sz(y^m5HG z5imAdbZHL}=s(vXpy12zhMlvEUhWlK7y-H5Q#cqKSh} ElmhurstSiteNotes: return ElmhurstSiteNotesExtractor(pages).extract() +@pytest.fixture(scope="module") +def result2() -> ElmhurstSiteNotes: + with open(FIXTURE_PATH_2) as f: + pages = json.load(f) + return ElmhurstSiteNotesExtractor(pages).extract() + + class TestSurveyorInfo: def test_surveyor_code(self, result: ElmhurstSiteNotes) -> None: assert result.surveyor_info.surveyor_code == "P960-0001" @@ -448,3 +458,58 @@ class TestEnergyPerformance: def test_co2_emissions_current_t(self, result: ElmhurstSiteNotes) -> None: assert result.co2_emissions_current_t == 1.683 + + +class TestWindowsWithFrameDetails: + def test_window_count(self, result2: ElmhurstSiteNotes) -> None: + assert len(result2.windows) == 8 + + def test_draught_proofing_percent(self, result2: ElmhurstSiteNotes) -> None: + assert result2.draught_proofing_percent == 90 + + def test_first_window_glazing_type_excludes_frame_type(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[0].glazing_type == "Double with unknown install date" + + def test_first_window_frame_type(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[0].frame_type == "PVC" + + def test_first_window_frame_factor(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[0].frame_factor == 0.70 + + def test_first_window_glazing_gap(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[0].glazing_gap == "16 mm or more" + + def test_first_window_location(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[0].building_part == "Main" + assert result2.windows[0].location == "External wall" + assert result2.windows[0].orientation == "East" + + def test_first_window_performance(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[0].data_source == "Manufacturer" + assert result2.windows[0].u_value == 2.70 + assert result2.windows[0].g_value == 0.76 + assert result2.windows[0].draught_proofed is True + assert result2.windows[0].permanent_shutters == "None" + + def test_fourth_window_orientation(self, result2: ElmhurstSiteNotes) -> None: + assert result2.windows[3].orientation == "South" + + +class TestLightingLedCflUnknown: + def test_total_bulbs(self, result2: ElmhurstSiteNotes) -> None: + assert result2.lighting.total_bulbs == 10 + + def test_led_cfl_count_known_false(self, result2: ElmhurstSiteNotes) -> None: + assert result2.lighting.led_cfl_count_known is False + + def test_low_energy_count(self, result2: ElmhurstSiteNotes) -> None: + assert result2.lighting.low_energy_count == 5 + + def test_incandescent_count(self, result2: ElmhurstSiteNotes) -> None: + assert result2.lighting.incandescent_count == 5 + + def test_led_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None: + assert result2.lighting.led_count == 0 + + def test_cfl_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None: + assert result2.lighting.cfl_count == 0 diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index 0234a29c..3b2c279f 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -53,27 +53,27 @@ class BuildingPartDimensions: @dataclass class WallDetails: - wall_type: str # e.g. "CA Cavity" - insulation: str # e.g. "F Filled Cavity" + wall_type: str # e.g. "CA Cavity" + insulation: str # e.g. "F Filled Cavity" thickness_unknown: bool u_value_known: bool - party_wall_type: str # e.g. "U Unable to determine" + party_wall_type: str # e.g. "U Unable to determine" thickness_mm: Optional[int] = None @dataclass class RoofDetails: - roof_type: str # e.g. "PA Pitched (slates/tiles), access to loft" - insulation: str # e.g. "J Joists" + roof_type: str # e.g. "PA Pitched (slates/tiles), access to loft" + insulation: str # e.g. "J Joists" u_value_known: bool insulation_thickness_mm: Optional[int] = None @dataclass class FloorDetails: - location: str # e.g. "G Ground floor" - floor_type: str # e.g. "N Suspended, not timber" - insulation: str # e.g. "A As built" + location: str # e.g. "G Ground floor" + floor_type: str # e.g. "N Suspended, not timber" + insulation: str # e.g. "A As built" u_value_known: bool default_u_value: Optional[float] = None @@ -109,7 +109,7 @@ class VentilationAndCooling: passive_vents_count: int flueless_gas_fires_count: int fixed_space_cooling: bool - draught_lobby: str # e.g. "Not present" + draught_lobby: str # e.g. "Not present" mechanical_ventilation: bool pressure_test_method: str # e.g. "Not available" @@ -125,15 +125,19 @@ class Lighting: @dataclass class MainHeating: - heat_emitter: str # e.g. "Radiators" - fuel_type: str # e.g. "Mains gas" - flue_type: str # e.g. "Balanced" + heat_emitter: str # e.g. "Radiators" + fuel_type: str # e.g. "Mains gas" + flue_type: str # e.g. "Balanced" fan_assisted_flue: bool design_flow_temperature: str # e.g. "Unknown" - heating_controls_ees: str # e.g. "CBE" - heating_controls_sap: str # e.g. "SAP code 2106, Programmer, room thermostat and TRVs" + heating_controls_ees: str # e.g. "CBE" + heating_controls_sap: ( + str # e.g. "SAP code 2106, Programmer, room thermostat and TRVs" + ) percentage_of_heat: int - pcdf_boiler_reference: Optional[str] = None # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%" + pcdf_boiler_reference: Optional[str] = ( + None # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%" + ) heat_pump_age: Optional[str] = None @@ -147,7 +151,7 @@ class Meters: @dataclass class WaterHeating: - water_heating_code: str # e.g. "HWP" + water_heating_code: str # e.g. "HWP" water_heating_sap_code: int water_heating_fuel_type: str hot_water_cylinder_present: bool @@ -157,7 +161,7 @@ class WaterHeating: class Shower: shower_number: int outlet_type: str - connected: str # e.g. "None" + connected: str # e.g. "None" @dataclass @@ -172,7 +176,7 @@ class Renewables: solar_water_heating: bool wwhrs_present: bool flue_gas_heat_recovery_present: bool - photovoltaic_panel: str # e.g. "None" + photovoltaic_panel: str # e.g. "None" export_capable_meter: bool wind_turbine_present: bool wind_turbines_terrain_type: str @@ -192,8 +196,8 @@ class ElmhurstSiteNotes: co2_emissions_current_t: float # Section 1.0 - property_type: str # e.g. "B Bungalow" - attachment: str # e.g. "E End-Terrace" + property_type: str # e.g. "B Bungalow" + attachment: str # e.g. "E End-Terrace" # Section 2.0 number_of_storeys: int From 01ebb2e0e1049782b40d71601640b29eb7285045 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 16:04:02 +0000 Subject: [PATCH 23/24] =?UTF-8?q?extract=20window=20frame=20details=20from?= =?UTF-8?q?=20elmhurst=20site=20notes=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../documents_parser/elmhurst_extractor.py | 22 +++++++++++++++++-- datatypes/epc/surveys/elmhurst_site_notes.py | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 3063e358..e78d98de 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -230,7 +230,7 @@ class ElmhurstSiteNotesExtractor: i += 1 continue i += 3 - # Collect glazing type until frame_factor (0 < v ≤ 1.0) + # Collect glazing type tokens until frame_factor (0 < v ≤ 1.0) glazing_parts: List[str] = [] while i < len(tokens): try: @@ -241,10 +241,21 @@ class ElmhurstSiteNotesExtractor: except ValueError: glazing_parts.append(tokens[i]) i += 1 + # If last glazing token is a single word (no spaces, not numeric) it's the frame_type + frame_type: Optional[str] = None + if glazing_parts and " " not in glazing_parts[-1] and not glazing_parts[-1].replace(".", "").isdigit(): + frame_type = glazing_parts.pop() glazing_type = " ".join(glazing_parts).strip() if i >= len(tokens): break frame_factor = float(tokens[i]); i += 1 + # Consume glazing_gap if present ("mm" token, possibly multi-token e.g. "16 mm or more") + glazing_gap: Optional[str] = None + if i < len(tokens) and "mm" in tokens[i]: + gap_parts = [tokens[i]]; i += 1 + while i < len(tokens) and tokens[i].lower() in {"or", "more"}: + gap_parts.append(tokens[i]); i += 1 + glazing_gap = " ".join(gap_parts) building_part = tokens[i]; i += 1 location = tokens[i]; i += 1 orientation = tokens[i]; i += 1 @@ -268,6 +279,8 @@ class ElmhurstSiteNotesExtractor: g_value=g_value, draught_proofed=draught_proofed, permanent_shutters=permanent_shutters, + frame_type=frame_type, + glazing_gap=glazing_gap, ) ) return windows @@ -296,12 +309,17 @@ class ElmhurstSiteNotesExtractor: ) def _extract_lighting(self) -> Lighting: + led_cfl_count_known = self._bool_val("Number of LED and CFL Known") return Lighting( total_bulbs=self._int_val("Total number of bulbs"), - led_cfl_count_known=self._bool_val("Number of LED and CFL Known"), + led_cfl_count_known=led_cfl_count_known, led_count=self._int_val("Number of LED lights"), cfl_count=self._int_val("Number of CFL lights"), incandescent_count=self._int_val("Total number of incandescents"), + low_energy_count=( + 0 if led_cfl_count_known + else self._int_val("Total number of Low Energy") + ), ) def _extract_main_heating(self) -> MainHeating: diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index 3b2c279f..eec22a27 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -121,6 +121,7 @@ class Lighting: led_count: int cfl_count: int incandescent_count: int + low_energy_count: int = 0 @dataclass From 51bd18e0d7ab4f346e26d7f3a32422b5d73a7aa1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 27 Apr 2026 16:11:32 +0000 Subject: [PATCH 24/24] =?UTF-8?q?Rename=20window=20frame=20material=20colu?= =?UTF-8?q?mn=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/db/models/epc_property.py | 4 ++-- .../tests/test_elmhurst_end_to_end.py | 24 +++++++++++++++++++ .../documents_parser/tests/test_end_to_end.py | 16 ++++++------- datatypes/epc/domain/epc_property_data.py | 2 +- datatypes/epc/domain/mapper.py | 13 ++++++---- .../domain/tests/test_from_rdsap_schema.py | 4 ++++ .../epc/domain/tests/test_from_site_notes.py | 8 +++---- 7 files changed, 51 insertions(+), 20 deletions(-) diff --git a/backend/app/db/models/epc_property.py b/backend/app/db/models/epc_property.py index 49901644..2a47d57d 100644 --- a/backend/app/db/models/epc_property.py +++ b/backend/app/db/models/epc_property.py @@ -593,7 +593,7 @@ class EpcWindowModel(SQLModel, table=True): id: Optional[int] = Field(default=None, primary_key=True) epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False) - pvc_frame: str + frame_material: Optional[str] = Field(default=None) glazing_gap: str orientation: str window_type: str @@ -615,7 +615,7 @@ class EpcWindowModel(SQLModel, table=True): td = window.window_transmission_details return cls( epc_property_id=epc_property_id, - pvc_frame=str(window.pvc_frame), + frame_material=window.frame_material, glazing_gap=str(window.glazing_gap), orientation=str(window.orientation), window_type=str(window.window_type), diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py index 53c81164..977ea138 100644 --- a/backend/documents_parser/tests/test_elmhurst_end_to_end.py +++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py @@ -11,6 +11,9 @@ from datatypes.epc.domain.mapper import EpcPropertyDataMapper FIXTURE_PATH = os.path.join( os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_1_text.json" ) +FIXTURE_PATH_2 = os.path.join( + os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_2_text.json" +) @pytest.fixture(scope="module") @@ -21,6 +24,14 @@ def result() -> EpcPropertyData: return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) +@pytest.fixture(scope="module") +def result2() -> EpcPropertyData: + with open(FIXTURE_PATH_2) as f: + pages = json.load(f) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + class TestAddress: def test_address_line_1(self, result: EpcPropertyData) -> None: assert result.address_line_1 == "19, Queens Road" @@ -330,3 +341,16 @@ class TestEnergyPerformance: def test_co2_emissions_current(self, result: EpcPropertyData) -> None: assert result.co2_emissions_current == 1.683 + + +class TestWindowFrameMaterial: + def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None: + assert result2.sap_windows[0].frame_material == "PVC" + + def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None: + assert result2.sap_windows[0].glazing_gap == "16 mm or more" + + +class TestLowEnergyLighting: + def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None: + assert result2.low_energy_fixed_lighting_bulbs_count == 5 diff --git a/backend/documents_parser/tests/test_end_to_end.py b/backend/documents_parser/tests/test_end_to_end.py index 5278c002..c413b55f 100644 --- a/backend/documents_parser/tests/test_end_to_end.py +++ b/backend/documents_parser/tests/test_end_to_end.py @@ -71,7 +71,7 @@ class TestPdfToEpcPropertyData: ), sap_windows=[ SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North West", window_type="Window", @@ -84,7 +84,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North West", window_type="Window", @@ -97,7 +97,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North East", window_type="Window", @@ -110,7 +110,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North", window_type="Window", @@ -123,7 +123,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North East", window_type="Window", @@ -136,7 +136,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North West", window_type="Window", @@ -149,7 +149,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North West", window_type="Window", @@ -162,7 +162,7 @@ class TestPdfToEpcPropertyData: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North East", window_type="Window", diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index 6e6f73a7..8795b389 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -95,7 +95,7 @@ class WindowTransmissionDetails: @dataclass class SapWindow: - pvc_frame: str + frame_material: Optional[str] glazing_gap: Union[int, str] orientation: Union[int, str] window_type: Union[int, str] diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ab931305..1ce4c73c 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -268,6 +268,9 @@ class EpcPropertyDataMapper: "None" if not survey.renewables.wwhrs_present else "Present" ), any_unheated_rooms=survey.heated_habitable_rooms < survey.habitable_rooms, + low_energy_fixed_lighting_bulbs_count=( + survey.lighting.low_energy_count if not survey.lighting.led_cfl_count_known else None + ), energy_rating_current=survey.current_sap_rating, energy_rating_potential=survey.potential_sap_rating, environmental_impact_current=survey.current_ei_rating, @@ -941,7 +944,7 @@ class EpcPropertyDataMapper: # 20.0.0 SapWindow lacks frame/gap/draught fields present in later schemas sap_windows=[ SapWindow( - pvc_frame="", + frame_material=None, glazing_gap=0, orientation=w.orientation, window_type=w.window_type, @@ -1118,7 +1121,7 @@ class EpcPropertyDataMapper: ), sap_windows=[ SapWindow( - pvc_frame=w.pvc_frame, + frame_material="PVC" if w.pvc_frame == "true" else None, glazing_gap=w.glazing_gap, orientation=w.orientation, window_type=w.window_type, @@ -1352,7 +1355,7 @@ class EpcPropertyDataMapper: # SAP windows sap_windows=[ SapWindow( - pvc_frame=w.pvc_frame, + frame_material="PVC" if w.pvc_frame == "true" else None, glazing_gap=w.glazing_gap, orientation=w.orientation, window_type=w.window_type, @@ -1613,7 +1616,7 @@ def _map_extension_building_part( def _map_sap_window(window: Window) -> SapWindow: return SapWindow( - pvc_frame=window.frame_type, + frame_material=window.frame_type, glazing_gap=window.glazing_gap, orientation=window.orientation, window_type=window.window_type, @@ -1740,7 +1743,7 @@ def _map_elmhurst_building_part(survey: ElmhurstSiteNotes) -> SapBuildingPart: def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow: return SapWindow( - pvc_frame=w.frame_type or "", + frame_material=w.frame_type or None, glazing_gap=w.glazing_gap or "", orientation=w.orientation, window_type="Window", diff --git a/datatypes/epc/domain/tests/test_from_rdsap_schema.py b/datatypes/epc/domain/tests/test_from_rdsap_schema.py index 9e6fa0b9..9e86ae42 100644 --- a/datatypes/epc/domain/tests/test_from_rdsap_schema.py +++ b/datatypes/epc/domain/tests/test_from_rdsap_schema.py @@ -481,6 +481,10 @@ class TestFromRdSapSchema21_0_1: # draught_proofed: "true" assert result.sap_windows[0].draught_proofed is True + def test_window_frame_material_false(self, result: EpcPropertyData) -> None: + # pvc_frame: "false" in fixture → frame_material should be None + assert result.sap_windows[0].frame_material is None + # --- sap building parts --- def test_building_part_count(self, result: EpcPropertyData) -> None: diff --git a/datatypes/epc/domain/tests/test_from_site_notes.py b/datatypes/epc/domain/tests/test_from_site_notes.py index ed4bf1ae..ff25933c 100644 --- a/datatypes/epc/domain/tests/test_from_site_notes.py +++ b/datatypes/epc/domain/tests/test_from_site_notes.py @@ -398,7 +398,7 @@ class TestFromSiteNotesExample1: # Windows sap_windows=[ SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="South East", window_type="Window", @@ -411,7 +411,7 @@ class TestFromSiteNotesExample1: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="South East", window_type="Window", @@ -424,7 +424,7 @@ class TestFromSiteNotesExample1: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North West", window_type="Window", @@ -437,7 +437,7 @@ class TestFromSiteNotesExample1: permanent_shutters_present=False, ), SapWindow( - pvc_frame="Wooden or PVC", + frame_material="Wooden or PVC", glazing_gap="16 mm or more", orientation="North West", window_type="Window",