Merge branch 'main' into feature/magicplan-api-client

This commit is contained in:
Daniel Roth 2026-05-01 13:34:00 +00:00
commit 9e7d8c004f
60 changed files with 4691 additions and 469 deletions

View file

@ -110,6 +110,8 @@ class Addresses:
landlord_multi_glaze_proportion=float(row["landlord_multi_glaze_proportion"]) if row.get(
"landlord_multi_glaze_proportion") else None,
landlord_construction_age_band=row.get("landlord_construction_age_band"),
lmk_key=None,
epc_certificate_number=None,
)
@staticmethod

View file

@ -18,8 +18,9 @@ class EpcPropertyModel(SQLModel, table=True):
__tablename__ = "epc_property"
id: Optional[int] = Field(default=None, primary_key=True)
property_id: int = Field(foreign_key="property.id", nullable=False)
portfolio_id: int = Field(foreign_key="portfolio.id", nullable=False)
property_id: Optional[int] = Field(default=None)
portfolio_id: Optional[int] = Field(default=None)
uploaded_file_id: Optional[int] = Field(default=None)
# Identity / admin
uprn: Optional[int] = Field(default=None)
@ -148,8 +149,8 @@ class EpcPropertyModel(SQLModel, table=True):
def from_epc_property_data(
cls,
data: EpcPropertyData,
property_id: int,
portfolio_id: int,
property_id: Optional[int] = None,
portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
es = data.sap_energy_source
h = data.sap_heating
@ -593,7 +594,7 @@ class EpcWindowModel(SQLModel, table=True):
id: Optional[int] = Field(default=None, primary_key=True)
epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False)
pvc_frame: str
frame_material: Optional[str] = Field(default=None)
glazing_gap: str
orientation: str
window_type: str
@ -607,7 +608,7 @@ class EpcWindowModel(SQLModel, table=True):
frame_factor: Optional[float] = Field(default=None)
permanent_shutters_insulated: Optional[str] = Field(default=None)
transmission_u_value: Optional[float] = Field(default=None)
transmission_data_source: Optional[int] = Field(default=None)
transmission_data_source: Optional[str] = Field(default=None)
transmission_solar_transmittance: Optional[float] = Field(default=None)
@classmethod
@ -615,7 +616,7 @@ class EpcWindowModel(SQLModel, table=True):
td = window.window_transmission_details
return cls(
epc_property_id=epc_property_id,
pvc_frame=str(window.pvc_frame),
frame_material=window.frame_material,
glazing_gap=str(window.glazing_gap),
orientation=str(window.orientation),
window_type=str(window.window_type),

View file

@ -67,6 +67,17 @@ class HubspotDealData(SQLModel, table=True):
surveyed_date: Optional[datetime] = Field(default=None)
design_type: Optional[str] = Field(default=None)
survey_type: Optional[str] = Field(default=None)
measures_for_pibi_ordered: Optional[str] = Field(default=None)
pibi_order_date: Optional[datetime] = Field(default=None)
pibi_completed_date: Optional[datetime] = Field(default=None)
property_halted_date: Optional[datetime] = Field(default=None)
property_halted_reason: Optional[str] = Field(default=None)
technical_approved_measures_for_install: Optional[str] = Field(default=None)
sent_to_installer_for_pricing: Optional[datetime] = Field(default=None)
domna_survey_required: Optional[bool] = Field(default=None)
domna_survey_date: Optional[datetime] = Field(default=None)
created_at: Optional[datetime] = Field(
sa_column=Column(
DateTime(timezone=True),

View file

@ -0,0 +1,13 @@
from sqlmodel import SQLModel, Field
from datetime import datetime
from typing import Optional
# SQLModel table model mapped to the "hubspot_users" table; one row per HubSpot owner id.
class HubspotUser(SQLModel, table=True):
__tablename__ = "hubspot_users"
# Primary key: the owner identifier, stored as a string.
hubspot_owner_id: str = Field(primary_key=True)
# Optional descriptive fields; each defaults to None when not supplied.
first_name: Optional[str] = Field(default=None)
last_name: Optional[str] = Field(default=None)
email: Optional[str] = Field(default=None)
# No default is declared here, so a value must be provided when constructing a row.
updated_at: datetime

View file

@ -0,0 +1,76 @@
from typing import Optional
from sqlmodel import Session
from backend.app.db.models.epc_property import (
EpcBuildingPartModel,
EpcEnergyElementModel,
EpcFlatDetailsModel,
EpcFloorDimensionModel,
EpcMainHeatingDetailModel,
EpcPropertyEnergyPerformanceModel,
EpcPropertyModel,
EpcWindowModel,
)
from datatypes.epc.domain.epc_property_data import EpcPropertyData
def save_epc_property_data(
    session: Session,
    data: EpcPropertyData,
    uploaded_file_id: Optional[int] = None,
    property_id: Optional[int] = None,
    portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
    """Add an EPC property and all of its child rows to *session*.

    The parent ``EpcPropertyModel`` is added and flushed first so that its
    generated ``id`` can be used as the foreign key of every child row. The
    session is flushed but never committed here.

    Args:
        session: Session that receives the new rows.
        data: Domain object to persist.
        uploaded_file_id: Stored on the parent row as ``uploaded_file_id``.
        property_id: Forwarded to ``EpcPropertyModel.from_epc_property_data``.
        portfolio_id: Forwarded to ``EpcPropertyModel.from_epc_property_data``.

    Returns:
        The parent ``EpcPropertyModel`` (already flushed, so ``id`` is set).
    """
    parent = EpcPropertyModel.from_epc_property_data(
        data, property_id=property_id, portfolio_id=portfolio_id
    )
    parent.uploaded_file_id = uploaded_file_id
    session.add(parent)
    session.flush()  # populates parent.id
    assert parent.id is not None
    parent_id: int = parent.id

    performance_row = EpcPropertyEnergyPerformanceModel.from_epc_property_data(
        data, epc_property_id=parent_id
    )
    session.add(performance_row)

    for heating_detail in data.sap_heating.main_heating_details:
        session.add(EpcMainHeatingDetailModel.from_domain(heating_detail, parent_id))

    for building_part in data.sap_building_parts:
        part_row = EpcBuildingPartModel.from_domain(building_part, parent_id)
        session.add(part_row)
        session.flush()  # floor dimensions need the building part's id
        assert part_row.id is not None
        for floor_dimension in building_part.sap_floor_dimensions:
            session.add(EpcFloorDimensionModel.from_domain(floor_dimension, part_row.id))

    for sap_window in data.sap_windows:
        session.add(EpcWindowModel.from_domain(sap_window, parent_id))

    # Element collections, each stored under its element-type tag.
    element_lists = (
        (data.roofs, "roof"),
        (data.walls, "wall"),
        (data.floors, "floor"),
        (data.main_heating, "main_heating"),
    )
    for elements, element_type in element_lists:
        for element in elements:
            session.add(
                EpcEnergyElementModel.from_domain(element, element_type, parent_id)
            )

    # Single elements that may be absent.
    single_elements = (
        (data.window, "window"),
        (data.lighting, "lighting"),
        (data.hot_water, "hot_water"),
        (data.secondary_heating, "secondary_heating"),
        (data.main_heating_controls, "main_heating_controls"),
    )
    for element, element_type in single_elements:
        if element is None:
            continue
        session.add(
            EpcEnergyElementModel.from_domain(element, element_type, parent_id)
        )

    flat_details = data.sap_flat_details
    if flat_details is not None:
        session.add(EpcFlatDetailsModel.from_domain(flat_details, parent_id))

    return parent

View file

@ -0,0 +1,451 @@
import re
from datetime import date, datetime
from typing import List, Optional
from datatypes.epc.surveys.elmhurst_site_notes import (
BathsAndShowers,
BuildingPartDimensions,
ElmhurstSiteNotes,
FloorDetails,
FloorDimension,
Lighting,
MainHeating,
Meters,
PropertyDetails,
Renewables,
RoofDetails,
Shower,
SurveyorInfo,
VentilationAndCooling,
WallDetails,
WaterHeating,
Window,
)
class ElmhurstSiteNotesExtractor:
    """Build an ``ElmhurstSiteNotes`` object from the page texts of a site-notes PDF.

    Two views of the document are kept: the raw joined text (used for regex
    searches and section slicing) and the list of stripped, non-empty lines
    (used for "label, then value" lookups).
    """

    def __init__(self, pages: List[str]) -> None:
        """Join *pages* with newlines and pre-compute the non-empty stripped lines."""
        self._text = "\n".join(pages)
        self._lines = [
            line.strip() for line in self._text.splitlines() if line.strip()
        ]

    # --- generic helpers ---

    @staticmethod
    def _leading_int(raw: Optional[str]) -> Optional[int]:
        """Parse the first whitespace-separated token of *raw* as an int.

        Returns None when *raw* is empty/None or the token is not an integer
        (for example "Unmeasurable"), instead of raising.
        """
        if not raw:
            return None
        try:
            return int(raw.split()[0])
        except (ValueError, IndexError):
            return None

    @staticmethod
    def _leading_float(raw: Optional[str]) -> Optional[float]:
        """Parse the first whitespace-separated token of *raw* as a float.

        Returns None when *raw* is empty/None or the token is not numeric.
        """
        if not raw:
            return None
        try:
            return float(raw.split()[0])
        except (ValueError, IndexError):
            return None

    def _next_val(self, label: str) -> Optional[str]:
        """Look up *label* across the whole document (see ``_local_val``)."""
        return self._local_val(self._lines, label)

    def _str_val(self, label: str) -> str:
        """Document-wide value for *label* with whitespace collapsed; "" if absent."""
        v = self._next_val(label)
        return " ".join(v.split()) if v else ""

    def _opt_str(self, label: str) -> Optional[str]:
        """Like ``_str_val`` but returns None when the value is absent."""
        v = self._next_val(label)
        return " ".join(v.split()) if v else None

    def _bool_val(self, label: str) -> bool:
        """True only when the value for *label* is "yes" (case-insensitive)."""
        v = self._next_val(label)
        return v is not None and v.lower() == "yes"

    def _int_val(self, label: str) -> int:
        """First token of the value for *label* as an int; 0 if absent or non-numeric."""
        parsed = self._leading_int(self._next_val(label))
        return parsed if parsed is not None else 0

    def _date_val(self, label: str) -> date:
        """Value for *label* parsed as ``dd/mm/YYYY``.

        Raises:
            ValueError: if the value is missing or not in that format.
        """
        v = self._next_val(label)
        if not v:
            raise ValueError(f"Missing date for label: {label}")
        return datetime.strptime(v.strip(), "%d/%m/%Y").date()

    def _between(self, start: str, end: str) -> str:
        """Raw text after the first *start* up to the next *end*; "" if either is missing."""
        try:
            s = self._text.index(start) + len(start)
            e = self._text.index(end, s)
            return self._text[s:e]
        except ValueError:
            return ""

    def _section_lines(self, start: str, end: str) -> List[str]:
        """Stripped, non-empty lines of the text between *start* and *end*."""
        text = self._between(start, end)
        return [line.strip() for line in text.splitlines() if line.strip()]

    def _local_val(self, lines: List[str], label: str) -> Optional[str]:
        """Find the value belonging to *label* within *lines*.

        Only the first line that matches the label is considered. Two layouts
        are recognised:

        * ``"Label: value"`` on one line -> ``"value"``;
        * ``"Label:"`` or ``"Label"`` alone -> the following line, unless that
          line itself ends with ``":"`` (the next label) or starts with ``"©"``
          (page footer), in which case the value is treated as missing.
        """
        bare = label.rstrip(":")
        with_colon = bare + ":"
        for i, line in enumerate(lines):
            if line.startswith(with_colon) and len(line) > len(with_colon):
                return line[len(with_colon):].strip() or None
            if line == with_colon or line == bare:
                for candidate in lines[i + 1:i + 4]:
                    if candidate.endswith(":") or candidate.startswith("©"):
                        return None
                    if candidate:
                        return candidate
                return None
        return None

    def _local_str(self, lines: List[str], label: str) -> str:
        """Section-local value for *label* with whitespace collapsed; "" if absent."""
        v = self._local_val(lines, label)
        return " ".join(v.split()) if v else ""

    def _local_bool(self, lines: List[str], label: str) -> bool:
        """True only when the section-local value for *label* is "yes"."""
        v = self._local_val(lines, label)
        return v is not None and v.lower() == "yes"

    def _rating_val(self, label: str) -> int:
        """Last token of the value for *label* as an int (e.g. "C 69" -> 69); 0 on failure."""
        v = self._next_val(label)
        try:
            return int(v.split()[-1]) if v else 0
        except (ValueError, IndexError):
            return 0

    # --- section extractors ---

    def _extract_surveyor_info(self) -> SurveyorInfo:
        """Read the surveyor fields from the document-wide labels."""
        return SurveyorInfo(
            surveyor_code=self._str_val("Surveyor"),
            name=self._str_val("Name"),
            title=self._str_val("Title"),
            tel_number=self._str_val("Tel Number"),
            survey_reference=self._str_val("Survey Reference"),
            my_reference=self._opt_str("My Reference"),
        )

    def _extract_property_details(self) -> PropertyDetails:
        """Read the property details.

        Raises:
            ValueError: if "Inspection Date" or "Process date" is missing or malformed.
        """
        # This label is wrapped over two lines in the text, so match it on the raw text.
        epc_m = re.search(
            r"Check for the existence of\nan EPC:\n(Yes|No)", self._text
        )
        epc_exists = epc_m.group(1).lower() == "yes" if epc_m else False
        return PropertyDetails(
            rdsap_version=self._str_val("RdSAP version"),
            reference_number=self._str_val("Reference Number"),
            lodgement_required=self._bool_val("Lodgement Required"),
            regs_region=self._str_val("Regs Region"),
            epc_language=self._str_val("EPC Language"),
            postcode=self._str_val("Postcode"),
            region=self._str_val("Region"),
            street=self._str_val("Street"),
            town=self._str_val("Town"),
            tenure=self._str_val("Property Tenure"),
            transaction_type=self._str_val("Transaction Type"),
            inspection_date=self._date_val("Inspection Date"),
            process_date=self._date_val("Process date"),
            epc_exists=epc_exists,
            uprn=self._opt_str("UPRN"),
            house_name=self._opt_str("House Name"),
            house_number=self._opt_str("House No"),
            locality=self._opt_str("Locality"),
            county=self._opt_str("County"),
        )

    def _extract_attachment(self) -> str:
        """Second line after "1.0 Property type:", whitespace-collapsed; "" if not found."""
        m = re.search(r"1\.0 Property type:\n[^\n]+\n([^\n]+)", self._text)
        return " ".join(m.group(1).strip().split()) if m else ""

    def _extract_dimensions(self) -> BuildingPartDimensions:
        """Read the dimension type and every "<name> Floor:" row of four numbers."""
        dim_type = self._str_val("Dimension type")
        section = self._between("4.0 Dimensions:", "5.0 Conservatory:")
        floor_matches = re.findall(
            r"([A-Za-z ]+Floor):\n([\d.]+)\n([\d.]+)\n([\d.]+)\n([\d.]+)",
            section,
        )
        floors = [
            FloorDimension(
                name=name.strip(),
                area_m2=float(area),
                room_height_m=float(height),
                heat_loss_perimeter_m=float(hlp),
                party_wall_length_m=float(pwl),
            )
            for name, area, height, hlp, pwl in floor_matches
        ]
        return BuildingPartDimensions(dimension_type=dim_type, floors=floors)

    def _extract_walls(self) -> WallDetails:
        """Read the walls section; a missing or non-numeric thickness becomes None."""
        lines = self._section_lines("7.0 Walls:", "8.0 Roofs:")
        thickness_mm = self._leading_int(self._local_val(lines, "Wall Thickness"))
        return WallDetails(
            wall_type=self._local_str(lines, "Type"),
            insulation=self._local_str(lines, "Insulation"),
            thickness_unknown=self._local_bool(lines, "Wall Thickness Unknown"),
            u_value_known=self._local_bool(lines, "U-value Known"),
            party_wall_type=self._local_str(lines, "Party Wall Type"),
            thickness_mm=thickness_mm,
        )

    def _extract_roof(self) -> RoofDetails:
        """Read the roofs section; a missing or non-numeric thickness becomes None."""
        lines = self._section_lines("8.0 Roofs:", "8.1 Rooms in Roof:")
        thickness_mm = self._leading_int(
            self._local_val(lines, "Insulation Thickness")
        )
        return RoofDetails(
            roof_type=self._local_str(lines, "Type"),
            insulation=self._local_str(lines, "Insulation"),
            u_value_known=self._local_bool(lines, "U-value Known"),
            insulation_thickness_mm=thickness_mm,
        )

    def _extract_floor(self) -> FloorDetails:
        """Read the floors section; a missing or non-numeric default U-value becomes None."""
        lines = self._section_lines("9.0 Floors:", "10.0 Doors:")
        default_u = self._leading_float(self._local_val(lines, "Default U-value"))
        return FloorDetails(
            location=self._local_str(lines, "Location"),
            floor_type=self._local_str(lines, "Type"),
            insulation=self._local_str(lines, "Insulation"),
            u_value_known=self._local_bool(lines, "U-value Known"),
            default_u_value=default_u,
        )

    def _extract_windows(self) -> List[Window]:
        """Parse the windows table into ``Window`` rows.

        The table body is the text between the "Permanent Shutters" column
        header and the "Draught Proofing" label, one cell per line. A row is
        width, height, area, one or more glazing-type cells (optionally ending
        with a frame type), frame factor, an optional glazing gap, then eight
        fixed cells. Parsing stops at a row that is cut short.

        Raises:
            ValueError: if a row's U-value or g-value cell is not numeric.
        """
        m = re.search(
            r"Permanent\s+Shutters\n(.*?)Draught Proofing",
            self._text,
            re.DOTALL,
        )
        if not m:
            return []
        tokens = [t.strip() for t in m.group(1).splitlines() if t.strip()]
        windows: List[Window] = []
        i = 0
        while i + 12 < len(tokens):
            # A row starts with three numeric cells; resync one token at a time otherwise.
            try:
                width_m = float(tokens[i])
                height_m = float(tokens[i + 1])
                area_m2 = float(tokens[i + 2])
            except (ValueError, IndexError):
                i += 1
                continue
            i += 3

            # Collect glazing type tokens until frame_factor (0 < v <= 1.0)
            glazing_parts: List[str] = []
            while i < len(tokens):
                try:
                    v: Optional[float] = float(tokens[i])
                except ValueError:
                    v = None
                if v is not None and 0.0 < v <= 1.0:
                    break
                glazing_parts.append(tokens[i])
                i += 1

            # If last glazing token is a single word (no spaces, not numeric) it's the frame_type
            frame_type: Optional[str] = None
            if (
                glazing_parts
                and " " not in glazing_parts[-1]
                and not glazing_parts[-1].replace(".", "").isdigit()
            ):
                frame_type = glazing_parts.pop()
            glazing_type = " ".join(glazing_parts).strip()

            if i >= len(tokens):
                break
            frame_factor = float(tokens[i])
            i += 1

            # Consume glazing_gap if present ("mm" token, possibly multi-token e.g. "16 mm or more")
            glazing_gap: Optional[str] = None
            if i < len(tokens) and "mm" in tokens[i]:
                gap_parts = [tokens[i]]
                i += 1
                while i < len(tokens) and tokens[i].lower() in {"or", "more"}:
                    gap_parts.append(tokens[i])
                    i += 1
                glazing_gap = " ".join(gap_parts)

            # Eight fixed cells follow; stop instead of indexing past the end
            # when the final row is cut short.
            if i + 8 > len(tokens):
                break
            (
                building_part,
                location,
                orientation,
                data_source,
                u_raw,
                g_raw,
                draught_raw,
                permanent_shutters,
            ) = tokens[i:i + 8]
            i += 8

            windows.append(
                Window(
                    width_m=width_m,
                    height_m=height_m,
                    area_m2=area_m2,
                    glazing_type=glazing_type,
                    frame_factor=frame_factor,
                    building_part=building_part,
                    location=location,
                    orientation=orientation,
                    data_source=data_source,
                    u_value=float(u_raw),
                    g_value=float(g_raw),
                    draught_proofed=draught_raw.lower() == "yes",
                    permanent_shutters=permanent_shutters,
                    frame_type=frame_type,
                    glazing_gap=glazing_gap,
                )
            )
        return windows

    def _extract_ventilation(self) -> VentilationAndCooling:
        """Read the ventilation and cooling counts and flags."""
        return VentilationAndCooling(
            open_chimneys_count=self._int_val("No. of open chimneys"),
            open_flues_count=self._int_val("No. of open flues"),
            open_chimneys_closed_fire_count=self._int_val(
                "No. of open chimneys/open flues attached to closed fire"
            ),
            solid_fuel_boiler_flues_count=self._int_val(
                "No. of flues attached to solid fuel boiler"
            ),
            other_heater_flues_count=self._int_val(
                "No. of open flues attached to other heater"
            ),
            blocked_chimneys_count=self._int_val("No. of blocked chimneys"),
            extract_fans_count=self._int_val("No. of intermittent extract fans"),
            passive_vents_count=self._int_val("No. of passive vents"),
            flueless_gas_fires_count=self._int_val("No. of flueless gas fires"),
            fixed_space_cooling=self._bool_val("Fixed Space Cooling"),
            draught_lobby=self._str_val("Draught Lobby"),
            mechanical_ventilation=self._bool_val("Mechanical Ventilation"),
            pressure_test_method=self._str_val("Test Method"),
        )

    def _extract_lighting(self) -> Lighting:
        """Read the lighting counts.

        ``low_energy_count`` is forced to 0 when the LED/CFL split is known.
        """
        led_cfl_count_known = self._bool_val("Number of LED and CFL Known")
        return Lighting(
            total_bulbs=self._int_val("Total number of bulbs"),
            led_cfl_count_known=led_cfl_count_known,
            led_count=self._int_val("Number of LED lights"),
            cfl_count=self._int_val("Number of CFL lights"),
            incandescent_count=self._int_val("Total number of incandescents"),
            low_energy_count=(
                0 if led_cfl_count_known
                else self._int_val("Total number of Low Energy")
            ),
        )

    def _extract_main_heating(self) -> MainHeating:
        """Read the first main-heating section; an unparsable percentage becomes 0."""
        lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2")
        pct = self._leading_int(self._local_val(lines, "Percentage of Heat"))
        return MainHeating(
            heat_emitter=self._local_str(lines, "Heat Emitter"),
            fuel_type=self._local_str(lines, "Fuel Type"),
            flue_type=self._local_str(lines, "Flue Type"),
            fan_assisted_flue=self._local_bool(lines, "Fan Assisted Flue"),
            design_flow_temperature=self._local_str(lines, "Design flow temperature"),
            heating_controls_ees=self._local_str(lines, "Main Heating Controls EES"),
            heating_controls_sap=self._local_str(lines, "Main Heating Controls Sap"),
            percentage_of_heat=pct if pct is not None else 0,
            pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"),
            heat_pump_age=self._local_val(lines, "Heat pump age"),
        )

    def _extract_meters(self) -> Meters:
        """Read the meter type and smart-meter flags."""
        return Meters(
            electricity_meter_type=self._str_val("Electricity meter type"),
            main_gas=self._bool_val("Main gas"),
            electricity_smart_meter=self._bool_val("Electricity Smart Meter Present"),
            gas_smart_meter=self._bool_val("Gas Smart Meter Present"),
        )

    def _extract_water_heating(self) -> WaterHeating:
        """Read the water-heating codes, fuel type and cylinder flag."""
        return WaterHeating(
            water_heating_code=self._str_val("Water Heating Code"),
            water_heating_sap_code=self._int_val("Water Heating SapCode"),
            water_heating_fuel_type=self._str_val("Water Heating Fuel Type"),
            hot_water_cylinder_present=self._bool_val("Hot Water Cylinder Present"),
        )

    def _extract_baths_and_showers(self) -> BathsAndShowers:
        """Read the bath counts and the shower rows.

        Shower rows are the (number, outlet type, connected) line triples that
        follow the first line reading exactly "Connected"; reading stops at the
        first triple whose leading line is not all digits.
        """
        n_baths = self._int_val("Total Number of Baths")
        n_connected = self._int_val("Number of Baths Connected")
        showers: List[Shower] = []
        try:
            j = self._lines.index("Connected") + 1
        except ValueError:
            j = len(self._lines)  # no shower table: the loop below is skipped
        while j + 2 < len(self._lines):
            num_line = self._lines[j]
            if not num_line.isdigit():
                break
            showers.append(
                Shower(
                    shower_number=int(num_line),
                    outlet_type=self._lines[j + 1],
                    connected=self._lines[j + 2],
                )
            )
            j += 3
        return BathsAndShowers(
            number_of_baths=n_baths,
            number_of_baths_connected=n_connected,
            showers=showers,
        )

    def _extract_renewables(self) -> Renewables:
        """Read the renewables flags; an unparsable hydro figure becomes 0.0."""
        fghrs_lines = self._section_lines(
            "18.0 Flue Gas Heat Recovery System", "19.0 Photovoltaic Panel"
        )
        fghrs = self._local_bool(fghrs_lines, "Present")
        terrain = self._str_val("Terrain Type")
        hydro = self._leading_float(
            self._next_val("Electricity generated [kWh/year]")
        )
        return Renewables(
            solar_water_heating=self._bool_val("Solar Water Heating"),
            wwhrs_present=self._bool_val("Is WWHRS present in the property?"),
            flue_gas_heat_recovery_present=fghrs,
            photovoltaic_panel=self._str_val("Photovoltaic Panel"),
            export_capable_meter=self._bool_val("Export capable meter"),
            wind_turbine_present=self._bool_val("Wind turbine present?"),
            wind_turbines_terrain_type=terrain,
            hydro_electricity_generated_kwh=hydro if hydro is not None else 0.0,
        )

    def extract(self) -> ElmhurstSiteNotes:
        """Run every section extractor and assemble the ``ElmhurstSiteNotes``.

        Raises:
            ValueError: if a required date is missing or malformed, or a
                window row has a non-numeric U-value / g-value.
        """
        co2 = self._leading_float(self._next_val("Emissions (t/year)"))
        return ElmhurstSiteNotes(
            surveyor_info=self._extract_surveyor_info(),
            property_details=self._extract_property_details(),
            current_sap_rating=self._rating_val("Current SAP rating"),
            potential_sap_rating=self._rating_val("Potential SAP rating"),
            current_ei_rating=self._rating_val("Current EI rating"),
            potential_ei_rating=self._rating_val("Potential EI rating"),
            co2_emissions_current_t=co2 if co2 is not None else 0.0,
            property_type=self._str_val("1.0 Property type"),
            attachment=self._extract_attachment(),
            number_of_storeys=self._int_val("Storeys"),
            habitable_rooms=self._int_val("Habitable Rooms"),
            heated_habitable_rooms=self._int_val("Heated Habitable Rooms"),
            construction_age_band=self._str_val("Main Property"),
            dimensions=self._extract_dimensions(),
            has_conservatory=self._bool_val("Is there a conservatory?"),
            walls=self._extract_walls(),
            roof=self._extract_roof(),
            floor=self._extract_floor(),
            door_count=self._int_val("Total Number of Doors"),
            insulated_door_count=self._int_val("Number of Insulated Doors"),
            windows=self._extract_windows(),
            draught_proofing_percent=self._int_val("Draught Proofing"),
            ventilation=self._extract_ventilation(),
            lighting=self._extract_lighting(),
            main_heating=self._extract_main_heating(),
            meters=self._extract_meters(),
            water_heating=self._extract_water_heating(),
            baths_and_showers=self._extract_baths_and_showers(),
            renewables=self._extract_renewables(),
        )

View file

@ -66,9 +66,11 @@ class PasHubRdSapSiteNotesExtractor:
val = self._get_in(list_to_process, key)
return val is not None and val.lower() != "not known"
def _wall_thickness_in(self, list_to_process: List[str]) -> Optional[int]:
    """Return the wall thickness found in *list_to_process*, or None.

    The value is looked up under the "Wall thickness:" key via ``_get_in``
    and its first whitespace-separated token is converted to an int.

    Returns:
        The thickness as an int, or None when the value is missing, blank,
        or recorded as "unmeasurable" (case-insensitive).

    Raises:
        ValueError: if the first token is neither numeric nor "unmeasurable".
    """
    val = self._get_in(list_to_process, "Wall thickness:")
    # Split once; an empty list covers both a missing and a whitespace-only value.
    parts = val.split() if val else []
    if not parts or parts[0].lower() == "unmeasurable":
        return None
    return int(parts[0])
def _section(self, start: str, end: str) -> List[str]:
try:
@ -83,10 +85,17 @@ class PasHubRdSapSiteNotesExtractor:
def extract_inspection_metadata(self) -> InspectionMetadata:
try:
addr_start = self.text_list.index("Property Address:") + 1
addr_end = self.text_list.index("Property Photo", addr_start)
property_address = ", ".join(
t.rstrip(",") for t in self.text_list[addr_start:addr_end]
)
try:
addr_end = self.text_list.index("Property Photo", addr_start)
address_tokens = self.text_list[addr_start:addr_end]
except ValueError:
addr_end = self.text_list.index("RdSAP Assessment", addr_start)
address_tokens = []
for t in self.text_list[addr_start:addr_end]:
if not t or t.startswith("Page "):
break
address_tokens.append(t)
property_address = ", ".join(t.rstrip(",") for t in address_tokens)
except ValueError:
property_address = ""

View file

@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Parse a local site-notes PDF and load the result into the database.
Usage:
python local_runner.py <pdf_path>
"""
from typing import List, Optional, Tuple
from backend.app.db.connection import db_session
from backend.app.db.models.epc_property import (
EpcBuildingPartModel,
EpcEnergyElementModel,
EpcFlatDetailsModel,
EpcFloorDimensionModel,
EpcMainHeatingDetailModel,
EpcPropertyEnergyPerformanceModel,
EpcPropertyModel,
EpcWindowModel,
)
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
def _parse_pdf(pdf_path: str) -> EpcPropertyData:
    """Read the PDF at *pdf_path* and map it to ``EpcPropertyData``.

    Documents whose text contains "Elmhurst Energy Systems" go through the
    Elmhurst extractor; everything else goes through the PasHub extractor.
    """
    with open(pdf_path, "rb") as handle:
        raw: bytes = handle.read()

    page_texts: List[str] = pdf_to_pages(raw)
    joined: str = "\n".join(page_texts)

    if "Elmhurst Energy Systems" not in joined:
        token_list: List[str] = pdf_to_text_list(raw)
        pashub_notes = PasHubRdSapSiteNotesExtractor(token_list).extract()
        return EpcPropertyDataMapper.from_site_notes(pashub_notes)

    elmhurst_notes = ElmhurstSiteNotesExtractor(page_texts).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(elmhurst_notes)
def _insert_energy_elements(
    session,
    elements: List[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add one ``EpcEnergyElementModel`` row to *session* per entry in *elements*.

    Every row is tagged with *element_type* and linked to *epc_property_id*.
    """
    # The generator is lazy, so each row is built immediately before it is added.
    rows = (
        EpcEnergyElementModel.from_domain(item, element_type, epc_property_id)
        for item in elements
    )
    for row in rows:
        session.add(row)
def _insert_optional_energy_element(
    session,
    el: Optional[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add a single ``EpcEnergyElementModel`` row for *el*; do nothing when it is None."""
    if el is None:
        return
    row = EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
    session.add(row)
# Parse the PDF at `pdf_path`, add the resulting EPC rows to a db_session, and
# print the new epc_property id and the address taken from the parsed data.
# NOTE(review): indentation is not visible in this view, so it cannot be told
# whether the two trailing print() calls run inside or after the `with` block.
def run(pdf_path: str) -> None:
data: EpcPropertyData = _parse_pdf(pdf_path)
print("successfully mapped pdf")
with db_session() as session:
# Parent row first; no property_id / portfolio_id are passed here.
epc_prop: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(data)
session.add(epc_prop)
# Flush so the generated primary key is available for the child rows.
session.flush()
assert epc_prop.id is not None
epc_property_id: int = epc_prop.id
session.add(
EpcPropertyEnergyPerformanceModel.from_epc_property_data(
data, epc_property_id=epc_property_id
)
)
for detail in data.sap_heating.main_heating_details:
session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id))
for part in data.sap_building_parts:
bp: EpcBuildingPartModel = EpcBuildingPartModel.from_domain(
part, epc_property_id
)
session.add(bp)
# Flush per building part: its floor dimensions are keyed on bp.id.
session.flush()
assert bp.id is not None
for dim in part.sap_floor_dimensions:
session.add(EpcFloorDimensionModel.from_domain(dim, bp.id))
for window in data.sap_windows:
session.add(EpcWindowModel.from_domain(window, epc_property_id))
# Element collections, each paired with the element-type tag it is stored under.
list_elements: List[Tuple[List[EnergyElement], str]] = [
(data.roofs, "roof"),
(data.walls, "wall"),
(data.floors, "floor"),
(data.main_heating, "main_heating"),
]
for elements, etype in list_elements:
_insert_energy_elements(session, elements, etype, epc_property_id)
# Single elements that may be None; the helper skips the None ones.
optional_elements: List[Tuple[Optional[EnergyElement], str]] = [
(data.window, "window"),
(data.lighting, "lighting"),
(data.hot_water, "hot_water"),
(data.secondary_heating, "secondary_heating"),
(data.main_heating_controls, "main_heating_controls"),
]
for el, etype in optional_elements:
_insert_optional_energy_element(session, el, etype, epc_property_id)
if data.sap_flat_details is not None:
session.add(
EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id)
)
print(f"epc_property_id={epc_property_id}")
print(f"address: {data.address_line_1}, {data.post_town}, {data.postcode}")
if __name__ == "__main__":
    import sys

    # Honour the documented usage (`python local_runner.py <pdf_path>`). With no
    # argument, fall back to the Elmhurst fixture that was previously hard-coded.
    # A PasHub sample lives next to it at
    # backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf.
    default_pdf = "backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf"
    run(sys.argv[1] if len(sys.argv) > 1 else default_pdf)

View file

@ -0,0 +1,28 @@
from typing import List
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
def parse_site_notes_pdf(file_path: str) -> EpcPropertyData:
    """Read the PDF at *file_path* and return the mapped ``EpcPropertyData``.

    The Elmhurst parser is used when the page text contains
    "Elmhurst Energy Systems"; otherwise the PasHub parser is used.
    """
    with open(file_path, "rb") as handle:
        raw = handle.read()

    page_texts = pdf_to_pages(raw)
    is_elmhurst = "Elmhurst Energy Systems" in "\n".join(page_texts)
    if not is_elmhurst:
        return _parse_pashub(raw)
    return _parse_elmhurst(page_texts)
def _parse_elmhurst(pages: List[str]) -> EpcPropertyData:
    """Extract Elmhurst site notes from *pages* and map them to ``EpcPropertyData``."""
    extractor = ElmhurstSiteNotesExtractor(pages)
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
def _parse_pashub(pdf_bytes: bytes) -> EpcPropertyData:
    """Tokenise *pdf_bytes*, extract PasHub site notes and map them to ``EpcPropertyData``."""
    extractor = PasHubRdSapSiteNotesExtractor(pdf_to_text_list(pdf_bytes))
    notes = extractor.extract()
    return EpcPropertyDataMapper.from_site_notes(notes)

View file

@ -10,3 +10,8 @@ def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
for line in page.get_text().split("\n"):
tokens.append(line)
return tokens
def pdf_to_pages(pdf_bytes: bytes) -> List[str]:
    """Return the text of each page of the PDF in *pdf_bytes*, in document order."""
    page_texts: List[str] = []
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as document:
        for page in document:
            page_texts.append(page.get_text())
    return page_texts

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,6 @@
[
"Summary Information\nSurveyor:\nP960-0001\nName:\nRichard Matthew Ratcliff\nTitle: Mr.\nTel Number: 07760 443 469\nSurvey Reference:\n001573\nMy Reference:\nCurrent SAP rating:\nC 69\nPotential SAP rating: C 77\nEmissions (t/year):\n1.683 tonnes\nCurrent EI rating:\nC 76\nPotential EI rating:\nB 81\nFuel Bill:\n\u00a3896\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nP960-0001-001573\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB10 1XX\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n19\nStreet:\nQueens Road\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nB Bungalow\nE End-Terrace\n2.0 Number of\nStoreys:\n1\nHabitable Rooms:\n2\nHeated Habitable Rooms:\n2\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\nLowest Floor:\n44.89\n2.24\n20.10\n6.70\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nU Unable to determine\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n270 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nN Suspended, not timber\nInsulation\nA As built\nDefault U-value\n0.69\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n0\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.30\n1.10\n1.43\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n1.80\n1.00\n1.80\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n0.80\n0.56\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n1.30\n0.91\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\nDraught Proofing\n100 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n0\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n8\nNumber of LED and CFL Known\nYes\nNumber of LED lights\n4\nNumber of CFL lights\n4\nTotal number of Low Energy\n8\nTotal number of incandescents\n0\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n17742 Potterton, Promax 33 Combi ErP, 88.30%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n0\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nElectric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nSuburban\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (Already installed)\nLow energy lighting (Already installed)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (Not applicable)\nDouble glazed windows (Already installed)\nInsulated doors (Already installed)\nSolar photovoltaic panels (Recommended)\nWind turbine (Not applicable)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n"
]

View file

@ -0,0 +1,6 @@
[
"Summary Information\nSurveyor:\nBW22-0001\nName:\nIan Marsh\nTitle:\nTel Number: 07709266472\nSurvey Reference:\n001233\nMy Reference:\nCurrent SAP rating:\nD 68\nPotential SAP rating: A 92\nEmissions (t/year):\n2.812 tonnes\nCurrent EI rating:\nD 68\nPotential EI rating:\nC 76\nFuel Bill:\n\u00a31098\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nBW22-0001-001233\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB11 2NU\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n39\nStreet:\nConstable Avenue\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nH House\nS Semi-Detached\n2.0 Number of\nStoreys:\n2\nHabitable Rooms:\n4\nHeated Habitable Rooms:\n4\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\n1st Floor:\n35.88\n2.51\n17.46\n6.62\nLowest Floor:\n35.88\n2.67\n17.46\n6.62\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nCU Cavity masonry unfilled\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n200 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nT Suspended timber\nInsulation\nA As built\nDefault U-value\n0.72\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n2\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.59\n1.36\n2.16\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n1.27\n0.43\n0.55\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n1.54\n1.06\n1.63\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n0.61\n1.07\n0.65\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nSouth\nManufacturer\n2.70\n0.76\nYes\nNone\n1.07\n1.05\n1.12\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.07\n1.08\n1.16\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.10\n1.06\n1.17\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.12\n1.06\n1.19\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\nDraught Proofing\n90 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. 
of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n2\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n10\nNumber of LED and CFL Known\nNo\nTotal number of Low Energy\n5\nTotal number of incandescents\n5\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n18737 Baxi, ASSURE, 88.40%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n1\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nNon-electric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nRural\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (SAP increase too small)\nLow energy lighting (Recommended)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (SAP increase too small)\nDouble glazed windows (Already installed)\nInsulated doors (SAP increase too small)\nSolar photovoltaic panels (Recommended)\nWind turbine (Recommended)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n"
]

View file

@ -0,0 +1,670 @@
[
"SMART EPC: Record of",
"Inspection & Site Notes",
"Inspection Surveyor:",
"Dave Elliott",
"E-Mail Address:",
"davejohns36@icloud.com",
"Report Reference:",
"Not Applicable",
"Created On:",
"12 September 2025",
"Date of Inspection:",
"08 September 2025",
"Property Address:",
"Flat 3,",
"29 Watcombe Circus,",
"NOTTINGHAM,",
"NG5 2DU",
"Page 1",
"",
"Photo of electricity meter:",
"Single Smart Meter",
"RdSAP Assessment",
"General",
"Confirm you have checked for the existence of an",
"EPC before carrying out another energy assessment.",
"Yes",
"Does an EPC exist at the point of carrying out this",
"energy assessment?",
"No",
"Inspection Date:",
"08/09/2025",
"Transaction Type:",
"None of the Above",
"Tenure:",
"Rented Social",
"Type of Property:",
"Maisonette",
"Detachment Type:",
"Semi-Detached",
"Flat Type:",
"Mid-floor",
"Flat Location:",
"3",
"Corridor Type:",
"Unheated Corridor",
"Unheated corridor wall length:",
"6.59 m",
"Number of storeys:",
"2 Storeys",
"Terrain Type:",
"Suburban",
"Number of Extensions:",
"2 Extensions",
"Is an electricity smart meter present?",
"Yes",
"Electric meter type:",
"Single",
"Is the dwelling export-capable?",
"No",
"Is mains gas available?",
"Yes",
"Is there a gas smart meter?",
"No",
"Is the gas meter accessible?",
"Yes",
"Page 2",
"",
"Photo of Gas Meter:",
"Gas Meter",
"External indicators of Solid Brick construction:",
"Brick Pattern",
"Select Measurements Location:",
"Internal",
"Building Construction",
"Main Building",
"Age Range:",
"1900-1929",
"Record indicators of property age:",
"Property checker",
"Walls - Construction Type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"consistent with build age",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"280 mm",
"Page 3",
"",
"Photo wall thickness:",
"Wall Measurements",
"Wall Dry-Lined?",
"No",
"Party wall construction type:",
"Solid Masonry, Timber Frame, or System Built",
"Floor type:",
"Other dwelling below",
"Extension 1",
"Age Range:",
"1900-1929",
"Record indicators of property age:",
"Property checker",
"Walls - Construction Type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"headers and stretchers in brick bond",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"280 mm",
"Wall Dry-Lined?",
"Yes",
"Party wall construction type:",
"Solid Masonry, Timber Frame, or System Built",
"Floor type:",
"Other dwelling below",
"Extension 2",
"Age Range:",
"1900-1929",
"Record indicators of property age:",
"Property checker",
"Walls - Construction Type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"headers and stretchers in brick bond",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Page 4",
"",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"280 mm",
"Wall Dry-Lined?",
"Yes",
"Party wall construction type:",
"Solid Masonry, Timber Frame, or System Built",
"Floor type:",
"Other dwelling below",
"Building Measurements",
"Area (m2)",
"Height (m)",
"Heat Loss Perimeter (m)",
"PWL (m)",
"Main Building",
"Floor 1",
"39.5",
"3.58",
"11.02",
"15.21",
"Floor 0",
"23.06",
"2.87",
"11.72",
"10.8",
"Extension 1",
"Floor 1",
"3.43",
"3.58",
"4.97",
"1",
"Floor 0",
"3.43",
"2.87",
"4.97",
"1",
"Extension 2",
"Floor 0",
"1.81",
"3.58",
"4.96",
"1",
"Roof Space",
"Main Building",
"Roofs - Construction Type:",
"Pitched roof (Slates or tiles), Access to loft",
"Roofs - Insulation At:",
"Joists",
"Roof U-Value:",
"Not Known",
"Roofs - Insulation Thickness:",
"225 mm",
"Page 5",
"",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Page 6",
"",
"Loft insulation:",
"Loft",
"Indicators of Solid Brick Wall Construction in roof space:",
"solid wall construction visible to gables",
"Record indicators of Solid Brick Wall Construction in",
"roof space:",
"solid wall construction visible to gables",
"Extension 1",
"Roofs - Construction Type:",
"Flat",
"Roofs - Insulation At:",
"Unknown",
"Record indicators of Solid Brick Wall Construction in",
"roof space:",
"solid wall construction visible at eaves",
"Extension 2",
"Roofs - Construction Type:",
"Flat",
"Roofs - Insulation At:",
"Unknown",
"Record indicators of Solid Brick Wall Construction in",
"roof space:",
"Couldn\u2019t enter",
"Page 7",
"",
"Alternative Wall",
"Main Building",
"Alternative Wall 1",
"Construction type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"consistent with building age, no visible cavity trays",
"Insulation Type:",
"As Built",
"Sheltered wall?",
"Yes",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall thickness:",
"280 mm",
"Wall Dry-Lined?",
"Yes",
"Windows",
"Window 1",
"Window location:",
"Main Building",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.2 m",
"Window width:",
"0.8 m",
"Orientation:",
"South West",
"Window 2",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.65 m",
"Window width:",
"0.52 m",
"Orientation:",
"East",
"Page 8",
"",
"Window 3",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.95 m",
"Window width:",
"0.86 m",
"Orientation:",
"East",
"Window 4",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.76 m",
"Window width:",
"0.65 m",
"Orientation:",
"North",
"Window 5",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.68 m",
"Window width:",
"0.68 m",
"Orientation:",
"East",
"Page 9",
"",
"Window 6",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.84 m",
"Window width:",
"1.18 m",
"Orientation:",
"North East",
"Window 7",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.76 m",
"Window width:",
"0.65 m",
"Orientation:",
"North",
"Window 8",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.82 m",
"Window width:",
"0.84 m",
"Orientation:",
"South East",
"Page 10",
"",
"Window 9",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.65 m",
"Window width:",
"0.5 m",
"Orientation:",
"South",
"Window 10",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.71 m",
"Window width:",
"0.47 m",
"Orientation:",
"East",
"Window 11",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.2 m",
"Window width:",
"0.8 m",
"Orientation:",
"South West",
"Page 11",
"",
"Heating & Hot Water",
"Main Heating Systems",
"Main Heating 1",
"How would you like to select the Heating System?",
"PCDF Search",
"System type:",
"Boiler with radiators or underfloor heating",
"Product Id",
"15030",
"Manufacturer",
"Baxi",
"Model",
"Duo-tec Combi",
"Orig Manuf",
"Baxi Heating",
"Fuel",
"Mains gas",
"S. Efficiency",
"0",
"Type",
"Combi",
"Condensing",
"Yes",
"Year",
"2006 - 2008",
"Mount",
"Wall",
"Open Flue",
"Room-sealed",
"Fan Assist",
"Yes",
"Status",
"Normal status for an actual product",
"Central heating pump age:",
"Unknown",
"Controls:",
"Programmer, room thermostat and TRVs",
"Does the boiler have a Flue Gas Heat Recover",
"System (FGHRS)?",
"No",
"Is there a weather compensator?",
"No",
"Emitter:",
"Radiators",
"Emitter Temperature:",
"Unknown",
"Secondary Heating System",
"Secondary Fuel",
"No Secondary Heating",
"Water Heating & Cylinder",
"Water Heating Type:",
"Regular",
"Water Heating System:",
"From main heating 1",
"Cylinder Size:",
"No Cylinder",
"Ventilation",
"Ventilation type:",
"Natural",
"Has fixed air conditioning?",
"No",
"Number of open flues:",
"0",
"Number of closed flues:",
"0",
"Number of boiler flues:",
"1",
"Page 12",
"",
"Number of other flues:",
"0",
"Number of extract fans:",
"2",
"Number of passive vents:",
"1",
"Number of flueless gas fires:",
"0",
"Pressure test:",
"No test",
"Is there a draught lobby?",
"Yes",
"Conservatories",
"Is there conservatory?",
"No conservatory",
"Renewables",
"Wind Turbines",
"Has wind turbines?",
"No",
"Solar hot water",
"Has solar hot water?",
"No",
"Photovoltaics",
"Has photovoltaic array?",
"No",
"Number of PV batteries:",
"None",
"Hydro",
"Is the dwelling connected to Hydro?",
"No",
"Room Count Elements",
"Number of habitable rooms?",
"3",
"Are any of these rooms unheated?",
"No",
"Number of external doors?",
"1",
"Number of insulated external doors?",
"0",
"Number of draughtproofed external doors?",
"1",
"Number of open chimneys?",
"0",
"Number of blocked chimneys?",
"0",
"Number of fixed incandescent bulbs:",
"7",
"Is the exact number of LED and CFL bulbs known?",
"Yes",
"Number of fixed LED bulbs:",
"7",
"Number of fixed CFL bulbs:",
"0",
"Are there any waste water heat recovery systems?",
"None",
"Number of baths:",
"1",
"How many special features are there at the",
"property?",
"0",
"Customer Response",
"Customer present?",
"Yes",
"Page 13",
"",
"Customer willing to answer satisfaction survey?",
"No",
"Addendum + Related Party Disclosure",
"Addendum",
"None",
"Related party disclosure",
"No related party",
"Photographs Required",
"Page 14",
""
]

View file

@ -0,0 +1,356 @@
import json
import os
from datetime import date
import pytest
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
# Directory with the JSON fixtures, located next to this test module.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

# JSON fixtures for the Elmhurst site-notes tests.
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_2_text.json")
def _load_elmhurst_result(fixture_path: str) -> EpcPropertyData:
    """Load a JSON fixture and run it through extraction and mapping.

    Args:
        fixture_path: Path of the JSON file to load; its decoded content is
            passed to ``ElmhurstSiteNotesExtractor`` as the pages.

    Returns:
        The ``EpcPropertyData`` mapped from the extracted site notes.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(fixture_path, encoding="utf-8") as fixture_file:
        pages = json.load(fixture_file)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)


@pytest.fixture(scope="module")
def result() -> EpcPropertyData:
    """Property data mapped from ``FIXTURE_PATH``, built once per module."""
    return _load_elmhurst_result(FIXTURE_PATH)


@pytest.fixture(scope="module")
def result2() -> EpcPropertyData:
    """Property data mapped from ``FIXTURE_PATH_2``, built once per module."""
    return _load_elmhurst_result(FIXTURE_PATH_2)
class TestAddress:
    """Address fields of the mapped ``result`` fixture."""

    def test_address_line_1(self, result: EpcPropertyData) -> None:
        expected = "19, Queens Road"
        assert result.address_line_1 == expected

    def test_post_town(self, result: EpcPropertyData) -> None:
        expected = "BURNLEY"
        assert result.post_town == expected

    def test_postcode(self, result: EpcPropertyData) -> None:
        expected = "BB10 1XX"
        assert result.postcode == expected
class TestInspectionInfo:
    """Inspection metadata of the mapped ``result`` fixture."""

    def test_inspection_date(self, result: EpcPropertyData) -> None:
        expected = date(2026, 3, 6)
        assert result.inspection_date == expected

    def test_tenure(self, result: EpcPropertyData) -> None:
        expected = "Rented (social)"
        assert result.tenure == expected

    def test_transaction_type(self, result: EpcPropertyData) -> None:
        expected = "Grant scheme"
        assert result.transaction_type == expected

    def test_report_reference(self, result: EpcPropertyData) -> None:
        expected = "P960-0001-001573"
        assert result.report_reference == expected
class TestPropertyDescription:
    """Property description fields of the mapped ``result`` fixture."""

    def test_property_type(self, result: EpcPropertyData) -> None:
        expected = "Bungalow"
        assert result.property_type == expected

    def test_built_form(self, result: EpcPropertyData) -> None:
        expected = "End-Terrace"
        assert result.built_form == expected

    def test_dwelling_type(self, result: EpcPropertyData) -> None:
        expected = "End-Terrace bungalow"
        assert result.dwelling_type == expected

    def test_number_of_storeys(self, result: EpcPropertyData) -> None:
        storeys = result.number_of_storeys
        assert storeys == 1

    def test_has_conservatory(self, result: EpcPropertyData) -> None:
        # Identity check: the value must be the bool False, not merely falsy.
        conservatory = result.has_conservatory
        assert conservatory is False

    def test_total_floor_area(self, result: EpcPropertyData) -> None:
        area = result.total_floor_area_m2
        assert area == 44.89
class TestCounts:
    """Room, door and chimney counts of the mapped ``result`` fixture."""

    def test_habitable_rooms_count(self, result: EpcPropertyData) -> None:
        count = result.habitable_rooms_count
        assert count == 2

    def test_heated_rooms_count(self, result: EpcPropertyData) -> None:
        count = result.heated_rooms_count
        assert count == 2

    def test_door_count(self, result: EpcPropertyData) -> None:
        count = result.door_count
        assert count == 0

    def test_insulated_door_count(self, result: EpcPropertyData) -> None:
        count = result.insulated_door_count
        assert count == 0

    def test_open_chimneys_count(self, result: EpcPropertyData) -> None:
        count = result.open_chimneys_count
        assert count == 0

    def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None:
        count = result.blocked_chimneys_count
        assert count == 0
class TestLighting:
    """Fixed-lighting bulb counts of the mapped ``result`` fixture."""

    def test_led_count(self, result: EpcPropertyData) -> None:
        led_bulbs = result.led_fixed_lighting_bulbs_count
        assert led_bulbs == 4

    def test_cfl_count(self, result: EpcPropertyData) -> None:
        cfl_bulbs = result.cfl_fixed_lighting_bulbs_count
        assert cfl_bulbs == 4

    def test_incandescent_count(self, result: EpcPropertyData) -> None:
        incandescent_bulbs = result.incandescent_fixed_lighting_bulbs_count
        assert incandescent_bulbs == 0
class TestFlags:
    """Boolean flags of the mapped ``result`` fixture.

    Every check uses identity, so each value must be the bool ``False``
    rather than merely falsy.
    """

    def test_solar_water_heating(self, result: EpcPropertyData) -> None:
        flag = result.solar_water_heating
        assert flag is False

    def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None:
        flag = result.has_hot_water_cylinder
        assert flag is False

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        flag = result.has_fixed_air_conditioning
        assert flag is False

    def test_hydro(self, result: EpcPropertyData) -> None:
        flag = result.hydro
        assert flag is False

    def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
        flag = result.photovoltaic_array
        assert flag is False
class TestBuildingPart:
    """Building-part fields of the mapped ``result`` fixture."""

    @staticmethod
    def _first_part(result: EpcPropertyData):
        """Return the first entry of ``result.sap_building_parts``."""
        return result.sap_building_parts[0]

    @staticmethod
    def _first_floor_dimension(result: EpcPropertyData):
        """Return the first floor dimension of the first building part."""
        return result.sap_building_parts[0].sap_floor_dimensions[0]

    def test_single_building_part(self, result: EpcPropertyData) -> None:
        parts = result.sap_building_parts
        assert len(parts) == 1

    def test_identifier(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.identifier == "main"

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.construction_age_band == "1950-1966"

    def test_wall_construction(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.wall_construction == "Cavity"

    def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.wall_insulation_type == "Filled Cavity"

    def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.wall_thickness_measured is True

    def test_wall_thickness_mm(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.wall_thickness_mm == 300

    def test_roof_insulation_location(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.roof_insulation_location == "Joists"

    def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.roof_insulation_thickness == 270

    def test_floor_type(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.floor_type == "Ground floor"

    def test_floor_construction_type(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.floor_construction_type == "Suspended, not timber"

    def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.floor_insulation_type_str == "As built"

    def test_floor_u_value_known(self, result: EpcPropertyData) -> None:
        part = self._first_part(result)
        assert part.floor_u_value_known is False

    def test_single_floor_dimension(self, result: EpcPropertyData) -> None:
        dimensions = self._first_part(result).sap_floor_dimensions
        assert len(dimensions) == 1

    def test_floor_dimension_area(self, result: EpcPropertyData) -> None:
        dimension = self._first_floor_dimension(result)
        assert dimension.total_floor_area_m2 == 44.89

    def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None:
        dimension = self._first_floor_dimension(result)
        assert dimension.room_height_m == 2.24

    def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None:
        dimension = self._first_floor_dimension(result)
        assert dimension.heat_loss_perimeter_m == 20.10

    def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None:
        dimension = self._first_floor_dimension(result)
        assert dimension.party_wall_length_m == 6.70
class TestWindows:
    """Checks on ``result.sap_windows``: the count, the first and third windows,
    and the first window's transmission details."""

    def test_window_count(self, result: EpcPropertyData) -> None:
        windows = result.sap_windows
        assert len(windows) == 4

    def test_first_window_width(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.window_width == 1.30

    def test_first_window_height(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.window_height == 1.10

    def test_first_window_orientation(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.orientation == "North"

    def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.glazing_type == "Double post or during 2022"

    def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.draught_proofed is True

    def test_third_window_orientation(self, result: EpcPropertyData) -> None:
        third = result.sap_windows[2]
        assert third.orientation == "South"

    def test_frame_factor(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.frame_factor == 0.7

    def test_transmission_u_value(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        # The details must be present before their fields are compared.
        assert details is not None
        assert details.u_value == 1.4

    def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.solar_transmittance == 0.72

    def test_transmission_data_source(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.data_source == "Manufacturer"
class TestHeating:
    """Checks on ``result.sap_heating``: the single main-heating detail record,
    the shower outlet, and the water-heating fields."""

    def test_single_heating_detail(self, result: EpcPropertyData) -> None:
        details = result.sap_heating.main_heating_details
        assert len(details) == 1

    def test_fuel_type(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.main_fuel_type == "Mains gas"

    def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.heat_emitter_type == "Radiators"

    def test_emitter_temperature(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.emitter_temperature == "Unknown"

    def test_fan_flue_present(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.fan_flue_present is True

    def test_has_fghrs(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.has_fghrs is False

    def test_main_heating_control(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        expected = "Programmer, room thermostat and TRVs"
        assert main.main_heating_control == expected

    def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
        outlets = result.sap_heating.shower_outlets
        # The outlets container must be present before it is dereferenced.
        assert outlets is not None
        assert outlets.shower_outlet.shower_outlet_type == "Electric shower"

    def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.cylinder_size is None

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.has_fixed_air_conditioning is False

    def test_water_heating_code(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.water_heating_code == 901
class TestEnergySource:
    """Checks on ``result.sap_energy_source``: gas supply, meters, wind turbines and PV batteries."""

    def test_mains_gas(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.mains_gas is True

    def test_meter_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.meter_type == "Single"

    def test_electricity_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.electricity_smart_meter_present is False

    def test_gas_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.gas_smart_meter_present is False

    def test_wind_turbines_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_count == 0

    def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_terrain_type == "Suburban"

    def test_pv_battery_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.pv_battery_count == 0
class TestVentilation:
    """Checks on ``result.sap_ventilation``; each test first asserts the section is present."""

    def test_draught_lobby(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.draught_lobby is False

    def test_pressure_test(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.pressure_test == "Not available"

    def test_extract_fans_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.extract_fans_count == 2

    def test_open_flues_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.open_flues_count == 0
class TestDraughtproofingAndWater:
    """Checks the draught-proofing percentage, waste-water heat recovery and
    unheated-rooms fields of the ``result`` fixture."""

    def test_percent_draughtproofed(self, result: EpcPropertyData) -> None:
        percent = result.percent_draughtproofed
        assert percent == 100

    def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None:
        recovery = result.waste_water_heat_recovery
        # The expected value is the string "None", not the None singleton.
        assert recovery == "None"

    def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None:
        unheated = result.any_unheated_rooms
        assert unheated is False
class TestEnergyPerformance:
    """Checks the energy rating, environmental impact and CO2 figures on the ``result`` fixture."""

    def test_energy_rating_current(self, result: EpcPropertyData) -> None:
        rating = result.energy_rating_current
        assert rating == 69

    def test_energy_rating_potential(self, result: EpcPropertyData) -> None:
        rating = result.energy_rating_potential
        assert rating == 77

    def test_environmental_impact_current(self, result: EpcPropertyData) -> None:
        impact = result.environmental_impact_current
        assert impact == 76

    def test_environmental_impact_potential(self, result: EpcPropertyData) -> None:
        impact = result.environmental_impact_potential
        assert impact == 81

    def test_co2_emissions_current(self, result: EpcPropertyData) -> None:
        emissions = result.co2_emissions_current
        assert emissions == 1.683
class TestWindowFrameMaterial:
    """Checks frame material and glazing gap on the first window of the ``result2`` fixture."""

    def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first = result2.sap_windows[0]
        assert first.frame_material == "PVC"

    def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first = result2.sap_windows[0]
        assert first.glazing_gap == "16 mm or more"
class TestLowEnergyLighting:
    """Checks the low-energy fixed bulb count on the ``result2`` fixture."""

    def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None:
        low_energy_bulbs = result2.low_energy_fixed_lighting_bulbs_count
        assert low_energy_bulbs == 5

View file

@ -0,0 +1,515 @@
import json
import os
from datetime import date
import pytest
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.surveys.elmhurst_site_notes import (
BathsAndShowers,
BuildingPartDimensions,
ElmhurstSiteNotes,
FloorDetails,
FloorDimension,
Lighting,
MainHeating,
Meters,
PropertyDetails,
Renewables,
RoofDetails,
Shower,
SurveyorInfo,
VentilationAndCooling,
WallDetails,
WaterHeating,
Window,
)
# Both Elmhurst text fixtures live in the "fixtures" directory next to this module.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_2_text.json")
@pytest.fixture(scope="module")
def result() -> ElmhurstSiteNotes:
    """Extract site notes from the first Elmhurst text fixture, once per module.

    The JSON file at ``FIXTURE_PATH`` is loaded and passed to
    ``ElmhurstSiteNotesExtractor``; the value returned by ``extract()`` is
    what tests requesting ``result`` receive.
    """
    # Decode explicitly as UTF-8 so the fixture parses the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(FIXTURE_PATH, encoding="utf-8") as f:
        pages = json.load(f)
    return ElmhurstSiteNotesExtractor(pages).extract()
@pytest.fixture(scope="module")
def result2() -> ElmhurstSiteNotes:
    """Extract site notes from the second Elmhurst text fixture, once per module.

    The JSON file at ``FIXTURE_PATH_2`` is loaded and passed to
    ``ElmhurstSiteNotesExtractor``; the value returned by ``extract()`` is
    what tests requesting ``result2`` receive.
    """
    # Decode explicitly as UTF-8 so the fixture parses the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(FIXTURE_PATH_2, encoding="utf-8") as f:
        pages = json.load(f)
    return ElmhurstSiteNotesExtractor(pages).extract()
class TestSurveyorInfo:
    """Checks on ``result.surveyor_info`` as extracted from the first fixture."""

    def test_surveyor_code(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.surveyor_code == "P960-0001"

    def test_name(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.name == "Richard Matthew Ratcliff"

    def test_title(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.title == "Mr."

    def test_tel_number(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.tel_number == "07760 443 469"

    def test_survey_reference(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.survey_reference == "001573"

    def test_my_reference_none(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.my_reference is None
class TestPropertyDetails:
    """Checks on ``result.property_details``: versioning, address parts, tenure and dates.

    Address components that are expected to be missing are asserted to be None.
    """

    def test_rdsap_version(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.rdsap_version == "RdSAP10"

    def test_reference_number(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.reference_number == "P960-0001-001573"

    def test_lodgement_required(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.lodgement_required is False

    def test_regs_region(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.regs_region == "England"

    def test_epc_language(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.epc_language == "English"

    def test_uprn_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.uprn is None

    def test_postcode(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.postcode == "BB10 1XX"

    def test_region(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.region == "West Pennines"

    def test_house_name_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.house_name is None

    def test_house_number(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.house_number == "19"

    def test_street(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.street == "Queens Road"

    def test_locality_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.locality is None

    def test_town(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.town == "BURNLEY"

    def test_county_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.county is None

    def test_tenure(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.tenure == "Rented (social)"

    def test_transaction_type(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.transaction_type == "Grant scheme"

    def test_inspection_date(self, result: ElmhurstSiteNotes) -> None:
        expected = date(2026, 3, 6)
        assert result.property_details.inspection_date == expected

    def test_process_date(self, result: ElmhurstSiteNotes) -> None:
        expected = date(2026, 3, 6)
        assert result.property_details.process_date == expected

    def test_epc_exists(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.epc_exists is False
class TestPropertyDescription:
    """Checks the top-level property description fields on ``result``."""

    def test_property_type(self, result: ElmhurstSiteNotes) -> None:
        property_type = result.property_type
        assert property_type == "B Bungalow"

    def test_attachment(self, result: ElmhurstSiteNotes) -> None:
        attachment = result.attachment
        assert attachment == "E End-Terrace"

    def test_number_of_storeys(self, result: ElmhurstSiteNotes) -> None:
        storeys = result.number_of_storeys
        assert storeys == 1

    def test_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
        rooms = result.habitable_rooms
        assert rooms == 2

    def test_heated_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
        heated_rooms = result.heated_habitable_rooms
        assert heated_rooms == 2

    def test_construction_age_band(self, result: ElmhurstSiteNotes) -> None:
        age_band = result.construction_age_band
        assert age_band == "D 1950-1966"

    def test_has_conservatory(self, result: ElmhurstSiteNotes) -> None:
        conservatory = result.has_conservatory
        assert conservatory is False
class TestDimensions:
    """Checks on ``result.dimensions``: the dimension type and the single floor record."""

    def test_dimension_type(self, result: ElmhurstSiteNotes) -> None:
        dimensions = result.dimensions
        assert dimensions.dimension_type == "Internal"

    def test_floor_count(self, result: ElmhurstSiteNotes) -> None:
        floors = result.dimensions.floors
        assert len(floors) == 1

    def test_floor_name(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.name == "Lowest Floor"

    def test_floor_area(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.area_m2 == 44.89

    def test_floor_room_height(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.room_height_m == 2.24

    def test_floor_heat_loss_perimeter(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.heat_loss_perimeter_m == 20.10

    def test_floor_party_wall_length(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.party_wall_length_m == 6.70
class TestWalls:
    """Checks on ``result.walls``: type, insulation, thickness and party wall."""

    def test_wall_type(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.wall_type == "CA Cavity"

    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.insulation == "F Filled Cavity"

    def test_thickness_unknown(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.thickness_unknown is False

    def test_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.thickness_mm == 300

    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.u_value_known is False

    def test_party_wall_type(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.party_wall_type == "U Unable to determine"
class TestRoof:
    """Checks on ``result.roof``: type, insulation and U-value flag."""

    def test_roof_type(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.roof_type == "PA Pitched (slates/tiles), access to loft"

    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.insulation == "J Joists"

    def test_insulation_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.insulation_thickness_mm == 270

    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.u_value_known is False
class TestFloor:
    """Checks on ``result.floor``: location, type, insulation and U-value fields."""

    def test_location(self, result: ElmhurstSiteNotes) -> None:
        floor = result.floor
        assert floor.location == "G Ground floor"

    def test_floor_type(self, result: ElmhurstSiteNotes) -> None:
        floor = result.floor
        assert floor.floor_type == "N Suspended, not timber"

    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
        floor = result.floor
        assert floor.insulation == "A As built"

    def test_default_u_value(self, result: ElmhurstSiteNotes) -> None:
        floor = result.floor
        assert floor.default_u_value == 0.69

    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
        floor = result.floor
        assert floor.u_value_known is False
class TestDoors:
    """Checks the door counts on ``result``; both are expected to be zero."""

    def test_door_count(self, result: ElmhurstSiteNotes) -> None:
        doors = result.door_count
        assert doors == 0

    def test_insulated_door_count(self, result: ElmhurstSiteNotes) -> None:
        insulated_doors = result.insulated_door_count
        assert insulated_doors == 0
class TestWindows:
    """Checks on ``result.windows``: the count, the overall draught-proofing
    percentage, and selected fields of the first, third and fourth windows."""

    def test_window_count(self, result: ElmhurstSiteNotes) -> None:
        windows = result.windows
        assert len(windows) == 4

    def test_draught_proofing_percent(self, result: ElmhurstSiteNotes) -> None:
        percent = result.draught_proofing_percent
        assert percent == 100

    def test_first_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
        first = result.windows[0]
        assert first.width_m == 1.30
        assert first.height_m == 1.10
        assert first.area_m2 == 1.43

    def test_first_window_glazing(self, result: ElmhurstSiteNotes) -> None:
        first = result.windows[0]
        assert first.glazing_type == "Double post or during 2022"
        assert first.frame_factor == 0.70

    def test_first_window_location(self, result: ElmhurstSiteNotes) -> None:
        first = result.windows[0]
        assert first.building_part == "Main"
        assert first.location == "External wall"
        assert first.orientation == "North"

    def test_first_window_performance(self, result: ElmhurstSiteNotes) -> None:
        first = result.windows[0]
        assert first.data_source == "Manufacturer"
        assert first.u_value == 1.40
        assert first.g_value == 0.72
        assert first.draught_proofed is True
        # The expected value is the string "None", not the None singleton.
        assert first.permanent_shutters == "None"

    def test_third_window_orientation(self, result: ElmhurstSiteNotes) -> None:
        third = result.windows[2]
        assert third.orientation == "South"

    def test_fourth_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
        fourth = result.windows[3]
        assert fourth.width_m == 0.70
        assert fourth.height_m == 1.30
        assert fourth.area_m2 == 0.91
class TestVentilation:
    """Checks on ``result.ventilation``: opening counts, cooling, draught lobby,
    mechanical ventilation and pressure-test method."""

    def test_open_chimneys(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.open_chimneys_count == 0

    def test_open_flues(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.open_flues_count == 0

    def test_open_chimneys_closed_fire(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.open_chimneys_closed_fire_count == 0

    def test_solid_fuel_boiler_flues(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.solid_fuel_boiler_flues_count == 0

    def test_other_heater_flues(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.other_heater_flues_count == 0

    def test_blocked_chimneys(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.blocked_chimneys_count == 0

    def test_extract_fans(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.extract_fans_count == 2

    def test_passive_vents(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.passive_vents_count == 0

    def test_flueless_gas_fires(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.flueless_gas_fires_count == 0

    def test_fixed_space_cooling(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.fixed_space_cooling is False

    def test_draught_lobby(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.draught_lobby == "Not present"

    def test_mechanical_ventilation(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.mechanical_ventilation is False

    def test_pressure_test_method(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.pressure_test_method == "Not available"
class TestLighting:
    """Checks on ``result.lighting`` for the first fixture, where LED and CFL counts are known."""

    def test_total_bulbs(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.total_bulbs == 8

    def test_led_cfl_count_known(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.led_cfl_count_known is True

    def test_led_count(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.led_count == 4

    def test_cfl_count(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.cfl_count == 4

    def test_incandescent_count(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.incandescent_count == 0
class TestMainHeating:
    """Checks on ``result.main_heating``: boiler reference, emitter, fuel, flue and controls."""

    def test_pcdf_boiler_reference(self, result: ElmhurstSiteNotes) -> None:
        expected = "17742 Potterton, Promax 33 Combi ErP, 88.30%"
        assert result.main_heating.pcdf_boiler_reference == expected

    def test_heat_emitter(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.heat_emitter == "Radiators"

    def test_heat_pump_age(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.heat_pump_age == "Unknown"

    def test_fuel_type(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.fuel_type == "Mains gas"

    def test_flue_type(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.flue_type == "Balanced"

    def test_fan_assisted_flue(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.fan_assisted_flue is True

    def test_design_flow_temperature(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.design_flow_temperature == "Unknown"

    def test_heating_controls_ees(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.heating_controls_ees == "CBE"

    def test_heating_controls_sap(self, result: ElmhurstSiteNotes) -> None:
        expected = "SAP code 2106, Programmer, room thermostat and TRVs"
        assert result.main_heating.heating_controls_sap == expected

    def test_percentage_of_heat(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.percentage_of_heat == 100
class TestMeters:
    """Checks on ``result.meters``: electricity meter type, gas supply and smart meters."""

    def test_electricity_meter_type(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.electricity_meter_type == "Single"

    def test_main_gas(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.main_gas is True

    def test_electricity_smart_meter(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.electricity_smart_meter is False

    def test_gas_smart_meter(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.gas_smart_meter is False
class TestWaterHeating:
    """Checks on ``result.water_heating``: codes, fuel type and cylinder presence."""

    def test_water_heating_code(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.water_heating_code == "HWP"

    def test_water_heating_sap_code(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.water_heating_sap_code == 901

    def test_water_heating_fuel_type(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.water_heating_fuel_type == "Mains gas"

    def test_hot_water_cylinder_present(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.hot_water_cylinder_present is False
class TestBathsAndShowers:
    """Checks on ``result.baths_and_showers``: bath counts and the single shower record."""

    def test_number_of_baths(self, result: ElmhurstSiteNotes) -> None:
        section = result.baths_and_showers
        assert section.number_of_baths == 0

    def test_number_of_baths_connected(self, result: ElmhurstSiteNotes) -> None:
        section = result.baths_and_showers
        assert section.number_of_baths_connected == 0

    def test_shower_count(self, result: ElmhurstSiteNotes) -> None:
        showers = result.baths_and_showers.showers
        assert len(showers) == 1

    def test_shower_number(self, result: ElmhurstSiteNotes) -> None:
        shower = result.baths_and_showers.showers[0]
        assert shower.shower_number == 1

    def test_shower_outlet_type(self, result: ElmhurstSiteNotes) -> None:
        shower = result.baths_and_showers.showers[0]
        assert shower.outlet_type == "Electric shower"

    def test_shower_connected(self, result: ElmhurstSiteNotes) -> None:
        shower = result.baths_and_showers.showers[0]
        # The expected value is the string "None", not the None singleton.
        assert shower.connected == "None"
class TestRenewables:
    """Checks on ``result.renewables``: solar, heat recovery, PV, wind and hydro fields."""

    def test_solar_water_heating(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.solar_water_heating is False

    def test_wwhrs_present(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.wwhrs_present is False

    def test_flue_gas_heat_recovery_present(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.flue_gas_heat_recovery_present is False

    def test_photovoltaic_panel(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        # The expected value is the string "None", not the None singleton.
        assert renewables.photovoltaic_panel == "None"

    def test_export_capable_meter(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.export_capable_meter is False

    def test_wind_turbine_present(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.wind_turbine_present is False

    def test_wind_turbines_terrain_type(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.wind_turbines_terrain_type == "Suburban"

    def test_hydro_electricity_generated_kwh(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.hydro_electricity_generated_kwh == 0.0
class TestEnergyPerformance:
    """Checks the SAP and EI ratings and the current CO2 emissions on ``result``."""

    def test_current_sap_rating(self, result: ElmhurstSiteNotes) -> None:
        rating = result.current_sap_rating
        assert rating == 69

    def test_potential_sap_rating(self, result: ElmhurstSiteNotes) -> None:
        rating = result.potential_sap_rating
        assert rating == 77

    def test_current_ei_rating(self, result: ElmhurstSiteNotes) -> None:
        rating = result.current_ei_rating
        assert rating == 76

    def test_potential_ei_rating(self, result: ElmhurstSiteNotes) -> None:
        rating = result.potential_ei_rating
        assert rating == 81

    def test_co2_emissions_current_t(self, result: ElmhurstSiteNotes) -> None:
        emissions = result.co2_emissions_current_t
        assert emissions == 1.683
class TestWindowsWithFrameDetails:
    """Checks on ``result2.windows`` (second fixture), whose windows carry
    frame type and glazing gap in addition to the glazing type."""

    def test_window_count(self, result2: ElmhurstSiteNotes) -> None:
        windows = result2.windows
        assert len(windows) == 8

    def test_draught_proofing_percent(self, result2: ElmhurstSiteNotes) -> None:
        percent = result2.draught_proofing_percent
        assert percent == 90

    def test_first_window_glazing_type_excludes_frame_type(self, result2: ElmhurstSiteNotes) -> None:
        first = result2.windows[0]
        assert first.glazing_type == "Double with unknown install date"

    def test_first_window_frame_type(self, result2: ElmhurstSiteNotes) -> None:
        first = result2.windows[0]
        assert first.frame_type == "PVC"

    def test_first_window_frame_factor(self, result2: ElmhurstSiteNotes) -> None:
        first = result2.windows[0]
        assert first.frame_factor == 0.70

    def test_first_window_glazing_gap(self, result2: ElmhurstSiteNotes) -> None:
        first = result2.windows[0]
        assert first.glazing_gap == "16 mm or more"

    def test_first_window_location(self, result2: ElmhurstSiteNotes) -> None:
        first = result2.windows[0]
        assert first.building_part == "Main"
        assert first.location == "External wall"
        assert first.orientation == "East"

    def test_first_window_performance(self, result2: ElmhurstSiteNotes) -> None:
        first = result2.windows[0]
        assert first.data_source == "Manufacturer"
        assert first.u_value == 2.70
        assert first.g_value == 0.76
        assert first.draught_proofed is True
        # The expected value is the string "None", not the None singleton.
        assert first.permanent_shutters == "None"

    def test_fourth_window_orientation(self, result2: ElmhurstSiteNotes) -> None:
        fourth = result2.windows[3]
        assert fourth.orientation == "South"
class TestLightingLedCflUnknown:
    """Checks on ``result2.lighting`` (second fixture), where the LED/CFL split is
    not known: a low-energy total is expected and the LED and CFL counts are zero."""

    def test_total_bulbs(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.total_bulbs == 10

    def test_led_cfl_count_known_false(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.led_cfl_count_known is False

    def test_low_energy_count(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.low_energy_count == 5

    def test_incandescent_count(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.incandescent_count == 5

    def test_led_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.led_count == 0

    def test_cfl_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.cfl_count == 0

View file

@ -20,9 +20,9 @@ from datatypes.epc.domain.epc_property_data import (
)
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
PDF_PATH_2 = os.path.join(
os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_2.pdf"
os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_2.pdf"
)
@ -71,7 +71,7 @@ class TestPdfToEpcPropertyData:
),
sap_windows=[
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -84,7 +84,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -97,7 +97,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@ -110,7 +110,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North",
window_type="Window",
@ -123,7 +123,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@ -136,7 +136,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -149,7 +149,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -162,7 +162,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@ -302,7 +302,7 @@ class TestPdfToEpcPropertyDataFixture2:
PDF_PATH_3 = os.path.join(
os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_3.pdf"
os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_3.pdf"
)
@ -339,7 +339,7 @@ class TestPdfToEpcPropertyDataFixture3:
PDF_PATH_4 = os.path.join(
os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_4.pdf"
os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_4.pdf"
)
@ -369,7 +369,7 @@ class TestPdfToEpcPropertyDataFixture4:
PDF_PATH_5 = os.path.join(
os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_5.pdf"
os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_5.pdf"
)
@ -401,7 +401,7 @@ class TestPdfToEpcPropertyDataFixture5:
PDF_PATH_6 = os.path.join(
os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_6.pdf"
os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_6.pdf"
)

View file

@ -37,32 +37,37 @@ FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
def load_text_fixture() -> list[str]:
with open(os.path.join(FIXTURES, "site_notes_example_text.json")) as f:
with open(os.path.join(FIXTURES, "pashub_site_notes_1_text.json")) as f:
return json.load(f)
def load_text_fixture_2() -> list[str]:
with open(os.path.join(FIXTURES, "site_notes_example_2_text.json")) as f:
with open(os.path.join(FIXTURES, "pashub_site_notes_2_text.json")) as f:
return json.load(f)
def load_text_fixture_3() -> list[str]:
with open(os.path.join(FIXTURES, "site_notes_example_3_text.json")) as f:
with open(os.path.join(FIXTURES, "pashub_site_notes_3_text.json")) as f:
return json.load(f)
def load_text_fixture_4() -> list[str]:
with open(os.path.join(FIXTURES, "site_notes_example_4_text.json")) as f:
with open(os.path.join(FIXTURES, "pashub_site_notes_4_text.json")) as f:
return json.load(f)
def load_text_fixture_5() -> list[str]:
with open(os.path.join(FIXTURES, "site_notes_example_5_text.json")) as f:
with open(os.path.join(FIXTURES, "pashub_site_notes_5_text.json")) as f:
return json.load(f)
def load_text_fixture_6() -> list[str]:
with open(os.path.join(FIXTURES, "site_notes_example_6_text.json")) as f:
with open(os.path.join(FIXTURES, "pashub_site_notes_6_text.json")) as f:
return json.load(f)
def load_text_fixture_7() -> list[str]:
    """Load the ``pashub_site_notes_7_text.json`` fixture from the ``FIXTURES`` directory.

    Returns:
        Whatever ``json.load`` produces for that file (annotated as a list of strings).
    """
    fixture_file = os.path.join(FIXTURES, "pashub_site_notes_7_text.json")
    # Decode explicitly as UTF-8 so the fixture parses the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(fixture_file, encoding="utf-8") as f:
        return json.load(f)
@ -785,6 +790,38 @@ class TestElectricShowerExtraction:
assert wu.showers[0].outlet_type == "Electric Shower"
# --- fixture 7: maisonette, 2 extensions, no property photo ---
class TestExtractNoPropertyPhoto:
    """Extraction of text fixture 7, which is expected to have no property photo."""

    def test_address_extracted_when_no_property_photo(self) -> None:
        pages = load_text_fixture_7()
        result = PasHubRdSapSiteNotesExtractor(pages).extract()

        metadata = result.inspection_metadata
        assert metadata.property_address == "Flat 3, 29 Watcombe Circus, NOTTINGHAM, NG5 2DU"
        assert metadata.property_photo is False

        general = result.general
        assert general.property_type == "Maisonette"
        assert general.number_of_extensions == 2
class TestWallThicknessExtraction:
    """Exercises ``_wall_thickness_in`` directly with hand-written line lists.

    A numeric reading is expected to come back as an int; "Unmeasurable" in
    any letter case, or a missing field, is expected to come back as None.
    """

    def _extractor(self) -> PasHubRdSapSiteNotesExtractor:
        # The extractor is built from an empty page list; each test passes its
        # own lines straight to the method under test.
        return PasHubRdSapSiteNotesExtractor([])

    def test_numeric_value_returns_int(self) -> None:
        lines = ["Wall thickness:", "310 mm"]
        assert self._extractor()._wall_thickness_in(lines) == 310

    def test_unmeasurable_returns_none(self) -> None:
        lines = ["Wall thickness:", "Unmeasurable"]
        assert self._extractor()._wall_thickness_in(lines) is None

    def test_unmeasurable_lowercase_returns_none(self) -> None:
        lines = ["Wall thickness:", "unmeasurable"]
        assert self._extractor()._wall_thickness_in(lines) is None

    def test_unmeasurable_uppercase_returns_none(self) -> None:
        lines = ["Wall thickness:", "UNMEASURABLE"]
        assert self._extractor()._wall_thickness_in(lines) is None

    def test_missing_field_returns_none(self) -> None:
        lines = []
        assert self._extractor()._wall_thickness_in(lines) is None
class TestSolidMasonryPartyWall:
@pytest.fixture
def bc(self) -> BuildingConstruction:

View file

@ -5,8 +5,8 @@ import pytest
from backend.documents_parser.pdf import pdf_to_text_list
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "site_notes_example_text.json")
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "pashub_site_notes_1_text.json")
@pytest.fixture

View file

@ -0,0 +1,257 @@
import os
from typing import Dict
from playwright.sync_api import Browser, BrowserContext, Locator, Page, sync_playwright
from backend.app.db.connection import db_session
from backend.app.db.functions.uploaded_files_functions import (
get_uploaded_file_by_listing_type_and_source,
)
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.ecmk_fetcher.address_list import (
PropertyRow,
extract_addresses_from_spreadsheet,
)
from backend.ecmk_fetcher.browser import (
attach_debug_listeners,
download_with_retry,
go_to_assessment_details,
go_to_assessments,
go_to_next_page,
login,
)
from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
FileDownloadButtonType,
build_property_id,
map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.upload import (
upload_excel_to_sharepoint,
upload_file_to_s3_and_record,
upload_file_to_sharepoint,
)
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
logger = setup_logger()
DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
class EcmkService:
    """Fetch ECMK assessment reports and push them to SharePoint, S3 and the EPC tables.

    A run downloads the dimensions workbook from SharePoint, walks the ECMK
    assessments table in a headless Chromium session and, for every row whose
    property id is present in the property map, downloads each report type
    and hands the file to the XML or PDF pipeline.
    """

    def __init__(
        self,
        sharepoint_client: DomnaSharepointClient,
        s3_bucket: str,
        property_list_filepath: str,
        sharepoint_base_path: str,
        sharepoint_excel_path: str,
        local_dimensions_path: str,
    ) -> None:
        """Store the configuration and load the property map.

        Args:
            sharepoint_client: Client used for every SharePoint download/upload.
            s3_bucket: Bucket that downloaded reports are uploaded to.
            property_list_filepath: Spreadsheet handed to
                ``extract_addresses_from_spreadsheet`` to build the property map.
            sharepoint_base_path: SharePoint base folder for PDF reports; the
                property address is used as the sub-path.
            sharepoint_excel_path: SharePoint folder holding ``Dimensions.xlsx``.
            local_dimensions_path: Local path the workbook is downloaded to and
                rows are written to.
        """
        self._sharepoint_client = sharepoint_client
        self._s3_bucket = s3_bucket
        self._sharepoint_base_path = sharepoint_base_path
        self._sharepoint_excel_path = sharepoint_excel_path
        self._local_dimensions_path = local_dimensions_path
        # Looked up with the ids produced by build_property_id().
        self._property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(
            property_list_filepath
        )

    def run(self) -> None:
        """Download the dimensions workbook, then run one browser session.

        The workbook is fetched before the browser is launched. The context
        and the browser are closed in nested ``finally`` blocks so the browser
        is closed even when creating or closing the context fails.
        """
        self._sharepoint_client.download_file(
            sharepoint_path=f"{self._sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
            local_path=self._local_dimensions_path,
        )

        with sync_playwright() as p:
            browser: Browser = p.chromium.launch(headless=True)
            try:
                context: BrowserContext = browser.new_context()
                try:
                    page: Page = context.new_page()
                    self._run_browser_session(page)
                finally:
                    context.close()
            finally:
                browser.close()

    def _run_browser_session(self, page: Page) -> None:
        """Log in and process every row on every page of the assessments table.

        Args:
            page: Playwright page used for the whole session.

        Raises:
            Exception: ``"Row processing failed: ..."`` chained to the original
                error as soon as handling of any row fails.
        """
        username: str = ""  # TODO: get from github secrets
        password: str = ""

        attach_debug_listeners(page)
        login(page, username, password)
        go_to_assessments(page)

        while True:
            rows: Locator = page.locator("#assessmentDatatable tbody tr")
            row_count: int = rows.count()

            for i in range(row_count):
                row: Locator = rows.nth(i)
                try:
                    self._process_row(page, row)
                except Exception as e:
                    raise Exception(f"Row processing failed: {str(e)}") from e

            if not go_to_next_page(page):
                break

    def _process_row(self, page: Page, row: Locator) -> None:
        """Handle one assessments-table row, skipping rows that do not qualify."""
        cells: Locator = row.locator("td")
        first_name: str = cells.nth(1).inner_text().strip()
        last_name: str = cells.nth(2).inner_text().strip()
        address: str = cells.nth(5).inner_text().strip()
        postcode: str = cells.nth(7).inner_text().strip()
        status: str = cells.nth(9).inner_text().strip()

        # Rows carrying this first/last name are never processed.
        if first_name == "Oliver" and last_name == "Stephens":
            return
        if status != "Submitted (not Lodged)":
            return

        property_id: str = build_property_id(address, postcode)
        property_row: PropertyRow | None = self._property_map.get(property_id)
        if not property_row:
            return

        logger.info(f"Match found for property {address}")

        go_to_assessment_details(page, row)
        for report_type in REPORT_TYPES:
            self._download_and_process_report(page, report_type, property_row)

        # Return to the table so the next row can be located.
        page.go_back()
        page.wait_for_selector("#assessmentDatatable tbody tr", timeout=15000)

    def _download_and_process_report(
        self,
        page: Page,
        report_type: int,
        property_row: PropertyRow,
    ) -> None:
        """Download one report type for a property and process it unless already stored."""
        hubspot_listing_id: str = property_row.listing_id

        try:
            db_file_type: FileTypeEnum = map_report_type_to_db_file_type(report_type)
        except ValueError:
            logger.error(f"Unknown report type {report_type}, skipping file")
            return

        if get_uploaded_file_by_listing_type_and_source(
            hubspot_listing_id=int(hubspot_listing_id),
            file_type=db_file_type,
            file_source=FileSourceEnum.ECMK,
        ):
            logger.debug("File already uploaded to s3, skipping")
            return

        file_path: str | None = download_with_retry(page, report_type)
        if not file_path:
            return

        logger.info(
            f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
        )

        try:
            self._process_file(
                file_path=file_path,
                report_type=report_type,
                db_file_type=db_file_type,
                sharepoint_address=property_row.address,
                hubspot_listing_id=hubspot_listing_id,
            )
        finally:
            # The local download is removed whether or not processing succeeded.
            if os.path.exists(file_path):
                os.remove(file_path)

    def _process_file(
        self,
        file_path: str,
        report_type: int,
        db_file_type: FileTypeEnum,
        sharepoint_address: str,
        hubspot_listing_id: str,
    ) -> None:
        """Route a downloaded file to the XML pipeline (raw XML) or the PDF pipeline."""
        if report_type == FileDownloadButtonType.RAW_XML.value:
            self._process_xml_file(
                file_path=file_path,
                db_file_type=db_file_type,
                hubspot_listing_id=hubspot_listing_id,
            )
        else:
            self._process_pdf_file(
                file_path=file_path,
                file_type=db_file_type,
                sharepoint_address=sharepoint_address,
                hubspot_listing_id=hubspot_listing_id,
            )

    def _process_xml_file(
        self,
        file_path: str,
        db_file_type: FileTypeEnum,
        hubspot_listing_id: str,
    ) -> None:
        """Write the XML's flattened row to the workbook, upload the workbook, then store the XML in S3."""
        with open(file_path, "r", encoding="utf-8") as f:
            xml_string: str = f.read()

        details = parse_rdsap(xml_string)
        row_data = flatten_sap_property(details)
        write_row(self._local_dimensions_path, row_data)

        upload_excel_to_sharepoint(
            client=self._sharepoint_client,
            file_path=self._local_dimensions_path,
            sharepoint_path=self._sharepoint_excel_path,
        )
        upload_file_to_s3_and_record(
            bucket=self._s3_bucket,
            file_path=file_path,
            hubspot_listing_id=hubspot_listing_id,
            file_type=db_file_type,
        )

    def _process_pdf_file(
        self,
        file_path: str,
        file_type: FileTypeEnum,
        sharepoint_address: str,
        hubspot_listing_id: str,
    ) -> None:
        """Upload a PDF to SharePoint and S3, then extract EPC data from RdSAP site notes.

        EPC extraction only runs for ``FileTypeEnum.ECMK_RD_SAP_SITE_NOTE``.
        It is best-effort: a failure is logged as a warning (with traceback)
        and not raised, so the uploads that already happened are kept.
        """
        upload_file_to_sharepoint(
            client=self._sharepoint_client,
            file_path=file_path,
            base_path=self._sharepoint_base_path,
            subpath=sharepoint_address,
        )
        uploaded_file_id: int = upload_file_to_s3_and_record(
            bucket=self._s3_bucket,
            file_path=file_path,
            hubspot_listing_id=hubspot_listing_id,
            file_type=file_type,
        )

        if file_type != FileTypeEnum.ECMK_RD_SAP_SITE_NOTE:
            return

        try:
            epc_data = parse_site_notes_pdf(file_path)
            with db_session() as session:
                save_epc_property_data(
                    session=session,
                    data=epc_data,
                    uploaded_file_id=uploaded_file_id,
                )
        except Exception:
            # exc_info keeps the cause in the log; the message alone gave none.
            logger.warning(
                f"EPC extraction failed for {os.path.basename(file_path)} — file record retained",
                exc_info=True,
            )

View file

@ -1,14 +1,32 @@
import os
from typing import Any, Mapping
from backend.ecmk_fetcher.processor import run_job
from backend.ecmk_fetcher.ecmk_service import EcmkService
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
_PROPERTY_LIST_FILE: str = (
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
)
_BASE_DIR: str = os.path.dirname(os.path.dirname(__file__))
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Entry point: build an ``EcmkService`` with the project configuration and run it.

    The job is executed exactly once, through ``EcmkService.run()``.

    Args:
        event: Invocation payload; not read.
        context: Invocation context; not read.
    """
    logger.info("Entered handler")

    service = EcmkService(
        sharepoint_client=DomnaSharepointClient(
            sharepoint_location=DomnaSites.PRIVATE_PAY
        ),
        s3_bucket="retrofit-energy-assessments-dev",
        property_list_filepath=os.path.join(_BASE_DIR, _PROPERTY_LIST_FILE),
        sharepoint_base_path="/Projects/Southern Housing/SH-SURV-26-001/Assessments",
        sharepoint_excel_path="/Projects/Southern Housing/SH-SURV-26-001/Modelling",
        local_dimensions_path=os.path.join(_BASE_DIR, "Dimensions.xlsx"),
    )
    service.run()
if __name__ == "__main__":

View file

@ -1,209 +0,0 @@
import os
from typing import Dict
from playwright.sync_api import (
sync_playwright,
Locator,
Page,
Browser,
BrowserContext,
)
from backend.app.db.functions.uploaded_files_functions import (
get_uploaded_file_by_listing_type_and_source,
)
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.ecmk_fetcher.address_list import (
PropertyRow,
extract_addresses_from_spreadsheet,
)
from backend.ecmk_fetcher.browser import (
attach_debug_listeners,
download_with_retry,
go_to_assessment_details,
go_to_assessments,
go_to_next_page,
login,
)
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
FileDownloadButtonType,
build_property_id,
map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.upload import (
upload_excel_to_sharepoint,
upload_file_to_s3_and_update_db,
upload_file_to_sharepoint,
)
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
def run_job() -> None:
    """Fetch ECMK reports for listed properties and upload them to SharePoint and S3.

    Downloads the dimensions workbook, then walks every page of the ECMK
    assessments table in a headless Chromium session. For each matching row,
    every report type not yet recorded is downloaded, processed and deleted
    locally.

    Raises:
        Exception: ``"Row processing failed: ..."`` chained to the original
            error as soon as handling of any row fails.
    """
    username: str = ""  # TODO: get from github secrets
    password: str = ""

    property_list_file: str = (
        "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
    )
    BASE_DIR: str = os.path.dirname(__file__)
    filepath: str = os.path.join(BASE_DIR, property_list_file)
    property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(filepath)

    sharepoint_client: DomnaSharepointClient = DomnaSharepointClient(
        sharepoint_location=DomnaSites.PRIVATE_PAY
    )
    sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
    sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"

    DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
    local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
    sharepoint_client.download_file(
        sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
        local_path=local_dimensions_path,
    )

    s3_bucket: str = "retrofit-energy-assessments-dev"

    with sync_playwright() as p:
        browser: Browser = p.chromium.launch(headless=True)
        context: BrowserContext = browser.new_context()
        page: Page = context.new_page()
        attach_debug_listeners(page)

        try:
            login(page, username, password)
            go_to_assessments(page)

            while True:
                rows: Locator = page.locator("#assessmentDatatable tbody tr")
                row_count: int = rows.count()

                for i in range(row_count):
                    row: Locator = rows.nth(i)
                    try:
                        cells: Locator = row.locator("td")
                        first_name: str = cells.nth(1).inner_text().strip()
                        last_name: str = cells.nth(2).inner_text().strip()
                        address: str = cells.nth(5).inner_text().strip()
                        postcode: str = cells.nth(7).inner_text().strip()
                        status: str = cells.nth(9).inner_text().strip()

                        # Rows carrying this first/last name are never processed.
                        if first_name == "Oliver" and last_name == "Stephens":
                            continue
                        if status != "Submitted (not Lodged)":
                            continue

                        property_id: str = build_property_id(address, postcode)
                        property_row: PropertyRow | None = property_map.get(property_id)
                        if not property_row:
                            continue

                        logger.info(f"Match found for property {address}")
                        sharepoint_address: str = property_row.address
                        # Same for every report type of this row.
                        hubspot_listing_id: str = property_row.listing_id

                        go_to_assessment_details(page, row)

                        for report_type in REPORT_TYPES:
                            try:
                                db_file_type: FileTypeEnum = (
                                    map_report_type_to_db_file_type(report_type)
                                )
                            except ValueError:
                                logger.error(
                                    f"Unknown report type {report_type}, skipping file"
                                )
                                continue

                            if get_uploaded_file_by_listing_type_and_source(
                                hubspot_listing_id=int(hubspot_listing_id),
                                file_type=db_file_type,
                                file_source=FileSourceEnum.ECMK,
                            ):
                                logger.debug("File already uploaded to s3, skipping")
                                continue

                            file_path: str | None = download_with_retry(
                                page, report_type
                            )
                            if not file_path:
                                continue

                            logger.info(
                                f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
                            )

                            try:
                                if report_type == FileDownloadButtonType.RAW_XML.value:
                                    with open(file_path, "r", encoding="utf-8") as f:
                                        xml_string = f.read()
                                    details = parse_rdsap(xml_string)
                                    row_data = flatten_sap_property(details)
                                    write_row(local_dimensions_path, row_data)
                                    upload_excel_to_sharepoint(
                                        client=sharepoint_client,
                                        file_path=local_dimensions_path,
                                        sharepoint_path=sharepoint_excel_path,
                                    )
                                    logger.info(
                                        f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
                                    )
                                else:
                                    upload_file_to_sharepoint(
                                        client=sharepoint_client,
                                        file_path=file_path,
                                        base_path=sharepoint_base_path,
                                        subpath=sharepoint_address,
                                    )
                                    logger.info(
                                        f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
                                    )

                                # Upload to s3 and update db
                                upload_file_to_s3_and_update_db(
                                    bucket=s3_bucket,
                                    file_path=file_path,
                                    hubspot_listing_id=hubspot_listing_id,
                                    file_type=db_file_type,
                                )
                            finally:
                                # Remove the local download whether or not
                                # processing succeeded; errors still propagate.
                                if os.path.exists(file_path):
                                    os.remove(file_path)

                        # Return to the table so the next row can be located.
                        page.go_back()
                        page.wait_for_selector(
                            "#assessmentDatatable tbody tr", timeout=15000
                        )
                    except Exception as e:
                        raise Exception(f"Row processing failed: {str(e)}") from e

                if not go_to_next_page(page):
                    break
        finally:
            # Close the browser even if closing the context fails.
            try:
                context.close()
            finally:
                browser.close()

View file

@ -0,0 +1,594 @@
from typing import Dict
from unittest.mock import MagicMock, call, patch
from backend.app.db.models.uploaded_file import FileTypeEnum
from backend.ecmk_fetcher.address_list import PropertyRow
from backend.ecmk_fetcher.ecmk_service import EcmkService
from backend.ecmk_fetcher.reports import FileDownloadButtonType
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
FAKE_PROPERTY_MAP: Dict[str, PropertyRow] = {
"10 FAKE ST SW1A 1AA": PropertyRow(
row_index=2, address="10 Fake St SW1A 1AA", listing_id="hs-001"
)
}
def make_service(
    sharepoint_client: DomnaSharepointClient | None = None,
    s3_bucket: str = "test-bucket",
    property_list_filepath: str = "/fake/properties.xlsx",
    sharepoint_base_path: str = "/base",
    sharepoint_excel_path: str = "/excel",
    local_dimensions_path: str = "/fake/Dimensions.xlsx",
) -> EcmkService:
    """Build an ``EcmkService`` with test defaults.

    A ``MagicMock`` spec'd on ``DomnaSharepointClient`` stands in when no
    (truthy) client is supplied.
    """
    client = sharepoint_client or MagicMock(spec=DomnaSharepointClient)
    service_kwargs = {
        "sharepoint_client": client,
        "s3_bucket": s3_bucket,
        "property_list_filepath": property_list_filepath,
        "sharepoint_base_path": sharepoint_base_path,
        "sharepoint_excel_path": sharepoint_excel_path,
        "local_dimensions_path": local_dimensions_path,
    }
    return EcmkService(**service_kwargs)
# ---------------------------------------------------------------------------
# __init__: loads property map from spreadsheet filepath
# ---------------------------------------------------------------------------
def test_init_loads_property_map_from_filepath() -> None:
    """The constructor passes its spreadsheet path to the address extractor once."""
    extract_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    )
    with extract_patch as mock_extract:
        make_service(property_list_filepath="/some/props.xlsx")

    assert mock_extract.call_args_list == [call("/some/props.xlsx")]
# ---------------------------------------------------------------------------
# run(): downloads Dimensions.xlsx before Playwright browser launches
# ---------------------------------------------------------------------------
def _make_playwright_mocks() -> tuple[MagicMock, MagicMock, MagicMock, MagicMock]:
    """Return ``(page, context, browser, playwright)`` mocks wired launch -> context -> page."""
    page = MagicMock()
    context = MagicMock(**{"new_page.return_value": page})
    browser = MagicMock(**{"new_context.return_value": context})
    playwright = MagicMock(**{"chromium.launch.return_value": browser})
    return page, context, browser, playwright
def test_run_downloads_dimensions_before_browser_launch() -> None:
    """run() fetches the workbook from SharePoint before launching Chromium."""
    events: list[str] = []

    def _record_download(**_: object) -> None:
        events.append("download")

    sharepoint = MagicMock(spec=DomnaSharepointClient)
    sharepoint.download_file.side_effect = _record_download

    playwright_mocks = _make_playwright_mocks()
    browser, playwright = playwright_mocks[2], playwright_mocks[3]

    def _record_launch(**_: object) -> MagicMock:
        events.append("browser")
        return browser

    playwright.chromium.launch.side_effect = _record_launch

    extract_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    )
    playwright_patch = patch("backend.ecmk_fetcher.ecmk_service.sync_playwright")
    with extract_patch, playwright_patch as mock_sync_pw:
        mock_sync_pw.return_value.__enter__.return_value = playwright
        service = make_service(
            sharepoint_client=sharepoint,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        with patch.object(service, "_run_browser_session"):
            service.run()

    assert events == ["download", "browser"]
def test_run_downloads_dimensions_with_correct_paths() -> None:
    """run() requests Dimensions.xlsx from the excel folder into the local path."""
    sharepoint = MagicMock(spec=DomnaSharepointClient)
    playwright = _make_playwright_mocks()[3]

    extract_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    )
    playwright_patch = patch("backend.ecmk_fetcher.ecmk_service.sync_playwright")
    with extract_patch, playwright_patch as mock_sync_pw:
        mock_sync_pw.return_value.__enter__.return_value = playwright
        service = make_service(
            sharepoint_client=sharepoint,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        with patch.object(service, "_run_browser_session"):
            service.run()

    expected_download = call(
        sharepoint_path="/excel/Dimensions.xlsx",
        local_path="/fake/Dimensions.xlsx",
    )
    assert sharepoint.download_file.call_args_list == [expected_download]
# ---------------------------------------------------------------------------
# run(): passes the Playwright Page into _run_browser_session
# ---------------------------------------------------------------------------
def test_run_passes_page_to_run_browser_session() -> None:
    """The page created from the browser context is what the session receives."""
    playwright_mocks = _make_playwright_mocks()
    page, playwright = playwright_mocks[0], playwright_mocks[3]

    extract_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    )
    playwright_patch = patch("backend.ecmk_fetcher.ecmk_service.sync_playwright")
    with extract_patch, playwright_patch as mock_sync_pw:
        mock_sync_pw.return_value.__enter__.return_value = playwright
        service = make_service()
        with patch.object(service, "_run_browser_session") as session_spy:
            service.run()

    assert session_spy.call_args_list == [call(page)]
# ---------------------------------------------------------------------------
# _process_file: dispatches based on report_type
# ---------------------------------------------------------------------------
def test_process_file_dispatches_to_xml_for_raw_xml() -> None:
    """A RAW_XML report type goes to the XML pipeline and never to the PDF one."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    xml_patch = patch.object(service, "_process_xml_file")
    pdf_patch = patch.object(service, "_process_pdf_file")
    with xml_patch as xml_spy, pdf_patch as pdf_spy:
        service._process_file(
            file_path="/tmp/file.xml",
            report_type=FileDownloadButtonType.RAW_XML.value,
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert xml_spy.call_count == 1
    assert not pdf_spy.called
def test_process_file_dispatches_to_pdf_for_non_xml() -> None:
    """A non-XML report type goes to the PDF pipeline and never to the XML one."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    xml_patch = patch.object(service, "_process_xml_file")
    pdf_patch = patch.object(service, "_process_pdf_file")
    with xml_patch as xml_spy, pdf_patch as pdf_spy:
        service._process_file(
            file_path="/tmp/file.pdf",
            report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
            db_file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert pdf_spy.call_count == 1
    assert not xml_spy.called
# ---------------------------------------------------------------------------
# _process_xml_file: parse → flatten → write row → upload excel → S3
# ---------------------------------------------------------------------------
def test_process_xml_file_full_chain() -> None:
    """The XML pipeline parses the file text, writes the row, uploads the workbook, then stores the XML.

    ``open`` is replaced with ``mock_open`` so the text read from the file is
    known, which lets the test check that exactly that text is parsed.
    """
    # Local import: mock_open is not in this module's top-level imports.
    from unittest.mock import mock_open

    fake_details = MagicMock()
    fake_row_data = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_excel_path="/excel",
            local_dimensions_path="/dims/Dimensions.xlsx",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_rdsap", return_value=fake_details
        ) as mock_parse,
        patch(
            "backend.ecmk_fetcher.ecmk_service.flatten_sap_property",
            return_value=fake_row_data,
        ) as mock_flatten,
        patch("backend.ecmk_fetcher.ecmk_service.write_row") as mock_write,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_excel_to_sharepoint"
        ) as mock_upload_excel,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record"
        ) as mock_s3,
        patch("builtins.open", mock_open(read_data="<xml/>")),
    ):
        service._process_xml_file(
            file_path="/tmp/report.xml",
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            hubspot_listing_id="hs-001",
        )

    # The parser must receive the text that was read from the file.
    mock_parse.assert_called_once_with("<xml/>")
    mock_flatten.assert_called_once_with(fake_details)
    mock_write.assert_called_once_with("/dims/Dimensions.xlsx", fake_row_data)
    mock_upload_excel.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/dims/Dimensions.xlsx",
        sharepoint_path="/excel",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.xml",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SURVEY_XML,
    )
# ---------------------------------------------------------------------------
# _process_pdf_file: sharepoint upload → S3 upload
# ---------------------------------------------------------------------------
def test_process_pdf_file_uploads_to_sharepoint_then_s3() -> None:
    """A PDF is uploaded once to SharePoint and once to S3 with the configured paths."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_base_path="/base",
        )

    sharepoint_patch = patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint")
    s3_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
        return_value=42,
    )
    with (
        sharepoint_patch as sharepoint_spy,
        s3_patch as s3_spy,
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
    ):
        service._process_pdf_file(
            file_path="/tmp/report.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    expected_sharepoint = call(
        client=service._sharepoint_client,
        file_path="/tmp/report.pdf",
        base_path="/base",
        subpath="10 Fake St",
    )
    expected_s3 = call(
        bucket="my-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SITE_NOTE,
    )
    assert sharepoint_spy.call_args_list == [expected_sharepoint]
    assert s3_spy.call_args_list == [expected_s3]
# ---------------------------------------------------------------------------
# _process_pdf_file: EPC extraction conditional on file_type
# ---------------------------------------------------------------------------
def test_process_pdf_file_runs_epc_extraction_for_rd_sap_site_note() -> None:
    """RdSAP site notes are parsed and saved with the id returned by the S3 upload."""
    parsed_epc = MagicMock()
    session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    parse_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
        return_value=parsed_epc,
    )
    save_patch = patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data")
    db_patch = patch("backend.ecmk_fetcher.ecmk_service.db_session")
    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=99,
        ),
        parse_patch as parse_spy,
        save_patch as save_spy,
        db_patch as db_session_spy,
    ):
        db_session_spy.return_value.__enter__.return_value = session
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert parse_spy.call_args_list == [call("/tmp/sitenote.pdf")]
    assert save_spy.call_args_list == [
        call(session=session, data=parsed_epc, uploaded_file_id=99)
    ]
def test_process_pdf_file_skips_epc_extraction_for_ecmk_site_note() -> None:
    """Plain ECMK site notes trigger neither the parser nor a DB session."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    parse_patch = patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf")
    db_patch = patch("backend.ecmk_fetcher.ecmk_service.db_session")
    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ),
        parse_patch as parse_spy,
        db_patch as db_session_spy,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert not parse_spy.called
    assert not db_session_spy.called
def test_process_pdf_file_epc_uses_separate_db_session_from_s3_upload() -> None:
    """EPC db_session opens only after upload_file_to_s3_and_record returns."""
    events: list[str] = []

    def _record_s3(**_: object) -> int:
        events.append("s3")
        return 77

    def _record_db_session() -> MagicMock:
        events.append("db_session")
        session_cm = MagicMock()
        session_cm.__enter__ = MagicMock(return_value=MagicMock())
        session_cm.__exit__ = MagicMock(return_value=False)
        return session_cm

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    s3_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
        side_effect=_record_s3,
    )
    db_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.db_session",
        side_effect=_record_db_session,
    )
    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        s3_patch,
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data"),
        db_patch,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert events == ["s3", "db_session"]
# ---------------------------------------------------------------------------
# _process_pdf_file: EPC failures swallowed with warning
# ---------------------------------------------------------------------------
def _pdf_file_patches_for_failure() -> tuple:  # type: ignore[type-arg]
    """Return un-started patchers for the SharePoint upload and the S3 upload (which returns 1)."""
    sharepoint_patch = patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint")
    s3_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
        return_value=1,
    )
    return (sharepoint_patch, s3_patch)
def test_process_pdf_file_parse_failure_logged_as_warning_not_raised() -> None:
    """A parser error yields one warning, no save, and no exception."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    upload_patches = _pdf_file_patches_for_failure()
    failing_parse = patch(
        "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
        side_effect=ValueError("bad pdf"),
    )
    save_patch = patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data")
    logger_patch = patch("backend.ecmk_fetcher.ecmk_service.logger")
    with (
        upload_patches[0],
        upload_patches[1],
        failing_parse,
        save_patch as save_spy,
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
        logger_patch as logger_spy,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert logger_spy.warning.call_count == 1
    assert not save_spy.called
def test_process_pdf_file_save_failure_logged_as_warning_not_raised() -> None:
    """An error while saving EPC data yields one warning and no exception."""
    session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    upload_patches = _pdf_file_patches_for_failure()
    parse_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
        return_value=MagicMock(),
    )
    failing_save = patch(
        "backend.ecmk_fetcher.ecmk_service.save_epc_property_data",
        side_effect=RuntimeError("db exploded"),
    )
    db_patch = patch("backend.ecmk_fetcher.ecmk_service.db_session")
    logger_patch = patch("backend.ecmk_fetcher.ecmk_service.logger")
    with (
        upload_patches[0],
        upload_patches[1],
        parse_patch,
        failing_save,
        db_patch as db_session_spy,
        logger_patch as logger_spy,
    ):
        db_session_spy.return_value.__enter__.return_value = session
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert logger_spy.warning.call_count == 1
# ---------------------------------------------------------------------------
# _run_browser_session: delegates file processing to _process_file
# ---------------------------------------------------------------------------
def _make_page_mock_with_one_matching_row() -> MagicMock:
    """Return a page mock whose table locator yields one row with fixed cell texts.

    Only cell indices 1, 2, 5, 7 and 9 exist; asking for any other index
    raises ``KeyError``.
    """
    cell_texts: dict[int, str] = {
        1: "John",
        2: "Doe",
        5: "10 FAKE ST",
        7: "SW1A 1AA",
        9: "Submitted (not Lodged)",
    }
    cells_by_index: dict[int, MagicMock] = {}
    for index, text in cell_texts.items():
        cell = MagicMock()
        cell.inner_text.return_value = text
        cells_by_index[index] = cell

    cells = MagicMock()
    cells.nth.side_effect = cells_by_index.__getitem__

    row = MagicMock()
    row.locator.return_value = cells

    rows = MagicMock()
    rows.count.return_value = 1
    rows.nth.return_value = row

    page = MagicMock()
    page.locator.return_value = rows
    return page
# address "10 FAKE ST" + postcode "SW1A 1AA" → build_property_id → "10SW1A1AA"
_BROWSER_SESSION_PROPERTY_MAP: Dict[str, PropertyRow] = {
"10SW1A1AA": PropertyRow(
row_index=2, address="10 Fake St SW1A 1AA", listing_id="12345"
)
}
def test_run_browser_session_calls_process_file_for_downloaded_file() -> None:
    """A matching row's downloaded report is handed to ``_process_file`` with the row's details."""
    page = _make_page_mock_with_one_matching_row()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=_BROWSER_SESSION_PROPERTY_MAP,
    ):
        service = make_service()

    sitenote_report = FileDownloadButtonType.SITENOTE_REPORT.value
    with (
        patch("backend.ecmk_fetcher.ecmk_service.attach_debug_listeners"),
        patch("backend.ecmk_fetcher.ecmk_service.login"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessments"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessment_details"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_next_page", return_value=False),
        patch(
            "backend.ecmk_fetcher.ecmk_service.get_uploaded_file_by_listing_type_and_source",
            return_value=None,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.download_with_retry",
            return_value="/tmp/fake.pdf",
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.map_report_type_to_db_file_type",
            return_value=FileTypeEnum.ECMK_SITE_NOTE,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.REPORT_TYPES",
            [sitenote_report],
        ),
        patch.object(service, "_process_file") as process_file_spy,
        patch("os.path.exists", return_value=False),
    ):
        service._run_browser_session(page)

    expected_call = call(
        file_path="/tmp/fake.pdf",
        report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
        db_file_type=FileTypeEnum.ECMK_SITE_NOTE,
        sharepoint_address="10 Fake St SW1A 1AA",
        hubspot_listing_id="12345",
    )
    assert process_file_spy.call_args_list == [expected_call]

View file

@ -0,0 +1,59 @@
from unittest.mock import MagicMock, patch
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
def test_handler_constructs_ecmk_service_and_calls_run() -> None:
    """The handler builds exactly one service and runs it exactly once."""
    service_instance = MagicMock()
    service_factory = MagicMock(return_value=service_instance)

    service_patch = patch(
        "backend.ecmk_fetcher.handler.handler.EcmkService",
        service_factory,
    )
    client_patch = patch(
        "backend.ecmk_fetcher.handler.handler.DomnaSharepointClient",
        return_value=MagicMock(spec=DomnaSharepointClient),
    )
    with service_patch, client_patch:
        from backend.ecmk_fetcher.handler.handler import handler

        handler({}, None)

    assert service_factory.call_count == 1
    assert service_instance.run.call_count == 1
def test_handler_passes_correct_config_to_ecmk_service() -> None:
    """The handler passes the expected bucket, SharePoint folders and file paths as keywords."""
    service_factory = MagicMock(return_value=MagicMock())

    service_patch = patch(
        "backend.ecmk_fetcher.handler.handler.EcmkService",
        service_factory,
    )
    client_patch = patch(
        "backend.ecmk_fetcher.handler.handler.DomnaSharepointClient",
        return_value=MagicMock(spec=DomnaSharepointClient),
    )
    with service_patch, client_patch:
        from backend.ecmk_fetcher.handler.handler import handler

        handler({}, None)

    passed = service_factory.call_args.kwargs
    assert passed["s3_bucket"] == "retrofit-energy-assessments-dev"

    base_path = passed["sharepoint_base_path"]
    assert base_path == "/Projects/Southern Housing/SH-SURV-26-001/Assessments"

    excel_path = passed["sharepoint_excel_path"]
    assert excel_path == "/Projects/Southern Housing/SH-SURV-26-001/Modelling"

    assert passed["property_list_filepath"].endswith(
        "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
    )
    assert passed["local_dimensions_path"].endswith("Dimensions.xlsx")

View file

@ -0,0 +1,108 @@
from typing import Generator
from unittest.mock import MagicMock, call, patch
import pytest
from backend.app.db.models.uploaded_file import FileTypeEnum
from backend.ecmk_fetcher.upload import upload_file_to_s3_and_record
@pytest.fixture
def mock_uploaded_file() -> MagicMock:
    """Stand-in for an UploadedFile row whose ``id`` is 42."""
    return MagicMock(id=42)
@pytest.fixture
def mock_session() -> MagicMock:
    """Stand-in for the database session yielded by ``db_session()``."""
    session_double = MagicMock()
    return session_double
@pytest.fixture
def patched_deps(
    mock_uploaded_file: MagicMock, mock_session: MagicMock
) -> Generator[dict[str, MagicMock], None, None]:
    """Patch the upload module's S3 helper, session factory and model class.

    Yields a dict giving tests access to each patch object plus the session
    and uploaded-file doubles wired into them.
    """
    with (
        patch("backend.ecmk_fetcher.upload.upload_file_to_s3") as s3_patch,
        patch("backend.ecmk_fetcher.upload.db_session") as session_factory_patch,
        patch(
            "backend.ecmk_fetcher.upload.UploadedFile",
            return_value=mock_uploaded_file,
        ) as model_patch,
    ):
        # Make `with db_session() as session:` hand back the session double
        # and never suppress exceptions on exit.
        context_manager = session_factory_patch.return_value
        context_manager.__enter__.return_value = mock_session
        context_manager.__exit__.return_value = False

        handles: dict[str, MagicMock] = {
            "s3": s3_patch,
            "db_ctx": session_factory_patch,
            "model": model_patch,
            "session": mock_session,
            "uploaded_file": mock_uploaded_file,
        }
        yield handles
def test_returns_uploaded_file_id_as_int(
    patched_deps: dict[str, MagicMock],
) -> None:
    """The function returns the new record's id as a plain int."""
    returned_id = upload_file_to_s3_and_record(
        bucket="test-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    assert isinstance(returned_id, int)
    assert returned_id == 42
def test_uploads_to_s3_with_key_derived_from_listing_id_and_filename(
    patched_deps: dict[str, MagicMock],
) -> None:
    """The S3 key is built from the listing id and the file's base name."""
    upload_file_to_s3_and_record(
        bucket="my-bucket",
        file_path="/some/path/site_note.pdf",
        hubspot_listing_id="hs-999",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    s3_uploader = patched_deps["s3"]
    expected_key = "documents/hubspot_listing_id/hs-999/site_note.pdf"
    s3_uploader.assert_called_once_with(
        "/some/path/site_note.pdf", "my-bucket", expected_key
    )
def test_adds_uploaded_file_record_to_session(
    patched_deps: dict[str, MagicMock],
) -> None:
    """The new record is added to the session and the session is flushed."""
    upload_file_to_s3_and_record(
        bucket="test-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    session = patched_deps["session"]
    record = patched_deps["uploaded_file"]
    session.add.assert_called_once_with(record)
    session.flush.assert_called_once()
def test_site_note_type_does_not_trigger_pdf_parsing(
    patched_deps: dict[str, MagicMock],
) -> None:
    """Uploading a site note must not invoke PDF parsing.

    parse_site_notes_pdf is deliberately not mocked here: were a parsing
    branch still present the call would blow up, so the test only passes
    when that branch is absent.
    """
    returned_id = upload_file_to_s3_and_record(
        bucket="test-bucket",
        file_path="/tmp/site_note.pdf",
        hubspot_listing_id="hs-002",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    assert returned_id == 42

View file

@ -1,5 +1,6 @@
from datetime import datetime, timezone
import os
from typing import cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
@ -7,9 +8,12 @@ from backend.app.db.models.uploaded_file import (
FileTypeEnum,
UploadedFile,
)
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
logger = setup_logger()
def upload_file_to_sharepoint(
client: DomnaSharepointClient,
@ -41,9 +45,9 @@ def upload_excel_to_sharepoint(
# TODO: this should be moved to somewhere common and called by pashub fetcher
def upload_file_to_s3_and_update_db(
def upload_file_to_s3_and_record(
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
) -> None:
) -> int:
filename: str = os.path.basename(file_path)
key: str = f"documents/hubspot_listing_id/{hubspot_listing_id}/{filename}"
@ -61,4 +65,7 @@ def upload_file_to_s3_and_update_db(
with db_session() as session:
# TODO: we should do multiple files at once to reduce db trips
session.add(uploaded_file)
session.commit()
session.flush()
uploaded_file_id: int = int(cast(int, uploaded_file.id))
return uploaded_file_id

View file

@ -282,11 +282,6 @@ def test_default_export_integration(db_session):
df["sap_points"].sum()
)
assert df.shape == (
10,
100,
), "Expected dataframe shape to be (10, 100), got {}".format(df.shape)
def test_solar_with_battery_example(db_session):
test_portfolio_id = 1
@ -337,7 +332,7 @@ def test_solar_with_battery_example(db_session):
"creation_status": "PropertyCreationStatus.READY",
"uprn": 100090438731,
"landlord_property_id": "BARR052",
"building_reference_number": 3460742868.0,
"building_reference_number": 3460742868,
"status": "PortfolioStatus.ASSESSMENT",
"address": "52, Barrack Street",
"postcode": "CO1 2LR",
@ -566,6 +561,8 @@ def test_solar_with_battery_example(db_session):
creation_status=PropertyCreationStatus[row.creation_status.split(".")[-1]],
status=PortfolioStatus[row.status.split(".")[-1]],
uprn=row.uprn,
address=row.address,
postcode=row.postcode,
property_type=row.property_type,
current_sap_points=row.current_sap_points,
current_epc_rating=Epc[row.current_epc_rating.split(".")[-1]],

View file

@ -1,72 +1,18 @@
from datetime import datetime, timezone
import os
import re
from typing import Any, Dict, List, Optional
from openpyxl import load_workbook
from typing import Any, Dict, List
from backend.app.config import get_settings
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
FileSourceEnum,
UploadedFile,
)
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.job import Job
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
from backend.utils.subtasks import task_handler
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
def extract_jobs(filepath: str) -> List[Job]:
wb = load_workbook(filepath, data_only=True)
# ws = wb["watford warm homes (wave 3) mai"]
ws = wb["filtered"]
HEADER_ROW = 3
headers: Dict[str, int] = {}
for col in range(1, ws.max_column + 1):
value = str(ws.cell(row=HEADER_ROW, column=col).value)
if value:
headers[value.strip()] = col
name_col = headers["Name"]
# link_col = headers["Pashub Link"]
link_col = headers["PasHub Link"]
jobs: List[Job] = []
for row in range(HEADER_ROW + 1, ws.max_row + 1):
name = ws.cell(row=row, column=name_col).value
link = ws.cell(row=row, column=link_col).value
if not name or not link:
continue
match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link))
if not match:
continue
jobs.append(
{
"id": match.group(1),
"address": str(name),
}
)
return jobs
S3_BUCKET = "retrofit-energy-assessments-dev"
def get_pashub_client(email: str, password: str) -> PashubClient:
@ -75,114 +21,6 @@ def get_pashub_client(email: str, password: str) -> PashubClient:
return PashubClient(token=token)
def upload_job_to_sharepoint(
sharepoint_client: DomnaSharepointClient,
# base_path: str,
sharepoint_link: str,
job_files: List[str],
) -> None:
# job_path = f"{base_path}/{job['address']}"
# Create main job folder
# sharepoint_client.makedir(job["address"], base_path)
# Create subfolders
# for folder in SharepointSubfolders:
# sharepoint_client.makedir(folder.value, job_path)
# Upload into assessment folder
assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"
for file_path in job_files:
filename = file_path.split("/")[-1]
sharepoint_client.upload_file(
file_path,
assessment_path,
filename,
)
def upload_job_to_s3_and_update_db(
job_files: List[str], uprn: Optional[str], hubspot_deal_id: Optional[str]
) -> None:
bucket = "retrofit-energy-assessments-dev"
if not uprn and not hubspot_deal_id:
return
base_path = (
f"documents/uprn/{uprn}"
if uprn
else f"documents/hubspot_deal_id/{hubspot_deal_id}"
)
uploaded_files: List[UploadedFile] = []
for file_path in job_files:
filename = os.path.basename(file_path)
file_key = f"{base_path}/{filename}"
upload_file_to_s3(file_path, bucket, file_key)
# load row to db
# TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does
uploaded_files.append(
UploadedFile(
s3_file_bucket=bucket,
s3_file_key=file_key,
s3_upload_timestamp=datetime.now(timezone.utc),
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=FileSourceEnum.PAS_HUB.value,
file_type=infer_file_type(filename),
)
)
with db_session() as session:
session.add_all(uploaded_files)
session.commit()
pass
def process_job(
job: PashubToAraTriggerRequest,
pashub_client: PashubClient,
sharepoint_client: DomnaSharepointClient,
) -> List[str]:
job_id = job.pashub_job_id
uprn: Optional[str] = job.uprn or pashub_client.get_uprn_by_job_id(job_id)
hubspot_deal_id: Optional[str] = job.hubspot_deal_id
if uprn:
logger.info(f"Got UPRN {uprn} for job {job_id}")
else:
logger.info(f"No UPRN found for job {job_id}")
job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
upload_job_to_s3_and_update_db(job_files, uprn, hubspot_deal_id)
# # Comment out sharepoint loading for now:
# Seems like the sharepoint link in pas hub is inconsistent in terms
# of whether it points to a property or a project
# if job.sharepoint_link:
# upload_job_to_sharepoint(sharepoint_client, job.sharepoint_link, job_files)
for file_path in job_files:
try:
os.remove(file_path)
except OSError:
logger.warning(f"Failed to delete temp file {file_path}")
return job_files
@task_handler()
def handler(body: Dict[str, Any], context: Any) -> List[str]:
logger.info("Received message")
@ -195,8 +33,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
if (not pas_hub_email) or (not pas_hub_password):
raise ValueError("Pas Hub credentials not provided")
pashub_client = get_pashub_client(pas_hub_email, pas_hub_password)
sharepoint_client = DomnaSharepointClient(
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
)
@ -205,26 +41,24 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
payload = PashubToAraTriggerRequest.model_validate(body)
logger.debug("Successfully validated request body")
service = PashubService(
pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
)
try:
files: List[str] = process_job(
payload,
pashub_client,
sharepoint_client,
)
files: List[str] = service.run(payload)
except UnauthorizedError:
logger.warning("Token expired - refreshing")
pashub_client = get_pashub_client(
pas_hub_email,
pas_hub_password,
service = PashubService(
pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
)
# retry once
files = process_job(
payload,
pashub_client,
sharepoint_client,
)
files = service.run(payload)
logger.info(f"Saved {len(files)} files")

View file

@ -0,0 +1,158 @@
import os
from datetime import datetime, timezone
from typing import List, NamedTuple, Optional, cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
FileSourceEnum,
FileTypeEnum,
UploadedFile,
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
logger = setup_logger()
class _FileUploadRecord(NamedTuple):
file_path: str
file_type: Optional[str]
uploaded_file_id: int
class PashubService:
    """Store a PasHub job's core evidence files and parse its site notes.

    For a single trigger request the service downloads the job's core
    evidence files via the PasHub client, uploads them to S3, records each
    one as an ``UploadedFile`` row and, for RdSAP site-note files, parses the
    PDF and saves the resulting EPC property data.
    """

    def __init__(
        self,
        pashub_client: PashubClient,
        sharepoint_client: DomnaSharepointClient,
        s3_bucket: str,
    ) -> None:
        """Keep the collaborators and the destination S3 bucket name."""
        self._pashub_client = pashub_client
        self._sharepoint_client = sharepoint_client
        self._s3_bucket = s3_bucket

    def run(self, request: PashubToAraTriggerRequest) -> List[str]:
        """Process one PasHub job and return the paths of the fetched files.

        The UPRN comes from the request when present, otherwise it is looked
        up through the PasHub client. Files are uploaded only when a UPRN or
        a HubSpot deal id is available. The downloaded files are always
        deleted afterwards, even when uploading or parsing raises.

        Args:
            request: Validated trigger request identifying the PasHub job.

        Returns:
            The file paths returned by the PasHub client (already deleted
            from disk on a best-effort basis by the time this returns).
        """
        job_id = request.pashub_job_id
        uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(
            job_id
        )
        hubspot_deal_id: Optional[str] = request.hubspot_deal_id

        if uprn:
            logger.info(f"Got UPRN {uprn} for job {job_id}")
        else:
            logger.info(f"No UPRN found for job {job_id}")

        job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(
            job_id
        )

        try:
            if uprn or hubspot_deal_id:
                logger.info("Uploading files to s3")
                upload_records = self._upload_to_s3_and_update_db(
                    job_files, uprn, hubspot_deal_id
                )
                self._save_site_notes(upload_records)

            # SharePoint upload disabled: pashub sharepoint_link is inconsistent
            # (points to property or project unpredictably)
            # if request.sharepoint_link:
            #     self._upload_to_sharepoint(request.sharepoint_link, job_files)
        finally:
            # Clean up in `finally` so a failed upload does not leave the
            # downloaded files behind.
            self._remove_temp_files(job_files)

        return job_files

    def _remove_temp_files(self, file_paths: List[str]) -> None:
        """Delete each path, logging a warning instead of raising on failure."""
        for file_path in file_paths:
            try:
                os.remove(file_path)
            except OSError:
                logger.warning(f"Failed to delete temp file {file_path}")

    def _upload_to_s3_and_update_db(
        self,
        job_files: List[str],
        uprn: Optional[str],
        hubspot_deal_id: Optional[str],
    ) -> List[_FileUploadRecord]:
        """Upload every file to S3 and add one ``UploadedFile`` row per file.

        Keys are placed under ``documents/uprn/<uprn>`` when a UPRN is known,
        otherwise under ``documents/hubspot_deal_id/<deal id>``.

        Returns:
            One record per file, in the order of ``job_files``; an empty list
            when neither identifier is available.
        """
        if not uprn and not hubspot_deal_id:
            return []

        base_path = (
            f"documents/uprn/{uprn}"
            if uprn
            else f"documents/hubspot_deal_id/{hubspot_deal_id}"
        )

        uploaded_files: List[UploadedFile] = []
        for file_path in job_files:
            filename = os.path.basename(file_path)
            file_key = f"{base_path}/{filename}"

            upload_file_to_s3(file_path, self._s3_bucket, file_key)

            uploaded_files.append(
                UploadedFile(
                    s3_file_bucket=self._s3_bucket,
                    s3_file_key=file_key,
                    s3_upload_timestamp=datetime.now(timezone.utc),
                    uprn=int(uprn) if uprn else None,
                    hubspot_deal_id=hubspot_deal_id,
                    file_source=FileSourceEnum.PAS_HUB.value,
                    file_type=infer_file_type(filename),
                )
            )

        with db_session() as session:
            session.add_all(uploaded_files)
            # Flush so the generated ids are populated before they are read.
            session.flush()

            # Built while the session is still open; `uploaded_files` holds
            # exactly one row per entry of `job_files`, in the same order.
            upload_records = [
                _FileUploadRecord(
                    file_path=file_path,
                    file_type=cast(Optional[str], uploaded_file.file_type),
                    uploaded_file_id=cast(int, uploaded_file.id),
                )
                for file_path, uploaded_file in zip(job_files, uploaded_files)
            ]

        return upload_records

    @staticmethod
    def _is_rd_sap_site_note(file_type: Optional[str]) -> bool:
        """Return True only when ``file_type`` denotes an RdSAP site note."""
        if file_type is None:
            return False
        try:
            return FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE
        except ValueError:
            # An unrecognised type string is simply not a site note; it must
            # not abort processing of the remaining files.
            return False

    def _save_site_notes(self, upload_records: List[_FileUploadRecord]) -> None:
        """Parse every RdSAP site-note file and save its EPC property data.

        Parsing/saving is best-effort: a failure for one file is logged as a
        warning (with traceback) and the remaining files are still processed.
        """
        for record in upload_records:
            if not self._is_rd_sap_site_note(record.file_type):
                continue

            try:
                epc_data: EpcPropertyData = parse_site_notes_pdf(record.file_path)
                with db_session() as session:
                    save_epc_property_data(
                        session, epc_data, uploaded_file_id=record.uploaded_file_id
                    )
            except Exception:
                logger.warning(
                    f"Failed to parse site notes {record.file_path}", exc_info=True
                )

    def _upload_to_sharepoint(
        self,
        sharepoint_link: str,
        job_files: List[str],
    ) -> None:
        """Upload every file into the assessment subfolder of ``sharepoint_link``."""
        assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"

        for file_path in job_files:
            # Same basename helper as the S3 path, instead of splitting on "/".
            filename = os.path.basename(file_path)
            self._sharepoint_client.upload_file(file_path, assessment_path, filename)

View file

@ -0,0 +1,43 @@
import re
from typing import Dict, List
from openpyxl import load_workbook
from backend.pashub_fetcher.job import Job
def extract_jobs(filepath: str) -> List[Job]:
    """Read PasHub jobs from the "filtered" sheet of an Excel workbook.

    Column headers are taken from row 3. Every later row that has both a
    "Name" and a "PasHub Link" value, and whose link contains a
    ``/jobs/<id>/`` segment, yields one job.

    Args:
        filepath: Path of the workbook to read.

    Returns:
        Jobs as dicts with the job ``id`` taken from the link and the
        ``address`` taken from the "Name" column.

    Raises:
        KeyError: If the sheet or either required header is missing.
    """
    wb = load_workbook(filepath, data_only=True)
    ws = wb["filtered"]

    HEADER_ROW = 3
    job_link_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_header = ws.cell(row=HEADER_ROW, column=col).value
        # Test the raw value: str(None) is the truthy string "None", which
        # would otherwise register empty cells as a header called "None".
        if raw_header is None:
            continue
        title = str(raw_header).strip()
        if title:
            headers[title] = col

    name_col = headers["Name"]
    link_col = headers["PasHub Link"]

    jobs: List[Job] = []

    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value

        if not name or not link:
            continue

        match = job_link_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs

View file

@ -0,0 +1,254 @@
from typing import Optional
from unittest.mock import MagicMock, call, patch
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details"
def make_request(
    pashub_link: str = FAKE_JOB_LINK,
    uprn: Optional[str] = None,
    hubspot_deal_id: Optional[str] = None,
    sharepoint_link: Optional[str] = None,
) -> PashubToAraTriggerRequest:
    """Build a trigger request, defaulting to the fake job link and no ids."""
    fields = {
        "pashub_link": pashub_link,
        "uprn": uprn,
        "hubspot_deal_id": hubspot_deal_id,
        "sharepoint_link": sharepoint_link,
    }
    return PashubToAraTriggerRequest(**fields)
def make_service(
    pashub_client: Optional[PashubClient] = None,
    sharepoint_client: Optional[DomnaSharepointClient] = None,
    s3_bucket: str = "test-bucket",
) -> PashubService:
    """Build a PashubService, substituting spec'd mocks for missing clients."""
    if not pashub_client:
        pashub_client = MagicMock(spec=PashubClient)
    if not sharepoint_client:
        sharepoint_client = MagicMock(spec=DomnaSharepointClient)

    return PashubService(
        pashub_client=pashub_client,
        sharepoint_client=sharepoint_client,
        s3_bucket=s3_bucket,
    )
# ---------------------------------------------------------------------------
# run(): returns file paths
# ---------------------------------------------------------------------------
def test_run_returns_file_paths() -> None:
    """run() returns the file paths supplied by the PasHub client."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/a.pdf",
        "/tmp/b.pdf",
    ]
    service = make_service(pashub_client=client)

    with patch("backend.pashub_fetcher.pashub_service.os.remove"):
        returned_paths = service.run(make_request())

    assert returned_paths == ["/tmp/a.pdf", "/tmp/b.pdf"]
# ---------------------------------------------------------------------------
# run(): skips upload when neither uprn nor hubspot_deal_id
# ---------------------------------------------------------------------------
def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
    """Without a UPRN or a deal id nothing is uploaded to S3."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
    service = make_service(pashub_client=client)
    request = make_request(uprn=None, hubspot_deal_id=None)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as s3_upload,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(request)

    s3_upload.assert_not_called()
# ---------------------------------------------------------------------------
# run(): UPRN present → uploads each file to S3 with correct bucket/key
# ---------------------------------------------------------------------------
def test_run_uploads_files_to_s3_using_uprn_path() -> None:
    """With a UPRN, each file is uploaded in order under documents/uprn/<uprn>/."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/SiteNote_001.pdf",
        "/tmp/Photopack_002.pdf",
    ]
    service = make_service(pashub_client=client, s3_bucket="my-bucket")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as s3_upload,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(make_request(uprn="12345"))

    expected_uploads = [
        call(
            "/tmp/SiteNote_001.pdf",
            "my-bucket",
            "documents/uprn/12345/SiteNote_001.pdf",
        ),
        call(
            "/tmp/Photopack_002.pdf",
            "my-bucket",
            "documents/uprn/12345/Photopack_002.pdf",
        ),
    ]
    s3_upload.assert_has_calls(expected_uploads, any_order=False)
# ---------------------------------------------------------------------------
# run(): UPRN present → UploadedFile records added to DB session
# ---------------------------------------------------------------------------
def test_run_persists_uploaded_file_records_to_db() -> None:
    """With a UPRN, one UploadedFile per file is passed to session.add_all."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/SiteNote_001.pdf"
    ]
    session_double = MagicMock()
    service = make_service(pashub_client=client)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as session_factory,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        session_factory.return_value.__enter__.return_value = session_double
        service.run(make_request(uprn="12345"))

    session_double.add_all.assert_called_once()
    # First positional argument of the single add_all call.
    positional_args = session_double.add_all.call_args[0]
    persisted: list = positional_args[0]

    assert len(persisted) == 1
    record = persisted[0]
    assert record.s3_file_bucket == "test-bucket"
    assert record.uprn == 12345
# ---------------------------------------------------------------------------
# run(): hubspot_deal_id only → uses deal_id S3 path prefix
# ---------------------------------------------------------------------------
def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
    """With only a deal id, the S3 key uses the hubspot_deal_id prefix."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/SiteNote_001.pdf"
    ]
    service = make_service(pashub_client=client, s3_bucket="my-bucket")
    request = make_request(uprn=None, hubspot_deal_id="deal-abc")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as s3_upload,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(request)

    expected_key = "documents/hubspot_deal_id/deal-abc/SiteNote_001.pdf"
    s3_upload.assert_called_once_with(
        "/tmp/SiteNote_001.pdf", "my-bucket", expected_key
    )
# ---------------------------------------------------------------------------
# run(): RD_SAP_SITE_NOTE file → site notes parsed and saved to DB
# ---------------------------------------------------------------------------
def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
    """An RdSAP site-note file is parsed and saved with its uploaded-file id."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/RdSAP_SiteNote_001.pdf"
    ]

    parsed_epc = MagicMock()
    session_double = MagicMock()
    assigned_id = 99

    def _assign_id_to_first(files: list) -> None:
        # Stands in for the database assigning a primary key to the row.
        setattr(files[0], "id", assigned_id)

    service = make_service(pashub_client=client)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch(
            "backend.pashub_fetcher.pashub_service.parse_site_notes_pdf",
            return_value=parsed_epc,
        ) as parse_patch,
        patch(
            "backend.pashub_fetcher.pashub_service.save_epc_property_data"
        ) as save_patch,
        patch("backend.pashub_fetcher.pashub_service.db_session") as session_factory,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        session_double.add_all = MagicMock(side_effect=_assign_id_to_first)
        session_factory.return_value.__enter__.return_value = session_double
        service.run(make_request(uprn="12345"))

    parse_patch.assert_called_once_with("/tmp/RdSAP_SiteNote_001.pdf")
    save_patch.assert_called_once_with(
        session_double, parsed_epc, uploaded_file_id=assigned_id
    )
# ---------------------------------------------------------------------------
# run(): site notes parse failure → warning logged, run returns normally
# ---------------------------------------------------------------------------
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
    """A parse failure is logged as a warning and run() still returns normally."""
    client = MagicMock(spec=PashubClient)
    client.get_uprn_by_job_id.return_value = None
    client.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/RdSAP_SiteNote_001.pdf"
    ]
    service = make_service(pashub_client=client)
    parse_failure = ValueError("corrupt pdf")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch(
            "backend.pashub_fetcher.pashub_service.parse_site_notes_pdf",
            side_effect=parse_failure,
        ),
        patch(
            "backend.pashub_fetcher.pashub_service.save_epc_property_data"
        ) as save_patch,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.logger") as logger_patch,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        returned_paths = service.run(make_request(uprn="12345"))

    assert returned_paths == ["/tmp/RdSAP_SiteNote_001.pdf"]
    logger_patch.warning.assert_called()
    save_patch.assert_not_called()

View file

@ -89,13 +89,13 @@ class SapVentilation:
@dataclass
class WindowTransmissionDetails:
u_value: float
data_source: int
data_source: Union[int, str]
solar_transmittance: float
@dataclass
class SapWindow:
pvc_frame: str
frame_material: Optional[str]
glazing_gap: Union[int, str]
orientation: Union[int, str]
window_type: Union[int, str]

View file

@ -51,13 +51,17 @@ from datatypes.epc.schema.rdsap_schema_21_0_1 import (
RdSapSchema21_0_1,
EnergyElement as EnergyElement_21_0_1,
)
from datatypes.epc.surveys.elmhurst_site_notes import (
ElmhurstSiteNotes,
VentilationAndCooling as ElmhurstVentilation,
Window as ElmhurstWindow,
)
from datatypes.epc.surveys.pashub_rdsap_site_notes import (
BuildingConstruction,
BuildingMeasurements,
ExtensionConstruction,
ExtensionMeasurements,
ExtensionRoofSpace,
FloorConstruction,
FloorMeasurement,
HeatingAndHotWater,
PasHubRdSapSiteNotes,
@ -200,6 +204,80 @@ class EpcPropertyDataMapper:
sap_ventilation=_map_sap_ventilation(ventilation),
)
@staticmethod
def from_elmhurst_site_notes(survey: ElmhurstSiteNotes) -> EpcPropertyData:
    """Map an Elmhurst site-notes survey onto an EpcPropertyData.

    Roofs, walls, floors and main heating are left as empty lists, and the
    wet-room, extension and PV-battery counts are fixed at zero. A single
    building part is produced for the whole survey.
    """
    details = survey.property_details
    meters = survey.meters
    renewables = survey.renewables
    ventilation = survey.ventilation
    lighting = survey.lighting

    built_form = _strip_code(survey.attachment)
    property_type = _strip_code(survey.property_type)

    # Prefer the house number, then the house name; with neither, the
    # address line is just the street.
    prefix = details.house_number or details.house_name or ""
    if prefix:
        address_line_1 = f"{prefix}, {details.street}"
    else:
        address_line_1 = details.street

    energy_source = SapEnergySource(
        mains_gas=meters.main_gas,
        meter_type=meters.electricity_meter_type,
        pv_battery_count=0,
        wind_turbines_count=1 if renewables.wind_turbine_present else 0,
        gas_smart_meter_present=meters.gas_smart_meter,
        is_dwelling_export_capable=renewables.export_capable_meter,
        wind_turbines_terrain_type=renewables.wind_turbines_terrain_type,
        electricity_smart_meter_present=meters.electricity_smart_meter,
    )

    floor_area_total = round(
        sum(floor.area_m2 for floor in survey.dimensions.floors), 2
    )

    # The generic low-energy count is only reported when the separate
    # LED/CFL counts are not known.
    low_energy_bulbs = (
        None if lighting.led_cfl_count_known else lighting.low_energy_count
    )

    return EpcPropertyData(
        dwelling_type=f"{built_form} {property_type.lower()}",
        inspection_date=details.inspection_date,
        tenure=details.tenure,
        transaction_type=details.transaction_type,
        address_line_1=address_line_1,
        post_town=details.town,
        postcode=details.postcode,
        report_reference=details.reference_number,
        roofs=[],
        walls=[],
        floors=[],
        main_heating=[],
        door_count=survey.door_count,
        sap_heating=_map_elmhurst_sap_heating(survey),
        sap_windows=[_map_elmhurst_window(window) for window in survey.windows],
        sap_energy_source=energy_source,
        sap_building_parts=[_map_elmhurst_building_part(survey)],
        solar_water_heating=renewables.solar_water_heating,
        has_hot_water_cylinder=survey.water_heating.hot_water_cylinder_present,
        has_fixed_air_conditioning=ventilation.fixed_space_cooling,
        wet_rooms_count=0,
        extensions_count=0,
        heated_rooms_count=survey.heated_habitable_rooms,
        open_chimneys_count=ventilation.open_chimneys_count,
        habitable_rooms_count=survey.habitable_rooms,
        insulated_door_count=survey.insulated_door_count,
        cfl_fixed_lighting_bulbs_count=lighting.cfl_count,
        led_fixed_lighting_bulbs_count=lighting.led_count,
        incandescent_fixed_lighting_bulbs_count=lighting.incandescent_count,
        total_floor_area_m2=floor_area_total,
        built_form=built_form,
        property_type=property_type,
        has_conservatory=survey.has_conservatory,
        blocked_chimneys_count=ventilation.blocked_chimneys_count,
        number_of_storeys=survey.number_of_storeys,
        hydro=renewables.hydro_electricity_generated_kwh > 0,
        photovoltaic_array=renewables.photovoltaic_panel != "None",
        sap_ventilation=_map_elmhurst_ventilation(ventilation),
        percent_draughtproofed=survey.draught_proofing_percent,
        waste_water_heat_recovery=(
            "Present" if renewables.wwhrs_present else "None"
        ),
        any_unheated_rooms=survey.heated_habitable_rooms < survey.habitable_rooms,
        low_energy_fixed_lighting_bulbs_count=low_energy_bulbs,
        energy_rating_current=survey.current_sap_rating,
        energy_rating_potential=survey.potential_sap_rating,
        environmental_impact_current=survey.current_ei_rating,
        environmental_impact_potential=survey.potential_ei_rating,
        co2_emissions_current=survey.co2_emissions_current_t,
    )
@staticmethod
def from_rdsap_schema_17_0(schema: RdSapSchema17_0) -> EpcPropertyData:
es = schema.sap_energy_source
@ -866,7 +944,7 @@ class EpcPropertyDataMapper:
# 20.0.0 SapWindow lacks frame/gap/draught fields present in later schemas
sap_windows=[
SapWindow(
pvc_frame="",
frame_material=None,
glazing_gap=0,
orientation=w.orientation,
window_type=w.window_type,
@ -1043,7 +1121,7 @@ class EpcPropertyDataMapper:
),
sap_windows=[
SapWindow(
pvc_frame=w.pvc_frame,
frame_material="PVC" if w.pvc_frame == "true" else None,
glazing_gap=w.glazing_gap,
orientation=w.orientation,
window_type=w.window_type,
@ -1277,7 +1355,7 @@ class EpcPropertyDataMapper:
# SAP windows
sap_windows=[
SapWindow(
pvc_frame=w.pvc_frame,
frame_material="PVC" if w.pvc_frame == "true" else None,
glazing_gap=w.glazing_gap,
orientation=w.orientation,
window_type=w.window_type,
@ -1453,6 +1531,12 @@ class EpcPropertyDataMapper:
# ---------------------------------------------------------------------------
def _strip_code(value: str) -> str:
"""Strip leading uppercase code from Elmhurst coded strings, e.g. 'CA Cavity''Cavity'."""
parts = value.split(" ", 1)
return parts[1] if len(parts) > 1 else value
def _extract_age_band(age_range: str) -> str:
"""Return the letter code from a site-notes age range, e.g. 'I: 1996 - 2002''I'."""
return age_range.split(":")[0].strip()
@ -1497,7 +1581,7 @@ def _map_main_building_part(
construction_age_band=_extract_age_band(main.age_range),
wall_construction=main.walls_construction_type,
wall_insulation_type=main.walls_insulation_type,
wall_thickness_measured=main.wall_thickness_mm > 0,
wall_thickness_measured=main.wall_thickness_mm is not None,
party_wall_construction=main.party_wall_construction_type,
sap_floor_dimensions=_map_floor_dimensions(measurements.main_building.floors),
wall_thickness_mm=main.wall_thickness_mm,
@ -1521,7 +1605,7 @@ def _map_extension_building_part(
construction_age_band=_extract_age_band(ext_c.age_range),
wall_construction=ext_c.walls_construction_type,
wall_insulation_type=ext_c.walls_insulation_type,
wall_thickness_measured=ext_c.wall_thickness_mm > 0,
wall_thickness_measured=ext_c.wall_thickness_mm is not None,
party_wall_construction=ext_c.party_wall_construction_type,
sap_floor_dimensions=_map_floor_dimensions(ext_m.floors),
wall_thickness_mm=ext_c.wall_thickness_mm,
@ -1532,7 +1616,7 @@ def _map_extension_building_part(
def _map_sap_window(window: Window) -> SapWindow:
return SapWindow(
pvc_frame=window.frame_type,
frame_material=window.frame_type,
glazing_gap=window.glazing_gap,
orientation=window.orientation,
window_type=window.window_type,
@ -1574,7 +1658,11 @@ def _map_sap_heating(
fuel_type = (
_raw_fuel
if _raw_fuel
else ("Electricity" if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES else _raw_fuel)
else (
"Electricity"
if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES
else _raw_fuel
)
)
return SapHeating(
@ -1596,7 +1684,11 @@ def _map_sap_heating(
secondary_fuel_type=secondary_fuel_type,
secondary_heating_type=heating.secondary_heating.secondary_system,
shower_outlets=shower_outlets,
cylinder_size=heating.water_heating.cylinder_size if heating.water_heating.cylinder_size != "No Cylinder" else None,
cylinder_size=(
heating.water_heating.cylinder_size
if heating.water_heating.cylinder_size != "No Cylinder"
else None
),
cylinder_insulation_type=heating.water_heating.insulation_type,
cylinder_insulation_thickness_mm=heating.water_heating.insulation_thickness_mm,
immersion_heating_type=heating.water_heating.immersion_type,
@ -1617,3 +1709,112 @@ def _map_sap_ventilation(ventilation: Ventilation) -> SapVentilation:
flueless_gas_fires_count=ventilation.number_of_flueless_gas_fires,
ventilation_in_pcdf_database=ventilation.ventilation_in_pcdf_database,
)
def _map_elmhurst_building_part(survey: ElmhurstSiteNotes) -> SapBuildingPart:
    """Build the single "main" SapBuildingPart from an Elmhurst survey.

    Coded Elmhurst strings are passed through ``_strip_code`` before being
    stored; numeric values and flags are copied across unchanged.
    """
    # One SapFloorDimension per surveyed floor; ``floor`` is the position of
    # the entry within ``dimensions.floors`` (0-based).
    floor_dimensions = []
    for index, storey in enumerate(survey.dimensions.floors):
        floor_dimensions.append(
            SapFloorDimension(
                room_height_m=storey.room_height_m,
                total_floor_area_m2=storey.area_m2,
                party_wall_length_m=storey.party_wall_length_m,
                heat_loss_perimeter_m=storey.heat_loss_perimeter_m,
                floor=index,
            )
        )

    walls = survey.walls
    roof = survey.roof
    floor = survey.floor

    return SapBuildingPart(
        identifier="main",
        construction_age_band=_strip_code(survey.construction_age_band),
        wall_construction=_strip_code(walls.wall_type),
        wall_insulation_type=_strip_code(walls.insulation),
        # The survey records "thickness unknown"; the domain flag is its inverse.
        wall_thickness_measured=not walls.thickness_unknown,
        party_wall_construction=_strip_code(walls.party_wall_type),
        sap_floor_dimensions=floor_dimensions,
        wall_thickness_mm=walls.thickness_mm,
        roof_insulation_location=_strip_code(roof.insulation),
        roof_insulation_thickness=roof.insulation_thickness_mm,
        floor_type=_strip_code(floor.location),
        floor_construction_type=_strip_code(floor.floor_type),
        floor_insulation_type_str=_strip_code(floor.insulation),
        floor_u_value_known=floor.u_value_known,
    )
def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow:
    """Convert one Elmhurst window record into a SapWindow.

    An empty/missing frame type becomes ``None`` and an empty/missing glazing
    gap becomes ``""``; ``window_type`` is always ``"Window"``.
    """
    transmission = WindowTransmissionDetails(
        u_value=w.u_value,
        solar_transmittance=w.g_value,
        data_source=w.data_source,
    )
    frame_material = w.frame_type if w.frame_type else None
    glazing_gap = w.glazing_gap if w.glazing_gap else ""

    return SapWindow(
        frame_material=frame_material,
        glazing_gap=glazing_gap,
        orientation=w.orientation,
        window_type="Window",
        glazing_type=w.glazing_type,
        window_width=w.width_m,
        window_height=w.height_m,
        draught_proofed=w.draught_proofed,
        window_location=w.building_part,
        window_wall_type=w.location,
        # NOTE(review): permanent_shutters is forwarded unchanged; presumably
        # it is a string such as "None" on the Elmhurst side - confirm that
        # SapWindow.permanent_shutters_present accepts that.
        permanent_shutters_present=w.permanent_shutters,
        frame_factor=w.frame_factor,
        window_transmission_details=transmission,
    )
def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
    """Build the SapHeating section from an Elmhurst survey.

    Produces a single MainHeatingDetail from ``survey.main_heating`` and takes
    the shower outlet type from the first recorded shower, if any.
    """
    main = survey.main_heating
    water = survey.water_heating

    # "SAP code 2106, Programmer, ..." -> keep only the text after the first
    # ", "; any other value is used verbatim.
    raw_control = main.heating_controls_sap
    control = raw_control
    if raw_control.startswith("SAP code") and ", " in raw_control:
        control = raw_control.split(", ", 1)[1]

    # Only the first shower in the list contributes an outlet type.
    showers = survey.baths_and_showers.showers
    shower_outlets = None
    if showers:
        first_outlet = ShowerOutlet(shower_outlet_type=showers[0].outlet_type)
        shower_outlets = ShowerOutlets(shower_outlet=first_outlet)

    # NOTE(review): cylinder_size is populated from water_heating_code when a
    # cylinder is present - confirm that is the intended source field.
    cylinder_size = (
        water.water_heating_code if water.hot_water_cylinder_present else None
    )

    main_detail = MainHeatingDetail(
        has_fghrs=survey.renewables.flue_gas_heat_recovery_present,
        main_fuel_type=main.fuel_type,
        heat_emitter_type=main.heat_emitter,
        emitter_temperature=main.design_flow_temperature,
        fan_flue_present=main.fan_assisted_flue,
        main_heating_control=control,
        # NOTE(review): sourced from heat_pump_age - confirm this is the
        # right input for the central-heating pump age.
        central_heating_pump_age_str=main.heat_pump_age,
    )

    return SapHeating(
        instantaneous_wwhrs=InstantaneousWwhrs(),
        main_heating_details=[main_detail],
        has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling,
        shower_outlets=shower_outlets,
        cylinder_size=cylinder_size,
        water_heating_code=water.water_heating_sap_code,
    )
def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation:
    """Convert the Elmhurst ventilation section into a SapVentilation.

    ``ventilation_type`` and ``ventilation_in_pcdf_database`` are always set
    to ``None`` by this mapper.
    """
    # Any value other than the literal "Not present" counts as having a lobby.
    has_draught_lobby = v.draught_lobby != "Not present"

    return SapVentilation(
        ventilation_type=None,
        draught_lobby=has_draught_lobby,
        pressure_test=v.pressure_test_method,
        open_flues_count=v.open_flues_count,
        closed_flues_count=v.open_chimneys_closed_fire_count,
        boiler_flues_count=v.solid_fuel_boiler_flues_count,
        other_flues_count=v.other_heater_flues_count,
        extract_fans_count=v.extract_fans_count,
        passive_vents_count=v.passive_vents_count,
        flueless_gas_fires_count=v.flueless_gas_fires_count,
        ventilation_in_pcdf_database=None,
    )

View file

@ -481,6 +481,10 @@ class TestFromRdSapSchema21_0_1:
# draught_proofed: "true"
assert result.sap_windows[0].draught_proofed is True
def test_window_frame_material_false(self, result: EpcPropertyData) -> None:
    """The first mapped window has no frame material."""
    # pvc_frame: "false" in fixture → frame_material should be None
    assert result.sap_windows[0].frame_material is None
# --- sap building parts ---
def test_building_part_count(self, result: EpcPropertyData) -> None:

View file

@ -398,7 +398,7 @@ class TestFromSiteNotesExample1:
# Windows
sap_windows=[
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="South East",
window_type="Window",
@ -411,7 +411,7 @@ class TestFromSiteNotesExample1:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="South East",
window_type="Window",
@ -424,7 +424,7 @@ class TestFromSiteNotesExample1:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -437,7 +437,7 @@ class TestFromSiteNotesExample1:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -694,3 +694,21 @@ class TestFromSiteNotesMiscTopLevel:
def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
    """The mapped property reports no photovoltaic array."""
    # renewables.photovoltaic_array: false
    assert result.photovoltaic_array is False
class TestUnmeasurableWallThickness:
    """Site notes with wall_thickness_mm = null map to an unmeasured wall in the domain."""

    @pytest.fixture
    def result(self) -> EpcPropertyData:
        """Map the unmeasurable-wall example site notes into domain data."""
        raw_notes = load("pashub_rdsap_site_notes_example_unmeasurable_wall.json")
        site_notes = from_dict(PasHubRdSapSiteNotes, raw_notes)
        return EpcPropertyDataMapper.from_site_notes(site_notes)

    def test_wall_thickness_measured_is_false(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_measured is False

    def test_wall_thickness_mm_is_none(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_mm is None

View file

@ -0,0 +1,247 @@
from dataclasses import dataclass
from datetime import date
from typing import List, Optional
@dataclass
class SurveyorInfo:
    """Surveyor block of an Elmhurst site-notes record (see ElmhurstSiteNotes.surveyor_info)."""

    surveyor_code: str
    name: str
    title: str
    tel_number: str
    survey_reference: str
    # Optional; defaults to None when not supplied.
    my_reference: Optional[str] = None
@dataclass
class PropertyDetails:
    """Property details block of an Elmhurst site-notes record (see ElmhurstSiteNotes.property_details)."""

    rdsap_version: str
    reference_number: str
    lodgement_required: bool
    regs_region: str
    epc_language: str
    postcode: str
    region: str
    street: str
    town: str
    tenure: str
    transaction_type: str
    inspection_date: date
    process_date: date
    epc_exists: bool
    # The remaining fields are optional and default to None.
    uprn: Optional[str] = None
    house_name: Optional[str] = None
    house_number: Optional[str] = None
    locality: Optional[str] = None
    county: Optional[str] = None
@dataclass
class FloorDimension:
    """Measurements for one floor, as listed in BuildingPartDimensions.floors."""

    name: str  # e.g. "Lowest Floor"
    area_m2: float
    room_height_m: float
    heat_loss_perimeter_m: float
    party_wall_length_m: float
@dataclass
class BuildingPartDimensions:
    """Dimension type plus the per-floor measurements (see ElmhurstSiteNotes.dimensions)."""

    dimension_type: str  # e.g. "Internal"
    floors: List[FloorDimension]
@dataclass
class WallDetails:
    """Wall section of an Elmhurst site-notes record; string fields carry a leading code (see examples)."""

    wall_type: str  # e.g. "CA Cavity"
    insulation: str  # e.g. "F Filled Cavity"
    thickness_unknown: bool
    u_value_known: bool
    party_wall_type: str  # e.g. "U Unable to determine"
    # Optional; defaults to None when no thickness is supplied.
    thickness_mm: Optional[int] = None
@dataclass
class RoofDetails:
    """Roof section of an Elmhurst site-notes record; string fields carry a leading code (see examples)."""

    roof_type: str  # e.g. "PA Pitched (slates/tiles), access to loft"
    insulation: str  # e.g. "J Joists"
    u_value_known: bool
    # Optional; defaults to None when no thickness is supplied.
    insulation_thickness_mm: Optional[int] = None
@dataclass
class FloorDetails:
    """Floor section of an Elmhurst site-notes record; string fields carry a leading code (see examples)."""

    location: str  # e.g. "G Ground floor"
    floor_type: str  # e.g. "N Suspended, not timber"
    insulation: str  # e.g. "A As built"
    u_value_known: bool
    # Optional; defaults to None when not supplied.
    default_u_value: Optional[float] = None
@dataclass
class Window:
    """One entry of ElmhurstSiteNotes.windows."""

    width_m: float
    height_m: float
    area_m2: float
    glazing_type: str
    frame_factor: float
    building_part: str
    location: str
    orientation: str
    data_source: str
    u_value: float
    g_value: float
    draught_proofed: bool
    # Held as a string rather than a bool.
    permanent_shutters: str  # e.g. "None"
    # Optional; both default to None when not supplied.
    frame_type: Optional[str] = None
    glazing_gap: Optional[str] = None
@dataclass
class VentilationAndCooling:
    """Ventilation and cooling section of an Elmhurst site-notes record (see ElmhurstSiteNotes.ventilation)."""

    open_chimneys_count: int
    open_flues_count: int
    open_chimneys_closed_fire_count: int
    solid_fuel_boiler_flues_count: int
    other_heater_flues_count: int
    blocked_chimneys_count: int
    extract_fans_count: int
    passive_vents_count: int
    flueless_gas_fires_count: int
    fixed_space_cooling: bool
    # Held as descriptive strings rather than bools.
    draught_lobby: str  # e.g. "Not present"
    mechanical_ventilation: bool
    pressure_test_method: str  # e.g. "Not available"
@dataclass
class Lighting:
    """Lighting section of an Elmhurst site-notes record (see ElmhurstSiteNotes.lighting)."""

    total_bulbs: int
    led_cfl_count_known: bool
    led_count: int
    cfl_count: int
    incandescent_count: int
    # Defaults to 0 when not supplied.
    low_energy_count: int = 0
@dataclass
class MainHeating:
    """Main heating section of an Elmhurst site-notes record (see ElmhurstSiteNotes.main_heating)."""

    heat_emitter: str  # e.g. "Radiators"
    fuel_type: str  # e.g. "Mains gas"
    flue_type: str  # e.g. "Balanced"
    fan_assisted_flue: bool
    design_flow_temperature: str  # e.g. "Unknown"
    heating_controls_ees: str  # e.g. "CBE"
    heating_controls_sap: (
        str  # e.g. "SAP code 2106, Programmer, room thermostat and TRVs"
    )
    percentage_of_heat: int
    # Optional; both default to None when not supplied.
    pcdf_boiler_reference: Optional[str] = (
        None  # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%"
    )
    heat_pump_age: Optional[str] = None
@dataclass
class Meters:
    """Meters section of an Elmhurst site-notes record (see ElmhurstSiteNotes.meters)."""

    electricity_meter_type: str  # e.g. "Single"
    main_gas: bool
    electricity_smart_meter: bool
    gas_smart_meter: bool
@dataclass
class WaterHeating:
    """Water heating section of an Elmhurst site-notes record (see ElmhurstSiteNotes.water_heating)."""

    # The code is kept both as a short string and as an integer SAP code.
    water_heating_code: str  # e.g. "HWP"
    water_heating_sap_code: int
    water_heating_fuel_type: str
    hot_water_cylinder_present: bool
@dataclass
class Shower:
    """One entry of BathsAndShowers.showers."""

    shower_number: int
    outlet_type: str
    connected: str  # e.g. "None"
@dataclass
class BathsAndShowers:
    """Bath counts and the list of showers (see ElmhurstSiteNotes.baths_and_showers)."""

    number_of_baths: int
    number_of_baths_connected: int
    showers: List[Shower]
@dataclass
class Renewables:
    """Renewables section of an Elmhurst site-notes record (see ElmhurstSiteNotes.renewables)."""

    solar_water_heating: bool
    wwhrs_present: bool
    flue_gas_heat_recovery_present: bool
    # Held as a string rather than a bool.
    photovoltaic_panel: str  # e.g. "None"
    export_capable_meter: bool
    wind_turbine_present: bool
    wind_turbines_terrain_type: str
    hydro_electricity_generated_kwh: float
@dataclass
class ElmhurstSiteNotes:
    """Top-level Elmhurst site-notes record, grouped by the report section each field comes from.

    All fields are required (none has a default). Several string fields carry
    a leading code followed by a description, as shown in the inline examples.
    """

    surveyor_info: SurveyorInfo
    property_details: PropertyDetails
    # Summary Information
    current_sap_rating: int
    potential_sap_rating: int
    current_ei_rating: int
    potential_ei_rating: int
    co2_emissions_current_t: float
    # Section 1.0
    property_type: str  # e.g. "B Bungalow"
    attachment: str  # e.g. "E End-Terrace"
    # Section 2.0
    number_of_storeys: int
    habitable_rooms: int
    heated_habitable_rooms: int
    # Section 3.0
    construction_age_band: str  # e.g. "D 1950-1966"
    # Section 4.0
    dimensions: BuildingPartDimensions
    # Section 5.0
    has_conservatory: bool
    # Sections 7.0-9.0
    walls: WallDetails
    roof: RoofDetails
    floor: FloorDetails
    # Section 10.0
    door_count: int
    insulated_door_count: int
    # Section 11.0
    windows: List[Window]
    draught_proofing_percent: int
    # Section 12.0
    ventilation: VentilationAndCooling
    # Section 13.0
    lighting: Lighting
    # Sections 14.0-14.2
    main_heating: MainHeating
    meters: Meters
    # Section 15.0
    water_heating: WaterHeating
    # Section 1x.0
    baths_and_showers: BathsAndShowers
    # Sections 16.0-22.0
    renewables: Renewables

View file

@ -44,7 +44,7 @@ class MainBuildingConstruction:
walls_insulation_type: str
thermal_conductivity_of_wall_insulation: str
wall_u_value_known: bool
wall_thickness_mm: int
wall_thickness_mm: Optional[int]
party_wall_construction_type: str
filled_cavity_indicators: Optional[str] = None
@ -59,7 +59,7 @@ class ExtensionConstruction:
walls_insulation_type: str
thermal_conductivity_of_wall_insulation: str
wall_u_value_known: bool
wall_thickness_mm: int
wall_thickness_mm: Optional[int]
party_wall_construction_type: str
filled_cavity_indicators: Optional[str] = None

View file

@ -0,0 +1,190 @@
{
"inspection_metadata": {
"inspection_surveyor": "test",
"email_address": "test@test.com",
"report_reference": "49D422A9-0779-44DD-9665-464D35DFF1A8",
"created_on": "2026-03-31",
"date_of_inspection": "2026-03-31",
"property_address": "1, Test Street, Test Town, Test County, TE1 1ST"
},
"general": {
"epc_checked_before_assessment": true,
"epc_exists_at_point_of_assessment": false,
"inspection_date": "2026-03-31",
"transaction_type": "None of the Above",
"tenure": "Rented Social",
"property_type": "House",
"detachment_type": "Mid-terrace",
"number_of_storeys": 2,
"terrain_type": "Suburban",
"number_of_extensions": 0,
"electricity_smart_meter": true,
"electric_meter_type": "Single",
"dwelling_export_capable": true,
"mains_gas_available": true,
"gas_smart_meter": true,
"gas_meter_accessible": true,
"measurements_location": "Internal"
},
"building_construction": {
"main_building": {
"age_range": "I: 1996 - 2002",
"age_indicators": "local knowledge",
"walls_construction_type": "Cavity",
"cavity_construction_indicators": "stretcher bond",
"walls_insulation_type": "As built",
"thermal_conductivity_of_wall_insulation": "Unknown",
"wall_u_value_known": false,
"wall_thickness_mm": null,
"party_wall_construction_type": "Cavity Masonry, Unfilled"
},
"floor": {
"floor_type": "Ground Floor",
"floor_construction": "Suspended, not timber",
"floor_insulation_type": "As Built",
"floor_u_value_known": false
}
},
"building_measurements": {
"main_building": {
"floors": [
{
"name": "Floor 1",
"area_m2": 24.78,
"height_m": 2.37,
"heat_loss_perimeter_m": 14.21,
"pwl_m": 6.15
},
{
"name": "Floor 0",
"area_m2": 24.78,
"height_m": 2.35,
"heat_loss_perimeter_m": 14.21,
"pwl_m": 6.15
}
]
}
},
"roof_space": {
"main_building": {
"construction_type": "Pitched roof (Slates or tiles), Access to loft",
"insulation_at": "Joists",
"roof_u_value_known": false,
"insulation_thickness_mm": 100,
"cavity_wall_construction_indicators": "No indicator of construction visible",
"rooms_in_roof": false
}
},
"windows": [
{
"id": 1,
"location": "Main Building",
"wall_type": "External wall",
"glazing_type": "Double glazing, Unknown install date",
"window_type": "Window",
"frame_type": "Wooden or PVC",
"glazing_gap": "16 mm or more",
"draught_proofed": true,
"permanent_shutters": false,
"height_m": 1.36,
"width_m": 1.0,
"orientation": "South East"
}
],
"heating_and_hot_water": {
"main_heating": {
"selection_method": "PCDF Search",
"system_type": "Boiler with radiators or underfloor heating",
"product_id": 18400,
"manufacturer": "Vaillant",
"model": "ecoFIT sustain 415",
"orig_manufacturer": "Vaillant",
"fuel": "Mains gas",
"summer_efficiency": 0,
"type": "Regular",
"condensing": true,
"year": "2018 - current",
"mount": "Wall",
"open_flue": "Room-sealed",
"fan_assist": true,
"status": "Normal status for an actual product",
"central_heating_pump_age": "Unknown",
"controls": "Programmer, room thermostat and TRVs",
"flue_gas_heat_recovery_system": false,
"weather_compensator": false,
"emitter": "Radiators",
"emitter_temperature": "Unknown"
},
"secondary_heating": {
"secondary_fuel": "No Secondary Heating"
},
"water_heating": {
"type": "Regular",
"system": "From main heating 1",
"cylinder_size": "Normal (90-130 litres)",
"cylinder_measured_heat_loss": "Not known",
"insulation_type": "Factory fitted",
"insulation_thickness_mm": 12,
"has_thermostat": true
}
},
"ventilation": {
"ventilation_type": "Natural",
"has_fixed_air_conditioning": false,
"number_of_open_flues": 0,
"number_of_closed_flues": 0,
"number_of_boiler_flues": 0,
"number_of_other_flues": 0,
"number_of_extract_fans": 2,
"number_of_passive_vents": 0,
"number_of_flueless_gas_fires": 0,
"pressure_test": "No test",
"draught_lobby": false
},
"conservatories": {
"has_conservatory": false
},
"renewables": {
"wind_turbines": false,
"solar_hot_water": false,
"photovoltaic_array": false,
"number_of_pv_batteries": 0,
"hydro": false
},
"room_count_elements": {
"number_of_habitable_rooms": 2,
"any_unheated_rooms": true,
"number_of_heated_rooms": 0,
"number_of_external_doors": 2,
"number_of_insulated_external_doors": 0,
"number_of_draughtproofed_external_doors": 2,
"number_of_open_chimneys": 0,
"number_of_blocked_chimneys": 0,
"number_of_fixed_incandescent_bulbs": 0,
"exact_led_cfl_count_known": true,
"number_of_fixed_led_bulbs": 5,
"number_of_fixed_cfl_bulbs": 4,
"waste_water_heat_recovery": "None"
},
"water_use": {
"number_of_baths": 1,
"number_of_special_features": 0,
"showers": [
{
"id": 1,
"outlet_type": "Non-Electric Shower"
}
]
},
"customer_response": {
"customer_present": true,
"willing_to_answer_satisfaction_survey": false
},
"addendum": {
"addendum": "None",
"related_party_disclosure": "No related party",
"hard_to_treat_cavity_access_issues": false,
"hard_to_treat_cavity_high_exposure": false,
"hard_to_treat_cavity_narrow_cavities": false
}
}

View file

@ -255,13 +255,13 @@ class HubspotClient:
"dampmould_growth",
"damp_mould_and_repairs_comments",
"pre_sap_score_dropdown",
"coordinator",
"coordinator_user",
"mtp_completion_date",
"mtp_re_model_completion_date",
"ioe_v3_completion_date",
"proposed_measures_dropdown",
"approved_package",
"designer",
"designer_user",
"design_completion_date",
"actual_measures_installed",
"installer",
@ -283,6 +283,16 @@ class HubspotClient:
"ei_score__potential_",
"epc_sap_score",
"epc_sap_score__potential_",
"survey_type",
"measures_for_pibi_ordered",
"pibi_order_date",
"pibi_completed_date",
"property_halted_date",
"property_halted_reason",
"technical_approved_measures_for_install",
"sent_to_iw_for_pricing",
"osmosis_survey_required",
"osmosis_survey_date",
],
)
)
@ -290,6 +300,20 @@ class HubspotClient:
deal_info: dict[str, str] = cast(dict[str, str], deal.properties) # type: ignore[reportUnknownMemberType]
return deal_info
def get_owner_info(self, owner_id: str) -> Optional[dict[str, Optional[str]]]:
    """Fetch the name and email of a HubSpot owner.

    This is a best-effort lookup: any failure is logged as a warning and
    reported as ``None`` instead of being raised to the caller.

    :param owner_id: HubSpot owner id to look up.
    :return: Dict with ``first_name``, ``last_name`` and ``email`` keys, or
        ``None`` when the lookup failed.
    """
    try:
        owner = self._call_with_retry(
            lambda: self.client.crm.owners.owners_api.get_by_id(owner_id)  # type: ignore[reportUnknownMemberType]
        )
        return {
            "first_name": owner.first_name,  # type: ignore[reportUnknownMemberType]
            "last_name": owner.last_name,  # type: ignore[reportUnknownMemberType]
            # email is read defensively; a missing attribute yields None.
            "email": getattr(owner, "email", None),
        }
    except Exception as exc:
        # Deliberately broad (best-effort), but keep the failure reason in the
        # log so auth errors, rate limits and bad ids can be told apart.
        self.logger.warning(
            f"Failed to fetch HubSpot owner {owner_id}: {exc!r}"
        )
        return None
def get_deal_and_company_and_listing(
self, deal_id: str
) -> tuple[dict[str, str], Optional[str], Optional[dict[str, str]]]:

View file

@ -1,18 +1,18 @@
import os
from sqlmodel import select
from sqlmodel import select, Session
from datetime import datetime, timezone
from typing import Dict, Optional
from backend.app.db.models.hubspot_deal_data import HubspotDealData
from backend.app.db.models.hubspot_user import HubspotUser
from etl.hubspot.company_data import CompanyData
from etl.hubspot.hubspotClient import HubspotClient
from etl.hubspot.s3_uploader import S3Uploader
from backend.app.db.connection import db_read_session
from backend.app.db.models.organisation import Organisation
from etl.hubspot.utils import parse_hs_date
from etl.hubspot.utils import parse_hs_bool, parse_hs_date
from utils.logger import setup_logger
logger = setup_logger()
@ -95,6 +95,9 @@ class HubspotDataToDb:
with db_read_session() as session:
deal_id = deal_data.get("hs_object_id")
self._sync_owner_to_db(deal_data.get("coordinator_user"), hubspot_client, session)
self._sync_owner_to_db(deal_data.get("designer_user"), hubspot_client, session)
statement = select(HubspotDealData).where(
HubspotDealData.deal_id == deal_id
)
@ -125,6 +128,38 @@ class HubspotDataToDb:
session.refresh(new_record)
return new_record
def _sync_owner_to_db(
    self,
    owner_id: Optional[str],
    hubspot_client: HubspotClient,
    session: Session,
) -> None:
    """Insert or refresh the HubspotUser row for a HubSpot owner.

    Does nothing when ``owner_id`` is empty or when the owner lookup returns
    ``None``. The row is only added to ``session``; this method does not
    commit.
    """
    if not owner_id:
        return

    info = hubspot_client.get_owner_info(owner_id)
    if info is None:
        return

    timestamp = datetime.now(timezone.utc)
    user: Optional[HubspotUser] = session.get(HubspotUser, owner_id)

    if not user:
        # First time this owner is seen: stage a new row.
        session.add(
            HubspotUser(
                hubspot_owner_id=owner_id,
                first_name=info["first_name"],
                last_name=info["last_name"],
                email=info["email"],
                updated_at=timestamp,
            )
        )
        return

    # Known owner: overwrite the stored details with the fetched ones.
    user.first_name = info["first_name"]
    user.last_name = info["last_name"]
    user.email = info["email"]
    user.updated_at = timestamp
    session.add(user)
def _update_existing_deal(
self,
existing: HubspotDealData,
@ -170,7 +205,7 @@ class HubspotDataToDb:
"ei_score__potential_": deal_data.get("ei_score__potential_"),
"epc_sap_score": deal_data.get("epc_sap_score"),
"epc_sap_score__potential_": deal_data.get("epc_sap_score__potential_"),
"coordinator": deal_data.get("coordinator"),
"coordinator": deal_data.get("coordinator_user"),
"mtp_completion_date": parse_hs_date(deal_data.get("mtp_completion_date")),
"mtp_re_model_completion_date": parse_hs_date(
deal_data.get("mtp_re_model_completion_date")
@ -180,7 +215,7 @@ class HubspotDataToDb:
),
"proposed_measures": deal_data.get("proposed_measures_dropdown"),
"approved_package": deal_data.get("approved_package"),
"designer": deal_data.get("designer"),
"designer": deal_data.get("designer_user"),
"design_completion_date": parse_hs_date(
deal_data.get("design_completion_date")
),
@ -202,6 +237,24 @@ class HubspotDataToDb:
"confirmed_survey_time": deal_data.get("confirmed_survey_time"),
"surveyed_date": parse_hs_date(deal_data.get("surveyed_date")),
"design_type": deal_data.get("design_type"),
"survey_type": deal_data.get("survey_type"),
"measures_for_pibi_ordered": deal_data.get("measures_for_pibi_ordered"),
"pibi_order_date": parse_hs_date(deal_data.get("pibi_order_date")),
"pibi_completed_date": parse_hs_date(deal_data.get("pibi_completed_date")),
"property_halted_date": parse_hs_date(
deal_data.get("property_halted_date")
),
"property_halted_reason": deal_data.get("property_halted_reason"),
"technical_approved_measures_for_install": deal_data.get(
"technical_approved_measures_for_install"
),
"sent_to_installer_for_pricing": parse_hs_date(
deal_data.get("sent_to_iw_for_pricing")
),
"domna_survey_required": parse_hs_bool(
deal_data.get("osmosis_survey_required")
),
"domna_survey_date": parse_hs_date(deal_data.get("osmosis_survey_date")),
}.items():
setattr(existing, attr, value or getattr(existing, attr))
@ -249,7 +302,7 @@ class HubspotDataToDb:
ei_score__potential_=deal_data.get("ei_score__potential_"),
epc_sap_score=deal_data.get("epc_sap_score"),
epc_sap_score__potential_=deal_data.get("epc_sap_score__potential_"),
coordinator=deal_data.get("coordinator"),
coordinator=deal_data.get("coordinator_user"),
mtp_completion_date=parse_hs_date(deal_data.get("mtp_completion_date")),
mtp_re_model_completion_date=parse_hs_date(
deal_data.get("mtp_re_model_completion_date")
@ -259,7 +312,7 @@ class HubspotDataToDb:
),
proposed_measures=deal_data.get("proposed_measures_dropdown"),
approved_package=deal_data.get("approved_package"),
designer=deal_data.get("designer"),
designer=deal_data.get("designer_user"),
design_completion_date=parse_hs_date(
deal_data.get("design_completion_date")
),
@ -279,6 +332,22 @@ class HubspotDataToDb:
confirmed_survey_time=deal_data.get("confirmed_survey_time"),
surveyed_date=parse_hs_date(deal_data.get("surveyed_date")),
design_type=deal_data.get("design_type"),
survey_type=deal_data.get("survey_type"),
measures_for_pibi_ordered=deal_data.get("measures_for_pibi_ordered"),
pibi_order_date=parse_hs_date(deal_data.get("pibi_order_date")),
pibi_completed_date=parse_hs_date(deal_data.get("pibi_completed_date")),
property_halted_date=parse_hs_date(deal_data.get("property_halted_date")),
property_halted_reason=deal_data.get("property_halted_reason"),
technical_approved_measures_for_install=deal_data.get(
"technical_approved_measures_for_install"
),
sent_to_installer_for_pricing=parse_hs_date(
deal_data.get("sent_to_iw_for_pricing")
),
domna_survey_required=parse_hs_bool(
deal_data.get("osmosis_survey_required")
),
domna_survey_date=parse_hs_date(deal_data.get("osmosis_survey_date")),
)
def _handle_existing_photo_upload(

View file

@ -1,7 +1,7 @@
from typing import Dict, List, Optional
from backend.app.db.models.hubspot_deal_data import HubspotDealData
from etl.hubspot.utils import parse_hs_date
from etl.hubspot.utils import parse_hs_bool, parse_hs_date
class HubspotDealDiffer:
@ -71,10 +71,10 @@ class HubspotDealDiffer:
"ei_score__potential_": "ei_score__potential_",
"epc_sap_score": "epc_sap_score",
"epc_sap_score__potential_": "epc_sap_score__potential_",
"coordinator": "coordinator",
"coordinator_user": "coordinator",
"proposed_measures_dropdown": "proposed_measures",
"approved_package": "approved_package",
"designer": "designer",
"designer_user": "designer",
"actual_measures_installed": "actual_measures_installed",
"installer": "installer",
"installer_handover": "installer_handover",
@ -82,6 +82,10 @@ class HubspotDealDiffer:
"design_type": "design_type",
"surveyor": "surveyor",
"confirmed_survey_time": "confirmed_survey_time",
"survey_type": "survey_type",
"measures_for_pibi_ordered": "measures_for_pibi_ordered",
"property_halted_reason": "property_halted_reason",
"technical_approved_measures_for_install": "technical_approved_measures_for_install",
}
for hs_field, db_field in FIELD_MAP.items():
@ -102,6 +106,11 @@ class HubspotDealDiffer:
("expected_commencement_date", "expected_commencement_date"),
("confirmed_survey_date", "confirmed_survey_date"),
("surveyed_date", "surveyed_date"),
("pibi_order_date", "pibi_order_date"),
("pibi_completed_date", "pibi_completed_date"),
("property_halted_date", "property_halted_date"),
("sent_to_iw_for_pricing", "sent_to_installer_for_pricing"),
("osmosis_survey_date", "domna_survey_date"),
]
for hs_field, db_field in date_fields:
@ -111,6 +120,18 @@ class HubspotDealDiffer:
if old_value != new_value:
return True
# --- Boolean fields ---
bool_fields = [
("osmosis_survey_required", "domna_survey_required"),
]
for hs_field, db_field in bool_fields:
old_value = getattr(old_deal, db_field)
new_value = parse_hs_bool(new_deal.get(hs_field))
if old_value != new_value:
return True
# --- Time field ---
if old_deal.confirmed_survey_time != new_deal.get("confirmed_survey_time"):
return True

View file

@ -14,3 +14,11 @@ def parse_hs_date(value: Optional[str]) -> Optional[datetime]:
return dt.astimezone(timezone.utc)
except ValueError:
return None
def parse_hs_bool(value: Optional[str]) -> Optional[bool]:
    """Parse a HubSpot boolean property value.

    ``None`` and the empty string mean "no value" and yield ``None``. A real
    ``bool`` is returned unchanged. Anything else is stringified, trimmed and
    lower-cased; only ``"true"`` yields ``True``, every other text yields
    ``False``.
    """
    if isinstance(value, bool):
        return value
    if value is None or value == "":
        return None
    normalised = str(value).strip().lower()
    return normalised == "true"

View file

@ -3,6 +3,6 @@ pythonpath = .
log_cli = true
log_cli_level = INFO
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/documents_parser/tests backend/magic_plan/tests
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests
markers =
integration: mark a test as an integration test

View file

@ -100,7 +100,7 @@ class LightingRecommendations:
:return:
"""
if "sap05" in self.property.lighting["clean_description"].lower():
if "sap05" in self.property.lighting.get("clean_description", "").lower():
return
if self.property.lighting["low_energy_proportion"] >= 1:

View file

@ -7,4 +7,6 @@ psycopg[binary]
pytest-postgresql
hubspot-api-client
fuzzywuzzy
pymupdf
pymupdf
playwright==1.58.0
msal

View file

@ -9,4 +9,5 @@ deps =
-rbackend/engine/requirements.txt
-rbackend/app/requirements/requirements.txt
-rtest.requirements.txt
commands_pre = playwright install --with-deps chromium
commands = pytest {posargs}