merged from main

This commit is contained in:
Jun-te Kim 2026-05-11 12:30:29 +00:00
commit 6504785e7c
125 changed files with 418181 additions and 778 deletions

View file

@ -18,15 +18,6 @@ RUN curl -fsSL https://github.com/neovim/neovim/releases/latest/download/nvim-li
| tar -xz -C /opt \
&& ln -s /opt/nvim-linux-x86_64/bin/nvim /usr/local/bin/nvim
# # 2) Build and install libpostal from source
# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
# && cd /tmp/libpostal \
# && ./bootstrap.sh \
# && ./configure --datadir=/usr/local/share/libpostal \
# && make -j"$(nproc)" \
# && make install \
# && ldconfig \
# && rm -rf /tmp/libpostal
# 3) Create the user and grant sudo privileges
RUN groupadd -g ${USER_GID} ${USER} \
@ -34,10 +25,7 @@ RUN groupadd -g ${USER_GID} ${USER} \
&& echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
&& chmod 0440 /etc/sudoers.d/${USER}
# # 4) Python deps - if you want to run assest list
# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# ADD asset_list/requirements.txt requirements.txt
# RUN pip install -r requirements.txt
#
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
@ -75,26 +63,27 @@ RUN wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key
RUN apt update
RUN apt install -y postgresql-14
# Install Node.js + backlog.md
# Install Node.js
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& npm install -g backlog.md \
&& rm -rf /var/lib/apt/lists/*
# GitHub CLI — used by the postCreate skill installer to authenticate against
# private Hestia-Homes repos via the host's mounted ~/.config/gh.
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
| dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
&& chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
> /etc/apt/sources.list.d/github-cli.list \
&& apt update && apt install -y gh \
&& rm -rf /var/lib/apt/lists/*
USER ${USER}
# Bootstrap LazyVim starter config
RUN git clone https://github.com/LazyVim/starter /home/${USER}/.config/nvim \
&& rm -rf /home/${USER}/.config/nvim/.git
# Install Claude + plugins + skills
RUN curl -fsSL https://claude.ai/install.sh | bash \
&& export PATH="/home/${USER}/.local/bin:${PATH}" \
&& claude plugin marketplace add JuliusBrussee/caveman \
&& claude plugin install caveman@caveman \
&& npx skills@latest add --global --yes mattpocock/skills/grill-me \
&& npx skills@latest add --global --yes mattpocock/skills/to-prd \
&& npx skills@latest add --global --yes mattpocock/skills/ubiquitous-language \
&& npx skills@latest add --global --yes mattpocock/skills/tdd \
&& npx skills@latest add --global --yes mattpocock/skills/improve-codebase-architecture
# Install Claude Code CLI (skills are installed via postCreate from Hestia-Homes/agentic-toolkit)
RUN curl -fsSL https://claude.ai/install.sh | bash
ENV PATH="/home/vscode/.local/bin:${PATH}"
USER root

View file

@ -4,7 +4,8 @@
"service": "model-backend",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
"initializeCommand": "docker network create shared-dev 2>/dev/null || true",
"initializeCommand": "docker network create shared-dev 2>/dev/null || true; test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
"postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind",
@ -41,12 +42,8 @@
"containerEnv": {
"PYTHONFLAGS": "-Xfrozen_modules=off"
},
"forwardPorts": [6421, 8000],
"forwardPorts": [8000],
"portsAttributes": {
"6421": {
"label": "Backlog.md",
"onAutoForward": "notify"
},
"8000": {
"label": "FastAPI",
"onAutoForward": "notify"

View file

@ -14,8 +14,13 @@ services:
volumes:
- ../../:/workspaces/model
- ~/.gitconfig:/home/vscode/.gitconfig:ro
# GitHub CLI auth from host (created by `gh auth login`). Used by the
# postCreate skill installer to clone private Hestia-Homes repos.
- ~/.config/gh:/home/vscode/.config/gh:ro
environment:
- SSH_AUTH_SOCK=${SSH_AUTH_SOCK:-}
# Fallback HTTPS auth if ~/.config/gh isn't present on the host.
- GITHUB_TOKEN=${GITHUB_TOKEN:-}
networks:
- backend-net
- shared-dev

View file

@ -31,17 +31,19 @@ from recommendations.recommendation_utils import (
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
from dotenv import load_dotenv
# from dotenv import load_dotenv
logger = setup_logger()
load_dotenv(dotenv_path="../backend/.env")
# load_dotenv(dotenv_path="../backend/.env")
# OpenAI API Key (set this in your environment variables for security)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
class DataRemapper:
def __init__(self, standard_values, standard_map=None, max_tokens=1000):
def __init__(
self, standard_values, standard_map=None, max_tokens=1000, api_key=None
):
"""
Initialize the remapper with standard values and a predefined mapping.
@ -75,7 +77,8 @@ class DataRemapper:
"gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
}
self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
# Never echo the API key itself: stdout ends up in terminals, notebooks and CI
# logs. Record only whether a key was supplied to this remapper.
logger.info("DataRemapper OpenAI API key provided: %s", api_key is not None)
self.openai_client = OpenAI(api_key=api_key)
@staticmethod
def clean_string(text):
@ -136,12 +139,20 @@ class DataRemapper:
raise ValueError("Input tokens exceed the maximum limit.")
logger.info("Calling OpenAI API for standardization...")
response = self.openai_client.chat.completions.create(
model=self.ai_model,
messages=[{"role": "user", "content": prompt}],
max_tokens=self.max_tokens,
temperature=0.1,
)
try:
response = self.openai_client.chat.completions.create(
model=self.ai_model,
messages=[{"role": "user", "content": prompt}],
max_tokens=self.max_tokens,
temperature=0.1,
)
except Exception as e:
print(f"[debug] OpenAI call failed. type={type(e).__name__}")
print(f"[debug] status={getattr(e, 'status_code', None)}")
print(f"[debug] body={getattr(e, 'response', None) and e.response.text}")
print(f"[debug] model={self.ai_model}")
raise
output_text = response.choices[0].message.content.strip()
output_tokens = self.count_tokens(output_text) # Count output tokens
@ -504,6 +515,7 @@ class AssetList:
landlord_block_reference=None,
phase=False,
header=0,
openai_api_key=None,
):
self.local_filepath = local_filepath
self.sheet_name = sheet_name
@ -529,6 +541,7 @@ class AssetList:
self.ecosurv = None
self.ecosurv_no_match = pd.DataFrame()
self.geographical_areas = pd.DataFrame()
self.openai_api_key = openai_api_key
# When this is True, we intend to break the programme into multiple phases. We may need to review
# how this is structured in the future, as depending on how we get future data, we may need to
@ -1107,6 +1120,7 @@ class AssetList:
remapper = DataRemapper(
standard_values=config["standard_values"],
standard_map=config["standard_map"],
api_key=self.openai_api_key,
)
remap_dictionary = remapper.standardize_list(
values_to_remap=values_to_remap.tolist()
@ -1296,8 +1310,8 @@ class AssetList:
self.standardised_asset_list[
self.ATTRIBUTE_HAS_SOLAR
] = self.standardised_asset_list[
self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]
] | ~self.standardised_asset_list[
self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]
] | ~self.standardised_asset_list[
self.EPC_API_DATA_NAMES["photo-supply"]
].isin(
["0.0", 0, None, "", np.nan]
@ -1315,7 +1329,7 @@ class AssetList:
property_type=(
str(x[self.STANDARD_PROPERTY_TYPE]).title()
if str(x[self.STANDARD_PROPERTY_TYPE]).title()
in accepted_epc_property_types
in accepted_epc_property_types
else (
x[self.EPC_API_DATA_NAMES["property-type"]]
if not pd.isnull(
@ -1373,9 +1387,9 @@ class AssetList:
self.standardised_asset_list.apply(
lambda x: estimate_perimeter(
floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]]
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]]
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
),
axis=1,
)
@ -1460,7 +1474,7 @@ class AssetList:
year_lower_bound = (
2007
if x[self.EPC_API_DATA_NAMES["construction-age-band"]]
== "England and Wales: 2007 onwards"
== "England and Wales: 2007 onwards"
else 2012
)
@ -1515,7 +1529,7 @@ class AssetList:
age_band_matches = (
"EPC Age Band Matches Year Built"
if x[self.STANDARD_YEAR_BUILT]
== int(x[self.EPC_API_DATA_NAMES["construction-age-band"]])
== int(x[self.EPC_API_DATA_NAMES["construction-age-band"]])
else "EPC Age Band is different from Year Built"
)
@ -1545,7 +1559,7 @@ class AssetList:
age_band_matches = (
"EPC Age Band Matches Year Built"
if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date))
and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date))
and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date))
else (
"EPC Age Band is older than Year Built"
if x[self.STANDARD_YEAR_BUILT] > float(upper_date)
@ -1717,22 +1731,22 @@ class AssetList:
if self.non_intrusives_present:
if self.new_format_non_insturives_present_v2:
non_intrusives_wall_filter = (
self.standardised_asset_list["non-intrusives: Construction"]
== "CAVITY"
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
self.standardised_asset_list["non-intrusives: Construction"]
== "CAVITY"
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
["EMPTY", "PARTIAL", "EMPTY CAVITY"]
)
else:
non_intrusives_wall_filter = (
self.standardised_asset_list["non-intrusives: Construction"]
== "CAVITY"
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
self.standardised_asset_list["non-intrusives: Construction"]
== "CAVITY"
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
["EMPTY", "PARTIAL"]
)
elif self.old_format_non_intrusives_present:
non_intrusives_wall_filter = self.standardised_asset_list[
"non-intrusives: WFT Findings"
].str.lower().str.strip().isin(
"non-intrusives: WFT Findings"
].str.lower().str.strip().isin(
[
"empty cavity",
"partial fill",
@ -1742,18 +1756,18 @@ class AssetList:
"empty cav",
]
) | (
(
self.standardised_asset_list["non-intrusives: WFT Findings"]
.str.lower()
.str.strip()
.str.contains("empty cavity|partial fill")
& ~self.standardised_asset_list["non-intrusives: WFT Findings"]
.astype(str)
.str.lower()
.str.strip()
.str.contains("major access issues")
)
)
(
self.standardised_asset_list["non-intrusives: WFT Findings"]
.str.lower()
.str.strip()
.str.contains("empty cavity|partial fill")
& ~self.standardised_asset_list["non-intrusives: WFT Findings"]
.astype(str)
.str.lower()
.str.strip()
.str.contains("major access issues")
)
)
else:
# We set the filter to False, as we have no non-intrusives
non_intrusives_wall_filter = False
@ -1765,12 +1779,12 @@ class AssetList:
)
else:
year_built_filter = (
self.standardised_asset_list[self.STANDARD_YEAR_BUILT]
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
) | (
self.standardised_asset_list["epc_year_upper_bound"]
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
)
self.standardised_asset_list[self.STANDARD_YEAR_BUILT]
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
) | (
self.standardised_asset_list["epc_year_upper_bound"]
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
)
# Criteria:
# The property isn't a bedsit
@ -1811,8 +1825,8 @@ class AssetList:
] = (
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]
& ~self.standardised_asset_list[
"non_intrusive_indicates_empty_cavity_has_solar"
]
"non_intrusive_indicates_empty_cavity_has_solar"
]
& (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(
["bedsit"]
@ -1888,8 +1902,8 @@ class AssetList:
.str.lower()
.isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS)
| self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
["uninsulated cavity"]
)
["uninsulated cavity"]
)
)
######################################################
@ -1926,8 +1940,8 @@ class AssetList:
extraction_wall_filter = (
extraction_wall_filter
& ~self.standardised_asset_list[
"non-intrusives: Eligibility (Red/Yellow/Green)"
].isin(["RED"])
"non-intrusives: Eligibility (Red/Yellow/Green)"
].isin(["RED"])
)
self.standardised_asset_list[
@ -2023,26 +2037,26 @@ class AssetList:
self.standardised_asset_list[
"solar_epc_data_indicates_correct_heating_system"
] = (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.str.contains(
"air source heat pump|ground source heat pump|boiler and radiators, electric"
)
) | (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.str.contains("electric storage heaters")
& (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheatcont-description"]
]
== "Controls for high heat retention storage heaters"
)
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.str.contains(
"air source heat pump|ground source heat pump|boiler and radiators, electric"
)
) | (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.str.contains("electric storage heaters")
& (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheatcont-description"]
]
== "Controls for high heat retention storage heaters"
)
)
# If the landlord has given us the heating system, we default to that on heating upgrades. Because of the
# poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the
@ -2050,25 +2064,25 @@ class AssetList:
self.standardised_asset_list[
"solar_epc_data_indicates_requires_heating_upgrade"
] = (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.str.contains("electric storage heaters|room heaters")
& (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
self.EPC_API_DATA_NAMES["mainheatcont-description"]
]
.str.lower()
.str.contains("electric storage heaters|room heaters")
& (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheatcont-description"]
]
!= "Controls for high heat retention storage heaters"
)
) & (
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
["district heating", "communal heating", "communal gas boiler"]
)
& ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
.astype(str)
.str.contains("gas ")
!= "Controls for high heat retention storage heaters"
)
) & (
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
["district heating", "communal heating", "communal gas boiler"]
)
& ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
.astype(str)
.str.contains("gas ")
)
# Basic check - both of the previous two shouldn't be true simultaneously
if (
@ -2148,8 +2162,8 @@ class AssetList:
self.standardised_asset_list[
"solar_non_intrusives_walls_insulated"
] = self.standardised_asset_list[
"non-intrusives: WFT Findings"
].str.lower().str.strip().isin(
"non-intrusives: WFT Findings"
].str.lower().str.strip().isin(
[
"retro drilled",
"retro filled",
@ -2158,8 +2172,8 @@ class AssetList:
"retro drilled and filled",
]
) | self.standardised_asset_list[
"non-intrusives: WFT Findings"
].str.lower().str.strip().str.contains(
"non-intrusives: WFT Findings"
].str.lower().str.strip().str.contains(
"retro drilled"
)
else:
@ -2176,19 +2190,14 @@ class AssetList:
)
self.standardised_asset_list["solar_epc_walls_insulated"] = (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES[
"walls-description"]]
.str.lower()
.str.contains("|".join(
self.EPC_INSULATED_WALLS_SUBSTRINGS))
) | (
self.standardised_asset_list[
"walls_u_value"].apply(
lambda x: x <= 0.7 if not pd.isnull(
x) else False
)
)
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]]
.str.lower()
.str.contains("|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS))
) | (
self.standardised_asset_list["walls_u_value"].apply(
lambda x: x <= 0.7 if not pd.isnull(x) else False
)
)
roof_data = []
for desc in self.standardised_asset_list[
@ -2230,20 +2239,20 @@ class AssetList:
self.standardised_asset_list[
"solar_epc_loft_needs_topup"
] = self.standardised_asset_list[
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
].apply(
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
].apply(
lambda x: int(x) < 200 if str(x).isdigit() else False
) | (
(
self.standardised_asset_list["is_loft"]
| self.standardised_asset_list["is_pitched"]
)
& (
self.standardised_asset_list[
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
].isin(["below average", "none"])
)
(
self.standardised_asset_list["is_loft"]
| self.standardised_asset_list["is_pitched"]
)
& (
self.standardised_asset_list[
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
].isin(["below average", "none"])
)
)
self.standardised_asset_list["epc_has_floor_recommendation"] = (
self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False)
@ -2252,16 +2261,15 @@ class AssetList:
# Check if the boiler is electric
# We check if it contains both the terms boiler & electric
self.standardised_asset_list["has_electric_boiler"] = (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.isin(["boiler and radiators, electric"])
) | (
self.standardised_asset_list[
self.STANDARD_HEATING_SYSTEM]
== "electric boiler"
)
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["mainheat-description"]
]
.str.lower()
.isin(["boiler and radiators, electric"])
) | (
self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
== "electric boiler"
)
####################################
# Check solar eligibility
@ -2399,11 +2407,11 @@ class AssetList:
empty_cavity_map = {
"non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE
+ ": ",
+ ": ",
"non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property "
"already has solar: ",
"already has solar: ",
"non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, "
f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
}
for variable, description in empty_cavity_map.items():
self.standardised_asset_list["cavity_reason"] = np.where(
@ -2419,8 +2427,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
"non_intrusive_indicates_empty_cavity"
]
"non_intrusive_indicates_empty_cavity"
]
& (
self.standardised_asset_list["non-intrusives: WFT Findings"]
.str.lower()
@ -2445,8 +2453,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
"non_intrusive_indicates_empty_cavity"
]
"non_intrusive_indicates_empty_cavity"
]
& self.standardised_asset_list[
"non_intrusive_indicates_cavity_extraction"
]
@ -2461,8 +2469,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
"non_intrusive_indicates_empty_cavity"
]
"non_intrusive_indicates_empty_cavity"
]
& (
self.standardised_asset_list["non-intrusives: Insulated"]
== "RETRO DRILLED"
@ -2478,8 +2486,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
"non_intrusive_indicates_empty_cavity"
]
"non_intrusive_indicates_empty_cavity"
]
& (
self.standardised_asset_list["non-intrusives: Insulated"]
== "FILLED AT BUILD"
@ -2495,8 +2503,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
"non_intrusive_indicates_empty_cavity"
]
"non_intrusive_indicates_empty_cavity"
]
& pd.isnull(self.standardised_asset_list["cavity_reason"])
),
f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"],
@ -2640,7 +2648,7 @@ class AssetList:
identified_work = self.standardised_asset_list[
~pd.isnull(self.standardised_asset_list["cavity_reason"])
| ~pd.isnull(self.standardised_asset_list["solar_reason"])
][self.DOMNA_PROPERTY_ID].values
][self.DOMNA_PROPERTY_ID].values
if self.DOMNA_PROPERTY_ID in self.outcomes.columns:
self.outcomes_for_output = self.outcomes[
@ -2675,12 +2683,12 @@ class AssetList:
blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
== "block of flats"
]
]
non_blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
!= "block of flats"
]
]
# Produce some aggregate figures
self.work_type_figures = {
@ -2723,7 +2731,7 @@ class AssetList:
blocks = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
== "block of flats"
].copy()
].copy()
if blocks.empty:
return
@ -2860,7 +2868,7 @@ class AssetList:
self.standardised_asset_list = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
!= "block of flats"
]
]
self.standardised_asset_list = pd.concat(
[self.standardised_asset_list, expanded_blocks], ignore_index=True
@ -2940,7 +2948,7 @@ class AssetList:
# find any block refs with at least 50% empties
viable_empty_blocks = self.block_analysis_df[
self.block_analysis_df["Percentage of Empties"] >= 0.50
]
]
if not viable_empty_blocks.empty:
project_code_lookup = viable_empty_blocks[["Block Reference"]].copy()
@ -3179,7 +3187,7 @@ class AssetList:
contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[
[self.contact_detail_fields["landlord_property_id"]] + details_colnames
]
]
contact_details = contact_details[
~pd.isnull(
contact_details[self.contact_detail_fields["landlord_property_id"]]
@ -3572,13 +3580,10 @@ class AssetList:
"Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>": date_of_inspections,
"Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>": non_intrusives_construction,
"Non-intrusives: Insulation <LISTING non_intrusives__insulation>": non_intrusives_insulated,
"Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>":
non_intrusives_insulation_material,
"Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>":
non_intrusives_ciga_check_required,
"Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>": non_intrusives_insulation_material,
"Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>": non_intrusives_ciga_check_required,
"Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>": non_intrusives_pv_access,
"Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>":
non_intrusives_roof_orientation,
"Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>": non_intrusives_roof_orientation,
"Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>": non_intrusives_surveyor_notes,
"Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>": non_intrusives_surveyor_name,
"CIGA: Date Requested <LISTING ciga__date_requested>": None, # TODO: Don't have this for the moment
@ -3755,8 +3760,8 @@ class AssetList:
# We compare address line 1 to full address
if any(
df[self.STANDARD_FULL_ADDRESS]
.str.lower()
.str.contains(row["Address Line 1"].lower(), na=False)
.str.lower()
.str.contains(row["Address Line 1"].lower(), na=False)
):
df = df[
df[self.STANDARD_FULL_ADDRESS]
@ -3996,7 +4001,7 @@ class AssetList:
matched = matched[
matched["houseno"].astype(str) == house_no_to_match
]
]
if matched.shape[0] == 1:
lookup_i.append(
{
@ -4021,7 +4026,7 @@ class AssetList:
)[0]
matched = matched[
matched[self.STANDARD_FULL_ADDRESS] == best_match
]
]
lookup_i.append(
{
"row_id": x["row_id"],
@ -4332,7 +4337,7 @@ class AssetList:
df = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID]
== row[master_id_colnames[idx]]
]
]
if df.shape[0] == 1:
matched.append(
{
@ -4438,7 +4443,7 @@ class AssetList:
)[1]
)
> 90
]
]
if df.shape[0] == 0:
unmatched.append(row["row_id"])
@ -4446,8 +4451,8 @@ class AssetList:
if any(
df[self.STANDARD_FULL_ADDRESS]
.str.lower()
.str.contains(
.str.lower()
.str.contains(
" ".join(
[row[house_no_col], row["Street / Block Name"]]
).lower()
@ -4474,7 +4479,7 @@ class AssetList:
row[property_type_col].split(" ")[-1].lower()
)
& (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
]
]
if df.shape[0] != 1:
# We have multiple matches - it's likely because the landlord has a duplicate

View file

@ -21,6 +21,11 @@ EPC_AUTH_TOKEN = os.getenv(
OPENAI_API_KEY = os.getenv(
    "OPENAI_API_KEY",
)
# Startup diagnostic: report only whether a key is configured. The previous
# message sliced the key unconditionally, which raised TypeError at import
# time when the variable was unset (None), and it echoed part of the secret.
print(f"[debug] OPENAI_API_KEY configured: {bool(OPENAI_API_KEY)}")
def extract_address1(
@ -74,23 +79,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
data_filename = "2026-04-22T08_22_00.779745_61049fd3.xlsx"
sheet_name = "in"
postcode_column = "postcode_clean"
address1_column = "address2uprn_address"
data_filename = "input.xlsx"
sheet_name = "Handovers"
postcode_column = "POSTCODE"
address1_column = "Full Addres"
address1_method = None
fulladdress_column = "address2uprn_address"
fulladdress_column = "Full Addres"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = "address2uprn_uprn"
landlord_property_type = "Property Type" # Good to include if landlord gave
landlord_built_form = "Built Form" # Good to include if landlord gave
landlord_os_uprn = "domna_found_uprn"
landlord_property_type = "PROPERTY TYPE" # Good to include if landlord gave
landlord_built_form = "Type Description" # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "UPRN"
landlord_property_id = "PROP REF"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -131,6 +136,7 @@ def app():
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase,
openai_api_key=OPENAI_API_KEY,
)
asset_list.init_standardise()
@ -462,3 +468,9 @@ def app():
asset_list.duplicated_addresses.to_excel(
writer, sheet_name="Duplicate Properties", index=False
)
# (Removed a stray scratch loop, `for key,value in dict.items(): lsakjfldsa`:
# it iterated the builtin `dict` type and referenced an undefined name, so it
# could only raise once the workbook had been written.)

View file

@ -0,0 +1,57 @@
import pandas as pd
from backend.utils.addressMatch import AddressMatch
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check whether every non-null UPRN in ``df[column]`` equals ``uprn``.

    Both sides are compared as stripped strings. A trailing ``.0`` is removed
    first: an integer UPRN column that contains nulls is held by pandas as
    floats, so its values stringify as ``"100012345.0"`` and would otherwise
    never equal ``"100012345"``. ``get_uprn_candidates`` applies the same
    clean-up to its UPRN column.

    Args:
        df: Frame to inspect.
        uprn: The UPRN every non-null row is expected to carry.
        column: Name of the UPRN column in ``df``.

    Returns:
        True only when the column exists and its non-null values collapse to
        exactly one distinct UPRN equal to ``uprn``. False when the column is
        missing, holds no non-null values, or holds several distinct UPRNs.
    """
    if column not in df.columns:
        return False

    found = (
        df[column]
        .dropna()
        .astype(str)
        .str.strip()
        .str.replace(r"\.0$", "", regex=True)
        .unique()
    )

    target = str(uprn).strip()
    if target.endswith(".0"):
        target = target[:-2]

    # An empty ``found`` (no usable UPRNs) also falls through to False here.
    return bool(len(found) == 1 and found[0] == target)
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score and rank candidate rows against a user-supplied address.

    Works on a copy of ``df`` and adds two columns:

    * ``lexiscore`` - ``AddressMatch.levenshtein`` of the normalised user
      address against each value in ``address_column``.
    * ``lexirank`` - dense rank of ``lexiscore``, highest score ranked 1.

    The UPRN column is converted to strings with any trailing ``.0`` removed.
    No UPRN is selected here; the caller decides what to do with the ranking.

    Raises:
        ValueError: if ``address_column`` or ``uprn_column`` is not in ``df``.
    """
    for required in (address_column, uprn_column):
        if required not in df.columns:
            raise ValueError(f"Missing column: {required}")

    candidates = df.copy()
    query = AddressMatch.normalise_address(user_address)

    # NOTE(review): only the user address is normalised here; the candidate
    # addresses are passed to levenshtein as-is - confirm that is intended.
    def _score(candidate_address):
        return AddressMatch.levenshtein(query, candidate_address)

    candidates["lexiscore"] = candidates[address_column].apply(_score)

    uprn_text = candidates[uprn_column].astype(str)
    candidates[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    dense_rank = candidates["lexiscore"].rank(method="dense", ascending=False)
    candidates["lexirank"] = dense_rank.astype(int)

    return candidates.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )

View file

@ -110,6 +110,8 @@ class Addresses:
landlord_multi_glaze_proportion=float(row["landlord_multi_glaze_proportion"]) if row.get(
"landlord_multi_glaze_proportion") else None,
landlord_construction_age_band=row.get("landlord_construction_age_band"),
lmk_key=None,
epc_certificate_number=None,
)
@staticmethod

View file

@ -46,6 +46,8 @@ class Settings(BaseSettings):
EPC_AUTH_TOKEN: str = "changeme"
OPEN_EPC_API_TOKEN: str = "changeme"
GOOGLE_SOLAR_API_KEY: str = "changeme"
MAGICPLAN_CUSTOMER_ID: str = "changeme"
MAGICPLAN_API_KEY: str = "changeme"
# Database settings
DB_HOST: str = "changeme"
@ -78,6 +80,7 @@ class Settings(BaseSettings):
OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None
PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None
OPENAI_API_KEY: Optional[str] = None
# Pas Hub
PASHUB_EMAIL: Optional[str] = None

View file

@ -0,0 +1,141 @@
from typing import Any, cast
from sqlalchemy import delete, select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlmodel import Session, col
from datatypes.magicplan.domain.models import Floor, Plan
from backend.app.db.models.magic_plan import (
MagicPlanDoorModel,
MagicPlanFloorModel,
MagicPlanPlanModel,
MagicPlanRoomModel,
MagicPlanWindowModel,
)
def save_plan(session: Session, plan: Plan) -> None:
    """
    Persist ``plan`` and its floors, rooms, windows and doors.

    The plan row is upserted, every existing child row of that plan is
    deleted, and the children are then re-inserted from ``plan.floors``.
    Nothing is committed here; that is left to the caller.
    """
    plan_id = _upsert_plan(session, plan)

    # Replace, rather than merge, whatever was stored for this plan before.
    _delete_children(session, plan_id)

    floor_ids = _insert_floors(session, plan.floors, plan_id)
    room_ids = _insert_rooms(session, plan.floors, floor_ids)
    _insert_windows_and_doors(session, plan.floors, room_ids)
def _upsert_plan(session: Session, plan: Plan) -> int:
    """
    Insert the plan row, or update it when ``magic_plan_uid`` already exists.

    Returns the ``id`` of the inserted or updated row.
    """
    # Columns that are overwritten when the uid is already present.
    updatable = {
        "name": plan.name,
        "address": plan.address,
        "postcode": plan.postcode,
    }

    insert_stmt = pg_insert(MagicPlanPlanModel).values(
        magic_plan_uid=plan.uid, **updatable
    )
    upsert_stmt = insert_stmt.on_conflict_do_update(
        index_elements=["magic_plan_uid"],
        set_=updatable,
    ).returning(col(MagicPlanPlanModel.id))

    row_id: int = session.execute(upsert_stmt).scalar_one()
    return row_id
def _delete_children(session: Session, plan_id: int) -> None:
    """
    Delete every floor, room, window and door stored under ``plan_id``.

    Rows are removed leaf-first: windows and doors, then rooms, then floors.
    """
    floors_of_plan = (
        select(col(MagicPlanFloorModel.id))
        .where(col(MagicPlanFloorModel.magic_plan_plan_id) == plan_id)
        .scalar_subquery()
    )
    rooms_of_plan = (
        select(col(MagicPlanRoomModel.id))
        .where(col(MagicPlanRoomModel.magic_plan_floor_id).in_(floors_of_plan))
        .scalar_subquery()
    )

    # Order matters: each statement removes rows the next one's targets own.
    deletions = (
        delete(MagicPlanWindowModel).where(
            col(MagicPlanWindowModel.magic_plan_room_id).in_(rooms_of_plan)
        ),
        delete(MagicPlanDoorModel).where(
            col(MagicPlanDoorModel.magic_plan_room_id).in_(rooms_of_plan)
        ),
        delete(MagicPlanRoomModel).where(
            col(MagicPlanRoomModel.magic_plan_floor_id).in_(floors_of_plan)
        ),
        delete(MagicPlanFloorModel).where(
            col(MagicPlanFloorModel.magic_plan_plan_id) == plan_id
        ),
    )
    for statement in deletions:
        session.execute(statement)
def _insert_floors(session: Session, floors: list[Floor], plan_id: int) -> list[int]:
rows: list[dict[str, Any]] = [
{"magic_plan_plan_id": plan_id, "level": floor.level} for floor in floors
]
result = session.execute(
pg_insert(MagicPlanFloorModel)
.values(rows)
.returning(col(MagicPlanFloorModel.id))
)
return cast(list[int], list(result.scalars().all()))
def _insert_rooms(
session: Session, floors: list[Floor], floor_ids: list[int]
) -> list[int]:
rows: list[dict[str, Any]] = [
{
"magic_plan_floor_id": floor_id,
"name": room.name,
"width_m": room.width_m,
"length_m": room.length_m,
"area_m2": room.area_m2,
}
for floor, floor_id in zip(floors, floor_ids)
for room in floor.rooms
]
result = session.execute(
pg_insert(MagicPlanRoomModel).values(rows).returning(col(MagicPlanRoomModel.id))
)
return cast(list[int], list(result.scalars().all()))
def _insert_windows_and_doors(
session: Session, floors: list[Floor], room_ids: list[int]
) -> None:
all_rooms = [room for floor in floors for room in floor.rooms]
window_rows: list[dict[str, Any]] = [
{
"magic_plan_room_id": room_id,
"width_m": window.width_m,
"height_m": window.height_m,
"area_m2": window.area_m2,
"opening_type": window.opening_type,
}
for room, room_id in zip(all_rooms, room_ids)
for window in room.windows
]
door_rows: list[dict[str, Any]] = [
{
"magic_plan_room_id": room_id,
"width_mm": door.width_mm,
}
for room, room_id in zip(all_rooms, room_ids)
for door in room.doors
]
if window_rows:
session.execute(pg_insert(MagicPlanWindowModel).values(window_rows))
if door_rows:
session.execute(pg_insert(MagicPlanDoorModel).values(door_rows))

View file

@ -0,0 +1,41 @@
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlmodel import SQLModel
import backend.app.db.models.magic_plan # noqa: F401 — registers MagicPlan models with SQLModel.metadata
# TODO: promote to backend/app/db/conftest.py once a second DB-touching test directory appears under this tree
@pytest.fixture(scope="function")
def engine(postgresql):
    """Yield an engine for the ``postgresql`` fixture's database.

    All tables registered on ``SQLModel.metadata`` are created before the
    test and dropped afterwards; the engine is then disposed.
    """
    info = postgresql.info
    url = (
        f"postgresql+psycopg://{info.user}:{info.password}"
        f"@{info.host}:{info.port}/{info.dbname}"
    )
    db_engine = create_engine(url)
    SQLModel.metadata.create_all(db_engine)
    yield db_engine
    SQLModel.metadata.drop_all(db_engine)
    db_engine.dispose()
@pytest.fixture(scope="function")
def db_session(engine):
    """Yield a session bound to a connection with an open outer transaction.

    At teardown the session is closed, the outer transaction is rolled back
    and the connection is closed.
    """
    conn = engine.connect()
    outer_tx = conn.begin()
    make_session = sessionmaker(bind=conn)
    scoped = make_session()
    yield scoped
    scoped.close()
    outer_tx.rollback()
    conn.close()

View file

@ -0,0 +1,95 @@
import json
from pathlib import Path
import pytest
from sqlalchemy import func, select
from sqlalchemy.orm import Session
from sqlmodel import SQLModel
from datatypes.magicplan.api.response import MagicPlanPlan
from datatypes.magicplan.domain.mapper import map_plan
from datatypes.magicplan.domain.models import Plan
from backend.app.db.functions.magic_plan_functions import save_plan
from backend.app.db.models.magic_plan import (
MagicPlanDoorModel,
MagicPlanFloorModel,
MagicPlanPlanModel,
MagicPlanRoomModel,
MagicPlanWindowModel,
)
FIXTURE_DIR = Path(__file__).parents[4] / "magic_plan"
@pytest.fixture(scope="module")
def domain_plan() -> Plan:
    """Load the example API response from ``FIXTURE_DIR`` and map it to a domain ``Plan``."""
    fixture_path = FIXTURE_DIR / "magicplan_api_plan_response_example.json"
    payload = json.loads(fixture_path.read_text())
    api_plan = MagicPlanPlan.model_validate(payload["data"])
    return map_plan(api_plan)
def _count(session: Session, model: type[SQLModel]) -> int:
    """Return the result of ``SELECT count(*)`` over ``model``'s table."""
    count_stmt = select(func.count()).select_from(model)
    outcome = session.execute(count_stmt)
    return outcome.scalar_one()
def test_plan_row_present_after_save(db_session: Session, domain_plan: Plan) -> None:
    """One ``save_plan`` call leaves exactly one plan row."""
    # Act
    save_plan(db_session, domain_plan)

    # Assert
    plan_rows = _count(db_session, MagicPlanPlanModel)
    assert plan_rows == 1
def test_floor_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
    """The floor table holds as many rows as the domain plan has floors."""
    # Arrange
    floor_total = len(domain_plan.floors)

    # Act
    save_plan(db_session, domain_plan)

    # Assert
    stored = _count(db_session, MagicPlanFloorModel)
    assert stored == floor_total
def test_room_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
    """The room table holds as many rows as the domain plan has rooms."""
    # Arrange
    room_total = len([room for floor in domain_plan.floors for room in floor.rooms])

    # Act
    save_plan(db_session, domain_plan)

    # Assert
    stored = _count(db_session, MagicPlanRoomModel)
    assert stored == room_total
def test_window_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
    """The window table holds as many rows as the domain plan has windows."""
    # Arrange
    window_total = 0
    for floor in domain_plan.floors:
        for room in floor.rooms:
            window_total += len(room.windows)

    # Act
    save_plan(db_session, domain_plan)

    # Assert
    stored = _count(db_session, MagicPlanWindowModel)
    assert stored == window_total
def test_door_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
    """The door table holds as many rows as the domain plan has doors."""
    # Arrange
    door_total = 0
    for floor in domain_plan.floors:
        for room in floor.rooms:
            door_total += len(room.doors)

    # Act
    save_plan(db_session, domain_plan)

    # Assert
    stored = _count(db_session, MagicPlanDoorModel)
    assert stored == door_total
def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
    """Saving the same plan twice yields the row counts of a single save."""
    # Act — call twice within the same session
    for _ in range(2):
        save_plan(db_session, domain_plan)

    # Assert — same row counts as a single call
    rooms = [room for floor in domain_plan.floors for room in floor.rooms]
    expected_counts = [
        (MagicPlanPlanModel, 1),
        (MagicPlanFloorModel, len(domain_plan.floors)),
        (MagicPlanRoomModel, len(rooms)),
        (MagicPlanWindowModel, sum(len(room.windows) for room in rooms)),
        (MagicPlanDoorModel, sum(len(room.doors) for room in rooms)),
    ]
    for model, expected in expected_counts:
        assert _count(db_session, model) == expected

View file

@ -18,8 +18,9 @@ class EpcPropertyModel(SQLModel, table=True):
__tablename__ = "epc_property"
id: Optional[int] = Field(default=None, primary_key=True)
property_id: int = Field(foreign_key="property.id", nullable=False)
portfolio_id: int = Field(foreign_key="portfolio.id", nullable=False)
property_id: Optional[int] = Field(default=None)
portfolio_id: Optional[int] = Field(default=None)
uploaded_file_id: Optional[int] = Field(default=None)
# Identity / admin
uprn: Optional[int] = Field(default=None)
@ -148,8 +149,8 @@ class EpcPropertyModel(SQLModel, table=True):
def from_epc_property_data(
cls,
data: EpcPropertyData,
property_id: int,
portfolio_id: int,
property_id: Optional[int] = None,
portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
es = data.sap_energy_source
h = data.sap_heating
@ -593,7 +594,7 @@ class EpcWindowModel(SQLModel, table=True):
id: Optional[int] = Field(default=None, primary_key=True)
epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False)
pvc_frame: str
frame_material: Optional[str] = Field(default=None)
glazing_gap: str
orientation: str
window_type: str
@ -607,7 +608,7 @@ class EpcWindowModel(SQLModel, table=True):
frame_factor: Optional[float] = Field(default=None)
permanent_shutters_insulated: Optional[str] = Field(default=None)
transmission_u_value: Optional[float] = Field(default=None)
transmission_data_source: Optional[int] = Field(default=None)
transmission_data_source: Optional[str] = Field(default=None)
transmission_solar_transmittance: Optional[float] = Field(default=None)
@classmethod
@ -615,7 +616,7 @@ class EpcWindowModel(SQLModel, table=True):
td = window.window_transmission_details
return cls(
epc_property_id=epc_property_id,
pvc_frame=str(window.pvc_frame),
frame_material=window.frame_material,
glazing_gap=str(window.glazing_gap),
orientation=str(window.orientation),
window_type=str(window.window_type),

View file

@ -67,6 +67,17 @@ class HubspotDealData(SQLModel, table=True):
surveyed_date: Optional[datetime] = Field(default=None)
design_type: Optional[str] = Field(default=None)
survey_type: Optional[str] = Field(default=None)
measures_for_pibi_ordered: Optional[str] = Field(default=None)
pibi_order_date: Optional[datetime] = Field(default=None)
pibi_completed_date: Optional[datetime] = Field(default=None)
property_halted_date: Optional[datetime] = Field(default=None)
property_halted_reason: Optional[str] = Field(default=None)
technical_approved_measures_for_install: Optional[str] = Field(default=None)
sent_to_installer_for_pricing: Optional[datetime] = Field(default=None)
domna_survey_required: Optional[bool] = Field(default=None)
domna_survey_date: Optional[datetime] = Field(default=None)
created_at: Optional[datetime] = Field(
sa_column=Column(
DateTime(timezone=True),

View file

@ -0,0 +1,13 @@
from sqlmodel import SQLModel, Field
from datetime import datetime
from typing import Optional
class HubspotUser(SQLModel, table=True):
    """Table model for ``hubspot_users``, keyed by ``hubspot_owner_id``."""

    __tablename__ = "hubspot_users"
    # Primary key; declared as a string.
    hubspot_owner_id: str = Field(primary_key=True)
    # Optional descriptive fields, all defaulting to None.
    first_name: Optional[str] = Field(default=None)
    last_name: Optional[str] = Field(default=None)
    email: Optional[str] = Field(default=None)
    # No default is declared here, so a value has to be supplied by the caller.
    updated_at: datetime

View file

@ -0,0 +1,52 @@
from typing import Optional
from sqlmodel import Field, SQLModel
class MagicPlanPlanModel(SQLModel, table=True):
    """Table model for ``magic_plan_plan``: the root row of a stored plan."""

    __tablename__ = "magic_plan_plan"
    # Surrogate key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Declared unique and indexed; nullable.
    # NOTE(review): presumably the plan's identifier on the MagicPlan side — confirm.
    magic_plan_uid: Optional[str] = Field(default=None, unique=True, index=True)
    name: Optional[str] = None
    address: Optional[str] = None
    postcode: Optional[str] = None
class MagicPlanFloorModel(SQLModel, table=True):
    """Table model for ``magic_plan_floor``: one floor belonging to a plan."""

    __tablename__ = "magic_plan_floor"
    # Surrogate key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Foreign key to magic_plan_plan.id (no cascade options are declared here).
    magic_plan_plan_id: int = Field(foreign_key="magic_plan_plan.id")
    level: Optional[int] = None
class MagicPlanRoomModel(SQLModel, table=True):
    """Table model for ``magic_plan_room``: one room belonging to a floor."""

    __tablename__ = "magic_plan_room"
    # Surrogate key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Foreign key to magic_plan_floor.id (no cascade options are declared here).
    magic_plan_floor_id: int = Field(foreign_key="magic_plan_floor.id")
    name: Optional[str] = None
    # Measurements; the field suffixes (_m, _m2) suggest metres and square
    # metres — NOTE(review): units are not enforced anywhere in this model.
    width_m: Optional[float] = None
    length_m: Optional[float] = None
    area_m2: Optional[float] = None
class MagicPlanWindowModel(SQLModel, table=True):
    """Table model for ``magic_plan_window``: one window belonging to a room."""

    __tablename__ = "magic_plan_window"
    # Surrogate key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Foreign key to magic_plan_room.id (no cascade options are declared here).
    magic_plan_room_id: int = Field(foreign_key="magic_plan_room.id")
    # Measurements; suffixes suggest metres / square metres — not enforced here.
    width_m: Optional[float] = None
    height_m: Optional[float] = None
    area_m2: Optional[float] = None
    # Free-form string; no set of allowed values is declared in this model.
    opening_type: Optional[str] = None
class MagicPlanDoorModel(SQLModel, table=True):
    """Table model for ``magic_plan_door``: one door belonging to a room."""

    __tablename__ = "magic_plan_door"
    # Surrogate key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Foreign key to magic_plan_room.id (no cascade options are declared here).
    magic_plan_room_id: int = Field(foreign_key="magic_plan_room.id")
    # NOTE(review): the name says millimetres, but the model test instantiates
    # this with 0.79, which reads like metres — confirm the intended unit.
    width_mm: Optional[float] = None
    # NOTE(review): the `_insert_windows_and_doors` helper writes only
    # magic_plan_room_id and width_mm, so this column is left unset on that path.
    type: Optional[str] = None

View file

@ -0,0 +1,134 @@
from backend.app.db.models.magic_plan import (
MagicPlanDoorModel,
MagicPlanFloorModel,
MagicPlanPlanModel,
MagicPlanRoomModel,
MagicPlanWindowModel,
)
# --- MagicPlanPlan ---
def test_plan_table_name() -> None:
    """The plan model maps to the ``magic_plan_plan`` table."""
    table_name = MagicPlanPlanModel.__tablename__
    assert table_name == "magic_plan_plan"
def test_plan_has_magic_plan_uid_column() -> None:
    """The plan table declares a ``magic_plan_uid`` column."""
    plan_columns = MagicPlanPlanModel.__table__.columns
    assert "magic_plan_uid" in plan_columns
def test_plan_magic_plan_uid_is_unique() -> None:
    """``magic_plan_uid`` is unique via the column flag, a unique index, or a unique constraint.

    The check looks at all three places uniqueness can be declared:
    ``Column.unique``, unique ``Index`` objects in ``Table.indexes``, and
    ``UniqueConstraint`` objects in ``Table.constraints``. Constraints are
    matched by type rather than by reading a ``unique`` attribute, which
    constraint objects do not provide.
    """
    # Local import: only this test needs the constraint class.
    from sqlalchemy import UniqueConstraint

    table = MagicPlanPlanModel.__table__
    column = table.columns["magic_plan_uid"]

    def covers_uid(schema_item) -> bool:
        """Return True when ``schema_item`` spans the ``magic_plan_uid`` column."""
        return "magic_plan_uid" in [c.name for c in schema_item.columns]

    has_unique_constraint = any(
        isinstance(constraint, UniqueConstraint) and covers_uid(constraint)
        for constraint in table.constraints
    )
    has_unique_index = any(
        index.unique and covers_uid(index) for index in table.indexes
    )

    assert column.unique or has_unique_index or has_unique_constraint
def test_plan_instantiation() -> None:
    """Constructor keyword arguments are readable back as attributes."""
    kwargs = {
        "magic_plan_uid": "uid-123",
        "name": "Test",
        "address": "1 High St",
        "postcode": "SW1A 1AA",
    }
    created = MagicPlanPlanModel(**kwargs)
    assert created.magic_plan_uid == "uid-123"
    assert created.name == "Test"
    assert created.postcode == "SW1A 1AA"
# --- MagicPlanFloor ---
def test_floor_table_name() -> None:
    """The floor model maps to the ``magic_plan_floor`` table."""
    table_name = MagicPlanFloorModel.__tablename__
    assert table_name == "magic_plan_floor"
def test_floor_fk_column_name() -> None:
    """The floor table declares a ``magic_plan_plan_id`` column."""
    floor_columns = MagicPlanFloorModel.__table__.columns
    assert "magic_plan_plan_id" in floor_columns
def test_floor_has_level() -> None:
    """A floor built with ``level=0`` reports that level."""
    ground_floor = MagicPlanFloorModel(level=0, magic_plan_plan_id=1)
    assert ground_floor.level == 0
# --- MagicPlanRoom ---
def test_room_table_name() -> None:
    """The room model maps to the ``magic_plan_room`` table."""
    table_name = MagicPlanRoomModel.__tablename__
    assert table_name == "magic_plan_room"
def test_room_fk_column_name() -> None:
    """The room table declares a ``magic_plan_floor_id`` column."""
    room_columns = MagicPlanRoomModel.__table__.columns
    assert "magic_plan_floor_id" in room_columns
def test_room_has_measurement_columns() -> None:
    """The room table declares ``width_m``, ``length_m`` and ``area_m2`` columns."""
    room_columns = MagicPlanRoomModel.__table__.columns
    for column_name in ("width_m", "length_m", "area_m2"):
        assert column_name in room_columns
def test_room_instantiation() -> None:
    """Constructor keyword arguments are readable back as attributes."""
    kitchen = MagicPlanRoomModel(
        magic_plan_floor_id=1,
        name="Kitchen",
        width_m=2.67,
        length_m=2.98,
        area_m2=7.95,
    )
    assert kitchen.name == "Kitchen"
    assert kitchen.width_m == 2.67
# --- MagicPlanWindow ---
def test_window_table_name() -> None:
    """The window model maps to the ``magic_plan_window`` table."""
    table_name = MagicPlanWindowModel.__tablename__
    assert table_name == "magic_plan_window"
def test_window_fk_column_name() -> None:
    """The window table declares a ``magic_plan_room_id`` column."""
    window_columns = MagicPlanWindowModel.__table__.columns
    assert "magic_plan_room_id" in window_columns
def test_window_has_measurement_columns() -> None:
    """The window table declares its measurement and ``opening_type`` columns."""
    window_columns = MagicPlanWindowModel.__table__.columns
    for column_name in ("width_m", "height_m", "area_m2", "opening_type"):
        assert column_name in window_columns
def test_window_instantiation() -> None:
    """A window built with ``opening_type="casement"`` reports that value."""
    kwargs = {
        "magic_plan_room_id": 1,
        "width_m": 1.4,
        "height_m": 1.2,
        "area_m2": 1.68,
        "opening_type": "casement",
    }
    casement = MagicPlanWindowModel(**kwargs)
    assert casement.opening_type == "casement"
# --- MagicPlanDoor ---
def test_door_table_name() -> None:
    """The door model maps to the ``magic_plan_door`` table."""
    table_name = MagicPlanDoorModel.__tablename__
    assert table_name == "magic_plan_door"
def test_door_fk_column_name() -> None:
    """The door table declares a ``magic_plan_room_id`` column."""
    door_columns = MagicPlanDoorModel.__table__.columns
    assert "magic_plan_room_id" in door_columns
def test_door_has_width_mm_and_type() -> None:
    """The door table declares ``width_mm`` and ``type`` columns."""
    door_columns = MagicPlanDoorModel.__table__.columns
    for column_name in ("width_mm", "type"):
        assert column_name in door_columns
def test_door_instantiation() -> None:
    """Constructor keyword arguments are readable back as attributes."""
    hinged_door = MagicPlanDoorModel(type="hinged", width_mm=0.79, magic_plan_room_id=1)
    assert hinged_door.width_mm == 0.79
    assert hinged_door.type == "hinged"

View file

@ -2,8 +2,8 @@ from fastapi import APIRouter, HTTPException, status
from jose import jwt, jwe
import json
import datetime
from app.config import get_settings
from app.dependencies import get_derived_encryption_key
from backend.app.config import get_settings
from backend.app.dependencies import get_derived_encryption_key
router = APIRouter(
prefix="/local",
@ -27,7 +27,12 @@ def create_dummy_token(secret: str) -> str:
"dbId": "known_id",
}
token = jwe.encrypt(json.dumps(claims), get_derived_encryption_key(secret), algorithm="dir", encryption="A256GCM")
token = jwe.encrypt(
json.dumps(claims),
get_derived_encryption_key(secret),
algorithm="dir",
encryption="A256GCM",
)
return token
@ -40,6 +45,8 @@ async def dummy_token():
async def dummy_token():
settings = get_settings()
if settings.ENVIRONMENT != "local":
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN,
detail="Dummy token can only be generated in local environment")
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Dummy token can only be generated in local environment",
)
return {"dummy_token": create_dummy_token(settings.SECRET_KEY)}

View file

@ -30,10 +30,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
logger.error(f"Validation Errors: {exc.errors()}")
return JSONResponse(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
content=jsonable_encoder({
"detail": exc.errors(),
"body": exc.body
}),
content=jsonable_encoder({"detail": exc.errors(), "body": exc.body}),
)
@ -63,7 +60,8 @@ app.include_router(tasks_router.router, prefix="/v1")
app.include_router(bulk_uploads_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
from app.local import router as local_router
from backend.app.local import router as local_router
app.include_router(local_router.router)
handler = Mangum(app)
@ -98,10 +96,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
logger.error(f"Validation Errors: {exc.errors()}")
return JSONResponse(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
content=jsonable_encoder({
"detail": exc.errors(),
"body": exc.body
}),
content=jsonable_encoder({"detail": exc.errors(), "body": exc.body}),
)
@ -130,7 +125,8 @@ app.include_router(whlg_router.router, prefix="/v1")
app.include_router(bulk_uploads_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
from app.local import router as local_router
from backend.app.local import router as local_router
app.include_router(local_router.router)
handler = Mangum(app)

View file

@ -0,0 +1,76 @@
from typing import Optional
from sqlmodel import Session
from backend.app.db.models.epc_property import (
EpcBuildingPartModel,
EpcEnergyElementModel,
EpcFlatDetailsModel,
EpcFloorDimensionModel,
EpcMainHeatingDetailModel,
EpcPropertyEnergyPerformanceModel,
EpcPropertyModel,
EpcWindowModel,
)
from datatypes.epc.domain.epc_property_data import EpcPropertyData
def save_epc_property_data(
    session: Session,
    data: EpcPropertyData,
    uploaded_file_id: Optional[int] = None,
    property_id: Optional[int] = None,
    portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
    """Add an EPC property and all of its child rows to ``session``.

    The parent row is flushed first so its id can be used by the children.
    Each building part is flushed as well, because its floor dimensions need
    the part id. Nothing is committed here.

    Returns:
        The ``EpcPropertyModel`` that was added (id populated by the flush).
    """
    parent = EpcPropertyModel.from_epc_property_data(
        data, property_id=property_id, portfolio_id=portfolio_id
    )
    parent.uploaded_file_id = uploaded_file_id
    session.add(parent)
    session.flush()
    assert parent.id is not None
    epc_property_id: int = parent.id

    performance = EpcPropertyEnergyPerformanceModel.from_epc_property_data(
        data, epc_property_id=epc_property_id
    )
    session.add(performance)

    session.add_all(
        EpcMainHeatingDetailModel.from_domain(detail, epc_property_id)
        for detail in data.sap_heating.main_heating_details
    )

    for part in data.sap_building_parts:
        building_part = EpcBuildingPartModel.from_domain(part, epc_property_id)
        session.add(building_part)
        # Flush per part: the floor dimensions below need the part's id.
        session.flush()
        assert building_part.id is not None
        for dimension in part.sap_floor_dimensions:
            session.add(EpcFloorDimensionModel.from_domain(dimension, building_part.id))

    session.add_all(
        EpcWindowModel.from_domain(window, epc_property_id)
        for window in data.sap_windows
    )

    # List-valued energy elements, in the order they are persisted.
    grouped_elements = (
        ("roof", data.roofs),
        ("wall", data.walls),
        ("floor", data.floors),
        ("main_heating", data.main_heating),
    )
    for element_type, elements in grouped_elements:
        for element in elements:
            session.add(
                EpcEnergyElementModel.from_domain(element, element_type, epc_property_id)
            )

    # Single, optional energy elements; absent ones are skipped.
    single_elements = (
        ("window", data.window),
        ("lighting", data.lighting),
        ("hot_water", data.hot_water),
        ("secondary_heating", data.secondary_heating),
        ("main_heating_controls", data.main_heating_controls),
    )
    for element_type, element in single_elements:
        if element is None:
            continue
        session.add(
            EpcEnergyElementModel.from_domain(element, element_type, epc_property_id)
        )

    flat_details = data.sap_flat_details
    if flat_details is not None:
        session.add(EpcFlatDetailsModel.from_domain(flat_details, epc_property_id))

    return parent

View file

@ -0,0 +1,451 @@
import re
from datetime import date, datetime
from typing import List, Optional
from datatypes.epc.surveys.elmhurst_site_notes import (
BathsAndShowers,
BuildingPartDimensions,
ElmhurstSiteNotes,
FloorDetails,
FloorDimension,
Lighting,
MainHeating,
Meters,
PropertyDetails,
Renewables,
RoofDetails,
Shower,
SurveyorInfo,
VentilationAndCooling,
WallDetails,
WaterHeating,
Window,
)
class ElmhurstSiteNotesExtractor:
    """Build an ``ElmhurstSiteNotes`` record from the text pages of a document.

    The pages are joined into one text and also split into stripped,
    non-empty lines. Most fields are read by locating a label and taking the
    value that follows it, either on the same line (``Label: value``) or on
    the next line. Table-like sections (dimensions, windows, showers) are
    parsed with regular expressions or positional token scans.

    Numeric fields are parsed tolerantly: text that does not start with a
    number yields the field's "missing" value (``None``, ``0`` or ``0.0``)
    instead of raising.
    """

    def __init__(self, pages: List[str]) -> None:
        """Store the joined page text and its stripped, non-empty lines."""
        self._text = "\n".join(pages)
        self._lines = [
            line.strip() for line in self._text.splitlines() if line.strip()
        ]

    # --- generic helpers ---

    @staticmethod
    def _parse_int(raw: Optional[str]) -> Optional[int]:
        """Return the first whitespace-separated token of ``raw`` as an int.

        Returns ``None`` when ``raw`` is empty/``None`` or the token is not an
        integer (for example ``"Unmeasurable"``).
        """
        if not raw:
            return None
        try:
            return int(raw.split()[0])
        except (ValueError, IndexError):
            return None

    @staticmethod
    def _parse_float(raw: Optional[str]) -> Optional[float]:
        """Return the first whitespace-separated token of ``raw`` as a float.

        Returns ``None`` when ``raw`` is empty/``None`` or the token is not a
        number.
        """
        if not raw:
            return None
        try:
            return float(raw.split()[0])
        except (ValueError, IndexError):
            return None

    def _next_val(self, label: str) -> Optional[str]:
        """Look ``label`` up across all document lines; see ``_local_val``."""
        return self._local_val(self._lines, label)

    def _str_val(self, label: str) -> str:
        """Return the whitespace-normalised value for ``label``, or ``""``."""
        v = self._next_val(label)
        return " ".join(v.split()) if v else ""

    def _opt_str(self, label: str) -> Optional[str]:
        """Return the whitespace-normalised value for ``label``, or ``None``."""
        v = self._next_val(label)
        return " ".join(v.split()) if v else None

    def _bool_val(self, label: str) -> bool:
        """Return True only when the value for ``label`` is "yes" (any case)."""
        v = self._next_val(label)
        return v is not None and v.lower() == "yes"

    def _int_val(self, label: str) -> int:
        """Return the leading integer of the value for ``label``, or 0."""
        parsed = self._parse_int(self._next_val(label))
        return 0 if parsed is None else parsed

    def _date_val(self, label: str) -> date:
        """Return the value for ``label`` parsed as ``dd/mm/YYYY``.

        Raises:
            ValueError: if the label has no value or the value does not match
                the expected format.
        """
        v = self._next_val(label)
        if not v:
            raise ValueError(f"Missing date for label: {label}")
        return datetime.strptime(v.strip(), "%d/%m/%Y").date()

    def _between(self, start: str, end: str) -> str:
        """Return the text after the first ``start`` up to the next ``end``.

        Returns ``""`` when either marker is not found.
        """
        try:
            s = self._text.index(start) + len(start)
            e = self._text.index(end, s)
            return self._text[s:e]
        except ValueError:
            return ""

    def _section_lines(self, start: str, end: str) -> List[str]:
        """Return the stripped, non-empty lines between two markers."""
        text = self._between(start, end)
        return [line.strip() for line in text.splitlines() if line.strip()]

    def _local_val(self, lines: List[str], label: str) -> Optional[str]:
        """Find the value that follows ``label`` within ``lines``.

        A trailing colon on ``label`` is optional. The first line that matches
        decides the result:

        * ``"Label: value"`` on one line returns ``value``.
        * A line that is exactly ``Label:`` or ``Label`` returns the next line,
          unless that line ends with ``:`` (another label) or starts with
          ``©``, in which case the result is ``None``.
        """
        bare = label.rstrip(":")
        with_colon = bare + ":"
        for i, line in enumerate(lines):
            if line.startswith(with_colon) and len(line) > len(with_colon):
                return line[len(with_colon):].strip() or None
            if line == with_colon or line == bare:
                for j in range(i + 1, min(i + 4, len(lines))):
                    v = lines[j]
                    if v.endswith(":") or v.startswith("©"):
                        return None
                    if v:
                        return v
                return None
        return None

    def _local_str(self, lines: List[str], label: str) -> str:
        """Return the whitespace-normalised ``_local_val`` result, or ``""``."""
        v = self._local_val(lines, label)
        return " ".join(v.split()) if v else ""

    def _local_bool(self, lines: List[str], label: str) -> bool:
        """Return True only when the ``_local_val`` result is "yes" (any case)."""
        v = self._local_val(lines, label)
        return v is not None and v.lower() == "yes"

    # --- section extractors ---

    def _extract_surveyor_info(self) -> SurveyorInfo:
        """Read the surveyor fields from their labels."""
        return SurveyorInfo(
            surveyor_code=self._str_val("Surveyor"),
            name=self._str_val("Name"),
            title=self._str_val("Title"),
            tel_number=self._str_val("Tel Number"),
            survey_reference=self._str_val("Survey Reference"),
            my_reference=self._opt_str("My Reference"),
        )

    def _extract_property_details(self) -> PropertyDetails:
        """Read the property details; both date fields are mandatory.

        Raises:
            ValueError: from ``_date_val`` when a date is missing or malformed.
        """
        # This label is wrapped over two lines in the text, so it is matched
        # with a regex on the raw text rather than through the line helpers.
        epc_m = re.search(
            r"Check for the existence of\nan EPC:\n(Yes|No)", self._text
        )
        epc_exists = epc_m.group(1).lower() == "yes" if epc_m else False
        return PropertyDetails(
            rdsap_version=self._str_val("RdSAP version"),
            reference_number=self._str_val("Reference Number"),
            lodgement_required=self._bool_val("Lodgement Required"),
            regs_region=self._str_val("Regs Region"),
            epc_language=self._str_val("EPC Language"),
            postcode=self._str_val("Postcode"),
            region=self._str_val("Region"),
            street=self._str_val("Street"),
            town=self._str_val("Town"),
            tenure=self._str_val("Property Tenure"),
            transaction_type=self._str_val("Transaction Type"),
            inspection_date=self._date_val("Inspection Date"),
            process_date=self._date_val("Process date"),
            epc_exists=epc_exists,
            uprn=self._opt_str("UPRN"),
            house_name=self._opt_str("House Name"),
            house_number=self._opt_str("House No"),
            locality=self._opt_str("Locality"),
            county=self._opt_str("County"),
        )

    def _extract_attachment(self) -> str:
        """Return the second line after ``1.0 Property type:``, or ``""``."""
        m = re.search(r"1\.0 Property type:\n[^\n]+\n([^\n]+)", self._text)
        return " ".join(m.group(1).strip().split()) if m else ""

    def _extract_dimensions(self) -> BuildingPartDimensions:
        """Parse the per-floor rows between sections 4.0 and 5.0.

        Each row is a ``... Floor:`` label followed by four numeric lines
        (area, room height, heat-loss perimeter, party-wall length).
        """
        dim_type = self._str_val("Dimension type")
        section = self._between("4.0 Dimensions:", "5.0 Conservatory:")
        floor_matches = re.findall(
            r"([A-Za-z ]+Floor):\n([\d.]+)\n([\d.]+)\n([\d.]+)\n([\d.]+)",
            section,
        )
        floors = [
            FloorDimension(
                name=name.strip(),
                area_m2=float(area),
                room_height_m=float(height),
                heat_loss_perimeter_m=float(hlp),
                party_wall_length_m=float(pwl),
            )
            for name, area, height, hlp, pwl in floor_matches
        ]
        return BuildingPartDimensions(dimension_type=dim_type, floors=floors)

    def _extract_walls(self) -> WallDetails:
        """Read the wall fields from the section between 7.0 and 8.0."""
        lines = self._section_lines("7.0 Walls:", "8.0 Roofs:")
        # Non-numeric thickness text (or no value) becomes None.
        thickness_mm = self._parse_int(self._local_val(lines, "Wall Thickness"))
        return WallDetails(
            wall_type=self._local_str(lines, "Type"),
            insulation=self._local_str(lines, "Insulation"),
            thickness_unknown=self._local_bool(lines, "Wall Thickness Unknown"),
            u_value_known=self._local_bool(lines, "U-value Known"),
            party_wall_type=self._local_str(lines, "Party Wall Type"),
            thickness_mm=thickness_mm,
        )

    def _extract_roof(self) -> RoofDetails:
        """Read the roof fields from the section between 8.0 and 8.1."""
        lines = self._section_lines("8.0 Roofs:", "8.1 Rooms in Roof:")
        # Non-numeric thickness text (or no value) becomes None.
        thickness_mm = self._parse_int(
            self._local_val(lines, "Insulation Thickness")
        )
        return RoofDetails(
            roof_type=self._local_str(lines, "Type"),
            insulation=self._local_str(lines, "Insulation"),
            u_value_known=self._local_bool(lines, "U-value Known"),
            insulation_thickness_mm=thickness_mm,
        )

    def _extract_floor(self) -> FloorDetails:
        """Read the floor fields from the section between 9.0 and 10.0."""
        lines = self._section_lines("9.0 Floors:", "10.0 Doors:")
        # A missing or non-numeric U-value becomes None.
        default_u = self._parse_float(self._local_val(lines, "Default U-value"))
        return FloorDetails(
            location=self._local_str(lines, "Location"),
            floor_type=self._local_str(lines, "Type"),
            insulation=self._local_str(lines, "Insulation"),
            u_value_known=self._local_bool(lines, "U-value Known"),
            default_u_value=default_u,
        )

    def _extract_windows(self) -> List[Window]:
        """Parse the window table that sits between its header and ``Draught Proofing``.

        The table is scanned as a flat list of tokens, one per non-empty line.
        A row starts at three consecutive numbers (width, height, area). It is
        followed by the glazing description, the frame factor (the first
        number in ``(0, 1]``), an optional glazing gap containing ``mm``, and
        eight fixed trailing fields. Parsing stops quietly at a truncated
        final row; windows parsed so far are returned.

        Raises:
            ValueError: if a row's U-value or g-value token is not numeric.
        """
        table = re.search(
            r"Permanent\s+Shutters\n(.*?)Draught Proofing",
            self._text,
            re.DOTALL,
        )
        if not table:
            return []
        tokens = [t.strip() for t in table.group(1).splitlines() if t.strip()]
        windows: List[Window] = []
        i = 0
        while i + 12 < len(tokens):
            try:
                width_m = float(tokens[i])
                height_m = float(tokens[i + 1])
                area_m2 = float(tokens[i + 2])
            except (ValueError, IndexError):
                # Not the start of a row; resynchronise on the next token.
                i += 1
                continue
            i += 3

            # Collect glazing type tokens until frame_factor (0 < v <= 1.0)
            glazing_parts: List[str] = []
            while i < len(tokens):
                try:
                    numeric = float(tokens[i])
                except ValueError:
                    numeric = None
                if numeric is not None and 0.0 < numeric <= 1.0:
                    break
                glazing_parts.append(tokens[i])
                i += 1

            # If last glazing token is a single word (no spaces, not numeric)
            # it's the frame_type
            frame_type: Optional[str] = None
            if (
                glazing_parts
                and " " not in glazing_parts[-1]
                and not glazing_parts[-1].replace(".", "").isdigit()
            ):
                frame_type = glazing_parts.pop()
            glazing_type = " ".join(glazing_parts).strip()

            if i >= len(tokens):
                break
            frame_factor = float(tokens[i])
            i += 1

            # Consume glazing_gap if present ("mm" token, possibly
            # multi-token e.g. "16 mm or more")
            glazing_gap: Optional[str] = None
            if i < len(tokens) and "mm" in tokens[i]:
                gap_parts = [tokens[i]]
                i += 1
                while i < len(tokens) and tokens[i].lower() in {"or", "more"}:
                    gap_parts.append(tokens[i])
                    i += 1
                glazing_gap = " ".join(gap_parts)

            # Eight fixed fields remain. The loop guard only covers the
            # shortest possible row, so re-check after the variable-length
            # glazing and gap parts to avoid indexing past the end.
            if i + 8 > len(tokens):
                break
            (
                building_part,
                location,
                orientation,
                data_source,
                u_raw,
                g_raw,
                draught_raw,
                permanent_shutters,
            ) = tokens[i:i + 8]
            i += 8
            u_value = float(u_raw)
            g_value = float(g_raw)
            draught_proofed = draught_raw.lower() == "yes"

            windows.append(
                Window(
                    width_m=width_m,
                    height_m=height_m,
                    area_m2=area_m2,
                    glazing_type=glazing_type,
                    frame_factor=frame_factor,
                    building_part=building_part,
                    location=location,
                    orientation=orientation,
                    data_source=data_source,
                    u_value=u_value,
                    g_value=g_value,
                    draught_proofed=draught_proofed,
                    permanent_shutters=permanent_shutters,
                    frame_type=frame_type,
                    glazing_gap=glazing_gap,
                )
            )
        return windows

    def _extract_ventilation(self) -> VentilationAndCooling:
        """Read the ventilation and cooling counts and flags from their labels."""
        return VentilationAndCooling(
            open_chimneys_count=self._int_val("No. of open chimneys"),
            open_flues_count=self._int_val("No. of open flues"),
            open_chimneys_closed_fire_count=self._int_val(
                "No. of open chimneys/open flues attached to closed fire"
            ),
            solid_fuel_boiler_flues_count=self._int_val(
                "No. of flues attached to solid fuel boiler"
            ),
            other_heater_flues_count=self._int_val(
                "No. of open flues attached to other heater"
            ),
            blocked_chimneys_count=self._int_val("No. of blocked chimneys"),
            extract_fans_count=self._int_val("No. of intermittent extract fans"),
            passive_vents_count=self._int_val("No. of passive vents"),
            flueless_gas_fires_count=self._int_val("No. of flueless gas fires"),
            fixed_space_cooling=self._bool_val("Fixed Space Cooling"),
            draught_lobby=self._str_val("Draught Lobby"),
            mechanical_ventilation=self._bool_val("Mechanical Ventilation"),
            pressure_test_method=self._str_val("Test Method"),
        )

    def _extract_lighting(self) -> Lighting:
        """Read the bulb counts.

        ``low_energy_count`` is forced to 0 when the LED/CFL split is known.
        """
        led_cfl_count_known = self._bool_val("Number of LED and CFL Known")
        return Lighting(
            total_bulbs=self._int_val("Total number of bulbs"),
            led_cfl_count_known=led_cfl_count_known,
            led_count=self._int_val("Number of LED lights"),
            cfl_count=self._int_val("Number of CFL lights"),
            incandescent_count=self._int_val("Total number of incandescents"),
            low_energy_count=(
                0 if led_cfl_count_known
                else self._int_val("Total number of Low Energy")
            ),
        )

    def _extract_main_heating(self) -> MainHeating:
        """Read the first main-heating system (section 14.0 up to 14.1)."""
        lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2")
        # A missing or non-numeric percentage falls back to 0.
        pct = self._parse_int(self._local_val(lines, "Percentage of Heat"))
        return MainHeating(
            heat_emitter=self._local_str(lines, "Heat Emitter"),
            fuel_type=self._local_str(lines, "Fuel Type"),
            flue_type=self._local_str(lines, "Flue Type"),
            fan_assisted_flue=self._local_bool(lines, "Fan Assisted Flue"),
            design_flow_temperature=self._local_str(lines, "Design flow temperature"),
            heating_controls_ees=self._local_str(lines, "Main Heating Controls EES"),
            heating_controls_sap=self._local_str(lines, "Main Heating Controls Sap"),
            percentage_of_heat=0 if pct is None else pct,
            pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"),
            heat_pump_age=self._local_val(lines, "Heat pump age"),
        )

    def _extract_meters(self) -> Meters:
        """Read the meter fields from their labels."""
        return Meters(
            electricity_meter_type=self._str_val("Electricity meter type"),
            main_gas=self._bool_val("Main gas"),
            electricity_smart_meter=self._bool_val("Electricity Smart Meter Present"),
            gas_smart_meter=self._bool_val("Gas Smart Meter Present"),
        )

    def _extract_water_heating(self) -> WaterHeating:
        """Read the water-heating fields from their labels."""
        return WaterHeating(
            water_heating_code=self._str_val("Water Heating Code"),
            water_heating_sap_code=self._int_val("Water Heating SapCode"),
            water_heating_fuel_type=self._str_val("Water Heating Fuel Type"),
            hot_water_cylinder_present=self._bool_val("Hot Water Cylinder Present"),
        )

    def _extract_baths_and_showers(self) -> BathsAndShowers:
        """Read the bath counts and the shower table.

        Shower rows are triples of lines (number, outlet type, connected)
        starting right after the first line that is exactly ``Connected``.
        Reading stops at the first row whose number line is not all digits.
        """
        n_baths = self._int_val("Total Number of Baths")
        n_connected = self._int_val("Number of Baths Connected")
        try:
            idx = self._lines.index("Connected")
        except ValueError:
            return BathsAndShowers(
                number_of_baths=n_baths,
                number_of_baths_connected=n_connected,
                showers=[],
            )
        showers: List[Shower] = []
        j = idx + 1
        while j + 2 <= len(self._lines) - 1:
            num_line = self._lines[j]
            if not num_line.isdigit():
                break
            showers.append(
                Shower(
                    shower_number=int(num_line),
                    outlet_type=self._lines[j + 1],
                    connected=self._lines[j + 2],
                )
            )
            j += 3
        return BathsAndShowers(
            number_of_baths=n_baths,
            number_of_baths_connected=n_connected,
            showers=showers,
        )

    def _rating_val(self, label: str) -> int:
        """Return the LAST token of the value for ``label`` as an int, or 0."""
        v = self._next_val(label)
        try:
            return int(v.split()[-1]) if v else 0
        except (ValueError, IndexError):
            return 0

    def _extract_renewables(self) -> Renewables:
        """Read the renewables fields.

        The flue-gas heat recovery flag is the ``Present`` value inside
        section 18.0 (up to 19.0).
        """
        fghrs_lines = self._section_lines(
            "18.0 Flue Gas Heat Recovery System", "19.0 Photovoltaic Panel"
        )
        fghrs = self._local_bool(fghrs_lines, "Present")
        terrain = self._str_val("Terrain Type")
        # A missing or non-numeric value falls back to 0.0.
        hydro = self._parse_float(
            self._next_val("Electricity generated [kWh/year]")
        )
        return Renewables(
            solar_water_heating=self._bool_val("Solar Water Heating"),
            wwhrs_present=self._bool_val("Is WWHRS present in the property?"),
            flue_gas_heat_recovery_present=fghrs,
            photovoltaic_panel=self._str_val("Photovoltaic Panel"),
            export_capable_meter=self._bool_val("Export capable meter"),
            wind_turbine_present=self._bool_val("Wind turbine present?"),
            wind_turbines_terrain_type=terrain,
            hydro_electricity_generated_kwh=0.0 if hydro is None else hydro,
        )

    def extract(self) -> ElmhurstSiteNotes:
        """Run every section extractor and assemble the full record.

        Raises:
            ValueError: if a mandatory date is missing or malformed, or a
                window row carries a non-numeric U-value or g-value.
        """
        # A missing or non-numeric emissions value falls back to 0.0.
        co2 = self._parse_float(self._next_val("Emissions (t/year)"))
        return ElmhurstSiteNotes(
            surveyor_info=self._extract_surveyor_info(),
            property_details=self._extract_property_details(),
            current_sap_rating=self._rating_val("Current SAP rating"),
            potential_sap_rating=self._rating_val("Potential SAP rating"),
            current_ei_rating=self._rating_val("Current EI rating"),
            potential_ei_rating=self._rating_val("Potential EI rating"),
            co2_emissions_current_t=0.0 if co2 is None else co2,
            property_type=self._str_val("1.0 Property type"),
            attachment=self._extract_attachment(),
            number_of_storeys=self._int_val("Storeys"),
            habitable_rooms=self._int_val("Habitable Rooms"),
            heated_habitable_rooms=self._int_val("Heated Habitable Rooms"),
            construction_age_band=self._str_val("Main Property"),
            dimensions=self._extract_dimensions(),
            has_conservatory=self._bool_val("Is there a conservatory?"),
            walls=self._extract_walls(),
            roof=self._extract_roof(),
            floor=self._extract_floor(),
            door_count=self._int_val("Total Number of Doors"),
            insulated_door_count=self._int_val("Number of Insulated Doors"),
            windows=self._extract_windows(),
            draught_proofing_percent=self._int_val("Draught Proofing"),
            ventilation=self._extract_ventilation(),
            lighting=self._extract_lighting(),
            main_heating=self._extract_main_heating(),
            meters=self._extract_meters(),
            water_heating=self._extract_water_heating(),
            baths_and_showers=self._extract_baths_and_showers(),
            renewables=self._extract_renewables(),
        )

View file

@ -66,9 +66,11 @@ class PasHubRdSapSiteNotesExtractor:
val = self._get_in(list_to_process, key)
return val is not None and val.lower() != "not known"
def _wall_thickness_in(self, list_to_process: List[str]) -> int:
def _wall_thickness_in(self, list_to_process: List[str]) -> Optional[int]:
val = self._get_in(list_to_process, "Wall thickness:")
return int(val.split()[0]) if val else 0
if not val or val.split()[0].lower() == "unmeasurable":
return None
return int(val.split()[0])
def _section(self, start: str, end: str) -> List[str]:
try:
@ -83,10 +85,17 @@ class PasHubRdSapSiteNotesExtractor:
def extract_inspection_metadata(self) -> InspectionMetadata:
try:
addr_start = self.text_list.index("Property Address:") + 1
addr_end = self.text_list.index("Property Photo", addr_start)
property_address = ", ".join(
t.rstrip(",") for t in self.text_list[addr_start:addr_end]
)
try:
addr_end = self.text_list.index("Property Photo", addr_start)
address_tokens = self.text_list[addr_start:addr_end]
except ValueError:
addr_end = self.text_list.index("RdSAP Assessment", addr_start)
address_tokens = []
for t in self.text_list[addr_start:addr_end]:
if not t or t.startswith("Page "):
break
address_tokens.append(t)
property_address = ", ".join(t.rstrip(",") for t in address_tokens)
except ValueError:
property_address = ""

View file

@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Parse a local site-notes PDF and load the result into the database.
Usage:
python local_runner.py <pdf_path>
"""
from typing import List, Optional, Tuple
from backend.app.db.connection import db_session
from backend.app.db.models.epc_property import (
EpcBuildingPartModel,
EpcEnergyElementModel,
EpcFlatDetailsModel,
EpcFloorDimensionModel,
EpcMainHeatingDetailModel,
EpcPropertyEnergyPerformanceModel,
EpcPropertyModel,
EpcWindowModel,
)
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
def _parse_pdf(pdf_path: str) -> EpcPropertyData:
    """Read the PDF at ``pdf_path`` and map its site notes to ``EpcPropertyData``.

    A document whose extracted page text contains "Elmhurst Energy Systems"
    is handled by the Elmhurst extractor; any other document is handled by
    the PasHub RdSAP extractor.
    """
    with open(pdf_path, "rb") as pdf_file:
        raw: bytes = pdf_file.read()

    page_texts: List[str] = pdf_to_pages(raw)
    combined_text: str = "\n".join(page_texts)

    if "Elmhurst Energy Systems" not in combined_text:
        # PasHub documents are parsed from a flat token list, not pages.
        token_list: List[str] = pdf_to_text_list(raw)
        return EpcPropertyDataMapper.from_site_notes(
            PasHubRdSapSiteNotesExtractor(token_list).extract()
        )

    return EpcPropertyDataMapper.from_elmhurst_site_notes(
        ElmhurstSiteNotesExtractor(page_texts).extract()
    )
def _insert_energy_elements(
    session,
    elements: List[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add one ``EpcEnergyElementModel`` to ``session`` per entry of ``elements``.

    Each element is converted with ``EpcEnergyElementModel.from_domain`` using
    the given ``element_type`` and ``epc_property_id`` and added in list
    order. This function does not flush or commit.
    """
    for element in elements:
        row = EpcEnergyElementModel.from_domain(element, element_type, epc_property_id)
        session.add(row)
def _insert_optional_energy_element(
    session,
    el: Optional[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add a single energy element to ``session``; do nothing when ``el`` is ``None``."""
    if el is None:
        return
    row = EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
    session.add(row)
def run(pdf_path: str) -> None:
    """Parse the site-notes PDF at ``pdf_path`` and add its data to the database.

    The property row is added first and flushed so its id can be used by the
    dependent rows: energy performance, main heating details, building parts
    (each with its floor dimensions), windows, energy elements and, when
    present, flat details. Progress and the resulting id are printed.
    """
    property_data: EpcPropertyData = _parse_pdf(pdf_path)
    print("successfully mapped pdf")

    with db_session() as session:
        property_row: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(
            property_data
        )
        session.add(property_row)
        # Flush before reading the id; the assert relies on it being set by now.
        session.flush()
        assert property_row.id is not None
        epc_property_id: int = property_row.id

        performance_row = EpcPropertyEnergyPerformanceModel.from_epc_property_data(
            property_data, epc_property_id=epc_property_id
        )
        session.add(performance_row)

        for heating_detail in property_data.sap_heating.main_heating_details:
            session.add(
                EpcMainHeatingDetailModel.from_domain(heating_detail, epc_property_id)
            )

        for building_part in property_data.sap_building_parts:
            part_row: EpcBuildingPartModel = EpcBuildingPartModel.from_domain(
                building_part, epc_property_id
            )
            session.add(part_row)
            # Each part is flushed so its floor dimensions can reference its id.
            session.flush()
            assert part_row.id is not None
            for floor_dimension in building_part.sap_floor_dimensions:
                session.add(
                    EpcFloorDimensionModel.from_domain(floor_dimension, part_row.id)
                )

        for window in property_data.sap_windows:
            session.add(EpcWindowModel.from_domain(window, epc_property_id))

        # Elements that come as lists, paired with their element-type label.
        grouped_elements: List[Tuple[List[EnergyElement], str]] = [
            (property_data.roofs, "roof"),
            (property_data.walls, "wall"),
            (property_data.floors, "floor"),
            (property_data.main_heating, "main_heating"),
        ]
        for group, label in grouped_elements:
            _insert_energy_elements(session, group, label, epc_property_id)

        # Elements that are either a single value or absent.
        single_elements: List[Tuple[Optional[EnergyElement], str]] = [
            (property_data.window, "window"),
            (property_data.lighting, "lighting"),
            (property_data.hot_water, "hot_water"),
            (property_data.secondary_heating, "secondary_heating"),
            (property_data.main_heating_controls, "main_heating_controls"),
        ]
        for element, label in single_elements:
            _insert_optional_energy_element(session, element, label, epc_property_id)

        flat_details = property_data.sap_flat_details
        if flat_details is not None:
            session.add(EpcFlatDetailsModel.from_domain(flat_details, epc_property_id))

    print(f"epc_property_id={epc_property_id}")
    print(
        f"address: {property_data.address_line_1}, "
        f"{property_data.post_town}, {property_data.postcode}"
    )
if __name__ == "__main__":
    import sys

    # Alternative PasHub fixture for manual runs:
    #   backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf
    default_pdf = "backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf"
    # Honour the documented ``python local_runner.py <pdf_path>`` usage and
    # fall back to the bundled Elmhurst fixture when no path is given.
    run(sys.argv[1] if len(sys.argv) > 1 else default_pdf)

View file

@ -0,0 +1,28 @@
from typing import List
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
def parse_site_notes_pdf(file_path: str) -> EpcPropertyData:
    """Parse the site-notes PDF at ``file_path`` into ``EpcPropertyData``.

    Documents whose page text contains "Elmhurst Energy Systems" are parsed
    as Elmhurst site notes; all others are parsed as PasHub site notes.
    """
    with open(file_path, "rb") as pdf_file:
        raw = pdf_file.read()

    page_texts = pdf_to_pages(raw)
    # The marker contains no newline, so it can never straddle two pages of
    # the newline-joined text; a per-page check selects the same documents.
    is_elmhurst = any("Elmhurst Energy Systems" in text for text in page_texts)
    if not is_elmhurst:
        return _parse_pashub(raw)
    return _parse_elmhurst(page_texts)
def _parse_elmhurst(pages: List[str]) -> EpcPropertyData:
    """Extract Elmhurst site notes from per-page text and map them to ``EpcPropertyData``."""
    extractor = ElmhurstSiteNotesExtractor(pages)
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
def _parse_pashub(pdf_bytes: bytes) -> EpcPropertyData:
    """Extract PasHub site notes from the PDF's token list and map them to ``EpcPropertyData``."""
    extractor = PasHubRdSapSiteNotesExtractor(pdf_to_text_list(pdf_bytes))
    return EpcPropertyDataMapper.from_site_notes(extractor.extract())

View file

@ -10,3 +10,8 @@ def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
for line in page.get_text().split("\n"):
tokens.append(line)
return tokens
def pdf_to_pages(pdf_bytes: bytes) -> List[str]:
    """Return the extracted text of the PDF as one string per page."""
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [page.get_text() for page in document]
    return page_texts

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,6 @@
[
"Summary Information\nSurveyor:\nP960-0001\nName:\nRichard Matthew Ratcliff\nTitle: Mr.\nTel Number: 07760 443 469\nSurvey Reference:\n001573\nMy Reference:\nCurrent SAP rating:\nC 69\nPotential SAP rating: C 77\nEmissions (t/year):\n1.683 tonnes\nCurrent EI rating:\nC 76\nPotential EI rating:\nB 81\nFuel Bill:\n\u00a3896\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nP960-0001-001573\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB10 1XX\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n19\nStreet:\nQueens Road\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nB Bungalow\nE End-Terrace\n2.0 Number of\nStoreys:\n1\nHabitable Rooms:\n2\nHeated Habitable Rooms:\n2\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\nLowest Floor:\n44.89\n2.24\n20.10\n6.70\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nU Unable to determine\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n270 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nN Suspended, not timber\nInsulation\nA As built\nDefault U-value\n0.69\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n0\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.30\n1.10\n1.43\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n1.80\n1.00\n1.80\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n0.80\n0.56\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n1.30\n0.91\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\nDraught Proofing\n100 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n0\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n8\nNumber of LED and CFL Known\nYes\nNumber of LED lights\n4\nNumber of CFL lights\n4\nTotal number of Low Energy\n8\nTotal number of incandescents\n0\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n17742 Potterton, Promax 33 Combi ErP, 88.30%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n0\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nElectric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nSuburban\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (Already installed)\nLow energy lighting (Already installed)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (Not applicable)\nDouble glazed windows (Already installed)\nInsulated doors (Already installed)\nSolar photovoltaic panels (Recommended)\nWind turbine (Not applicable)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n"
]

View file

@ -0,0 +1,6 @@
[
"Summary Information\nSurveyor:\nBW22-0001\nName:\nIan Marsh\nTitle:\nTel Number: 07709266472\nSurvey Reference:\n001233\nMy Reference:\nCurrent SAP rating:\nD 68\nPotential SAP rating: A 92\nEmissions (t/year):\n2.812 tonnes\nCurrent EI rating:\nD 68\nPotential EI rating:\nC 76\nFuel Bill:\n\u00a31098\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nBW22-0001-001233\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB11 2NU\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n39\nStreet:\nConstable Avenue\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nH House\nS Semi-Detached\n2.0 Number of\nStoreys:\n2\nHabitable Rooms:\n4\nHeated Habitable Rooms:\n4\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\n1st Floor:\n35.88\n2.51\n17.46\n6.62\nLowest Floor:\n35.88\n2.67\n17.46\n6.62\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nCU Cavity masonry unfilled\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n200 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nT Suspended timber\nInsulation\nA As built\nDefault U-value\n0.72\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n2\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.59\n1.36\n2.16\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n1.27\n0.43\n0.55\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n1.54\n1.06\n1.63\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n0.61\n1.07\n0.65\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nSouth\nManufacturer\n2.70\n0.76\nYes\nNone\n1.07\n1.05\n1.12\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.07\n1.08\n1.16\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.10\n1.06\n1.17\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.12\n1.06\n1.19\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\nDraught Proofing\n90 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. 
of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n2\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n10\nNumber of LED and CFL Known\nNo\nTotal number of Low Energy\n5\nTotal number of incandescents\n5\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n18737 Baxi, ASSURE, 88.40%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n1\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nNon-electric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nRural\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
"Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (SAP increase too small)\nLow energy lighting (Recommended)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (SAP increase too small)\nDouble glazed windows (Already installed)\nInsulated doors (SAP increase too small)\nSolar photovoltaic panels (Recommended)\nWind turbine (Recommended)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n"
]

View file

@ -0,0 +1,670 @@
[
"SMART EPC: Record of",
"Inspection & Site Notes",
"Inspection Surveyor:",
"Dave Elliott",
"E-Mail Address:",
"davejohns36@icloud.com",
"Report Reference:",
"Not Applicable",
"Created On:",
"12 September 2025",
"Date of Inspection:",
"08 September 2025",
"Property Address:",
"Flat 3,",
"29 Watcombe Circus,",
"NOTTINGHAM,",
"NG5 2DU",
"Page 1",
"",
"Photo of electricity meter:",
"Single Smart Meter",
"RdSAP Assessment",
"General",
"Confirm you have checked for the existence of an",
"EPC before carrying out another energy assessment.",
"Yes",
"Does an EPC exist at the point of carrying out this",
"energy assessment?",
"No",
"Inspection Date:",
"08/09/2025",
"Transaction Type:",
"None of the Above",
"Tenure:",
"Rented Social",
"Type of Property:",
"Maisonette",
"Detachment Type:",
"Semi-Detached",
"Flat Type:",
"Mid-floor",
"Flat Location:",
"3",
"Corridor Type:",
"Unheated Corridor",
"Unheated corridor wall length:",
"6.59 m",
"Number of storeys:",
"2 Storeys",
"Terrain Type:",
"Suburban",
"Number of Extensions:",
"2 Extensions",
"Is an electricity smart meter present?",
"Yes",
"Electric meter type:",
"Single",
"Is the dwelling export-capable?",
"No",
"Is mains gas available?",
"Yes",
"Is there a gas smart meter?",
"No",
"Is the gas meter accessible?",
"Yes",
"Page 2",
"",
"Photo of Gas Meter:",
"Gas Meter",
"External indicators of Solid Brick construction:",
"Brick Pattern",
"Select Measurements Location:",
"Internal",
"Building Construction",
"Main Building",
"Age Range:",
"1900-1929",
"Record indicators of property age:",
"Property checker",
"Walls - Construction Type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"consistent with build age",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"280 mm",
"Page 3",
"",
"Photo wall thickness:",
"Wall Measurements",
"Wall Dry-Lined?",
"No",
"Party wall construction type:",
"Solid Masonry, Timber Frame, or System Built",
"Floor type:",
"Other dwelling below",
"Extension 1",
"Age Range:",
"1900-1929",
"Record indicators of property age:",
"Property checker",
"Walls - Construction Type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"headers and stretchers in brick bond",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"280 mm",
"Wall Dry-Lined?",
"Yes",
"Party wall construction type:",
"Solid Masonry, Timber Frame, or System Built",
"Floor type:",
"Other dwelling below",
"Extension 2",
"Age Range:",
"1900-1929",
"Record indicators of property age:",
"Property checker",
"Walls - Construction Type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"headers and stretchers in brick bond",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Page 4",
"",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"280 mm",
"Wall Dry-Lined?",
"Yes",
"Party wall construction type:",
"Solid Masonry, Timber Frame, or System Built",
"Floor type:",
"Other dwelling below",
"Building Measurements",
"Area (m2)",
"Height (m)",
"Heat Loss Perimeter (m)",
"PWL (m)",
"Main Building",
"Floor 1",
"39.5",
"3.58",
"11.02",
"15.21",
"Floor 0",
"23.06",
"2.87",
"11.72",
"10.8",
"Extension 1",
"Floor 1",
"3.43",
"3.58",
"4.97",
"1",
"Floor 0",
"3.43",
"2.87",
"4.97",
"1",
"Extension 2",
"Floor 0",
"1.81",
"3.58",
"4.96",
"1",
"Roof Space",
"Main Building",
"Roofs - Construction Type:",
"Pitched roof (Slates or tiles), Access to loft",
"Roofs - Insulation At:",
"Joists",
"Roof U-Value:",
"Not Known",
"Roofs - Insulation Thickness:",
"225 mm",
"Page 5",
"",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Loft insulation:",
"Loft",
"Page 6",
"",
"Loft insulation:",
"Loft",
"Indicators of Solid Brick Wall Construction in roof space:",
"solid wall construction visible to gables",
"Record indicators of Solid Brick Wall Construction in",
"roof space:",
"solid wall construction visible to gables",
"Extension 1",
"Roofs - Construction Type:",
"Flat",
"Roofs - Insulation At:",
"Unknown",
"Record indicators of Solid Brick Wall Construction in",
"roof space:",
"solid wall construction visible at eaves",
"Extension 2",
"Roofs - Construction Type:",
"Flat",
"Roofs - Insulation At:",
"Unknown",
"Record indicators of Solid Brick Wall Construction in",
"roof space:",
"Couldn\u2019t enter",
"Page 7",
"",
"Alternative Wall",
"Main Building",
"Alternative Wall 1",
"Construction type:",
"Solid brick",
"Record external indicators of Solid Brick",
"Construction:",
"consistent with building age, no visible cavity trays",
"Insulation Type:",
"As Built",
"Sheltered wall?",
"Yes",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall thickness:",
"280 mm",
"Wall Dry-Lined?",
"Yes",
"Windows",
"Window 1",
"Window location:",
"Main Building",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.2 m",
"Window width:",
"0.8 m",
"Orientation:",
"South West",
"Window 2",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.65 m",
"Window width:",
"0.52 m",
"Orientation:",
"East",
"Page 8",
"",
"Window 3",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.95 m",
"Window width:",
"0.86 m",
"Orientation:",
"East",
"Window 4",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.76 m",
"Window width:",
"0.65 m",
"Orientation:",
"North",
"Window 5",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.68 m",
"Window width:",
"0.68 m",
"Orientation:",
"East",
"Page 9",
"",
"Window 6",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.84 m",
"Window width:",
"1.18 m",
"Orientation:",
"North East",
"Window 7",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.76 m",
"Window width:",
"0.65 m",
"Orientation:",
"North",
"Window 8",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.82 m",
"Window width:",
"0.84 m",
"Orientation:",
"South East",
"Page 10",
"",
"Window 9",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.65 m",
"Window width:",
"0.5 m",
"Orientation:",
"South",
"Window 10",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.71 m",
"Window width:",
"0.47 m",
"Orientation:",
"East",
"Window 11",
"Window location:",
"Extension 2",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.2 m",
"Window width:",
"0.8 m",
"Orientation:",
"South West",
"Page 11",
"",
"Heating & Hot Water",
"Main Heating Systems",
"Main Heating 1",
"How would you like to select the Heating System?",
"PCDF Search",
"System type:",
"Boiler with radiators or underfloor heating",
"Product Id",
"15030",
"Manufacturer",
"Baxi",
"Model",
"Duo-tec Combi",
"Orig Manuf",
"Baxi Heating",
"Fuel",
"Mains gas",
"S. Efficiency",
"0",
"Type",
"Combi",
"Condensing",
"Yes",
"Year",
"2006 - 2008",
"Mount",
"Wall",
"Open Flue",
"Room-sealed",
"Fan Assist",
"Yes",
"Status",
"Normal status for an actual product",
"Central heating pump age:",
"Unknown",
"Controls:",
"Programmer, room thermostat and TRVs",
"Does the boiler have a Flue Gas Heat Recover",
"System (FGHRS)?",
"No",
"Is there a weather compensator?",
"No",
"Emitter:",
"Radiators",
"Emitter Temperature:",
"Unknown",
"Secondary Heating System",
"Secondary Fuel",
"No Secondary Heating",
"Water Heating & Cylinder",
"Water Heating Type:",
"Regular",
"Water Heating System:",
"From main heating 1",
"Cylinder Size:",
"No Cylinder",
"Ventilation",
"Ventilation type:",
"Natural",
"Has fixed air conditioning?",
"No",
"Number of open flues:",
"0",
"Number of closed flues:",
"0",
"Number of boiler flues:",
"1",
"Page 12",
"",
"Number of other flues:",
"0",
"Number of extract fans:",
"2",
"Number of passive vents:",
"1",
"Number of flueless gas fires:",
"0",
"Pressure test:",
"No test",
"Is there a draught lobby?",
"Yes",
"Conservatories",
"Is there conservatory?",
"No conservatory",
"Renewables",
"Wind Turbines",
"Has wind turbines?",
"No",
"Solar hot water",
"Has solar hot water?",
"No",
"Photovoltaics",
"Has photovoltaic array?",
"No",
"Number of PV batteries:",
"None",
"Hydro",
"Is the dwelling connected to Hydro?",
"No",
"Room Count Elements",
"Number of habitable rooms?",
"3",
"Are any of these rooms unheated?",
"No",
"Number of external doors?",
"1",
"Number of insulated external doors?",
"0",
"Number of draughtproofed external doors?",
"1",
"Number of open chimneys?",
"0",
"Number of blocked chimneys?",
"0",
"Number of fixed incandescent bulbs:",
"7",
"Is the exact number of LED and CFL bulbs known?",
"Yes",
"Number of fixed LED bulbs:",
"7",
"Number of fixed CFL bulbs:",
"0",
"Are there any waste water heat recovery systems?",
"None",
"Number of baths:",
"1",
"How many special features are there at the",
"property?",
"0",
"Customer Response",
"Customer present?",
"Yes",
"Page 13",
"",
"Customer willing to answer satisfaction survey?",
"No",
"Addendum + Related Party Disclosure",
"Addendum",
"None",
"Related party disclosure",
"No related party",
"Photographs Required",
"Page 14",
""
]

View file

@ -0,0 +1,356 @@
import json
import os
from datetime import date
import pytest
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
# Directory (next to this module) that holds the JSON fixtures.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

# JSON inputs loaded by the ``result`` and ``result2`` fixtures respectively.
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_2_text.json")
@pytest.fixture(scope="module")
def result() -> EpcPropertyData:
    """Return the ``EpcPropertyData`` mapped from the first Elmhurst sample.

    Loads the JSON at ``FIXTURE_PATH``, runs ``ElmhurstSiteNotesExtractor``
    over it and maps the extracted notes with
    ``EpcPropertyDataMapper.from_elmhurst_site_notes``. Module-scoped, so the
    pipeline runs once for all tests in this module.
    """
    # Explicit encoding so the read does not depend on the platform default.
    with open(FIXTURE_PATH, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
@pytest.fixture(scope="module")
def result2() -> EpcPropertyData:
    """Return the ``EpcPropertyData`` mapped from the second Elmhurst sample.

    Loads the JSON at ``FIXTURE_PATH_2``, runs ``ElmhurstSiteNotesExtractor``
    over it and maps the extracted notes with
    ``EpcPropertyDataMapper.from_elmhurst_site_notes``. Module-scoped, so the
    pipeline runs once for all tests in this module.
    """
    # Explicit encoding so the read does not depend on the platform default.
    with open(FIXTURE_PATH_2, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
class TestAddress:
    """Address fields of the mapped first sample."""

    def test_address_line_1(self, result: EpcPropertyData) -> None:
        line_1 = result.address_line_1
        assert line_1 == "19, Queens Road"

    def test_post_town(self, result: EpcPropertyData) -> None:
        town = result.post_town
        assert town == "BURNLEY"

    def test_postcode(self, result: EpcPropertyData) -> None:
        postcode = result.postcode
        assert postcode == "BB10 1XX"
class TestInspectionInfo:
    """Inspection metadata of the mapped first sample."""

    def test_inspection_date(self, result: EpcPropertyData) -> None:
        expected = date(2026, 3, 6)
        assert result.inspection_date == expected

    def test_tenure(self, result: EpcPropertyData) -> None:
        tenure = result.tenure
        assert tenure == "Rented (social)"

    def test_transaction_type(self, result: EpcPropertyData) -> None:
        transaction_type = result.transaction_type
        assert transaction_type == "Grant scheme"

    def test_report_reference(self, result: EpcPropertyData) -> None:
        reference = result.report_reference
        assert reference == "P960-0001-001573"
class TestPropertyDescription:
    """Property-description fields of the mapped first sample."""

    def test_property_type(self, result: EpcPropertyData) -> None:
        assert result.property_type == "Bungalow"

    def test_built_form(self, result: EpcPropertyData) -> None:
        assert result.built_form == "End-Terrace"

    def test_dwelling_type(self, result: EpcPropertyData) -> None:
        assert result.dwelling_type == "End-Terrace bungalow"

    def test_number_of_storeys(self, result: EpcPropertyData) -> None:
        assert result.number_of_storeys == 1

    def test_has_conservatory(self, result: EpcPropertyData) -> None:
        assert result.has_conservatory is False

    def test_total_floor_area(self, result: EpcPropertyData) -> None:
        # Float comparison: tolerate representation error instead of
        # requiring bit-exact equality.
        assert result.total_floor_area_m2 == pytest.approx(44.89)
class TestCounts:
    """Room, door and chimney counts of the mapped first sample."""

    def test_habitable_rooms_count(self, result: EpcPropertyData) -> None:
        habitable = result.habitable_rooms_count
        assert habitable == 2

    def test_heated_rooms_count(self, result: EpcPropertyData) -> None:
        heated = result.heated_rooms_count
        assert heated == 2

    def test_door_count(self, result: EpcPropertyData) -> None:
        doors = result.door_count
        assert doors == 0

    def test_insulated_door_count(self, result: EpcPropertyData) -> None:
        insulated_doors = result.insulated_door_count
        assert insulated_doors == 0

    def test_open_chimneys_count(self, result: EpcPropertyData) -> None:
        open_chimneys = result.open_chimneys_count
        assert open_chimneys == 0

    def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None:
        blocked_chimneys = result.blocked_chimneys_count
        assert blocked_chimneys == 0
class TestLighting:
    """Fixed-lighting bulb counts of the mapped first sample."""

    def test_led_count(self, result: EpcPropertyData) -> None:
        led = result.led_fixed_lighting_bulbs_count
        assert led == 4

    def test_cfl_count(self, result: EpcPropertyData) -> None:
        cfl = result.cfl_fixed_lighting_bulbs_count
        assert cfl == 4

    def test_incandescent_count(self, result: EpcPropertyData) -> None:
        incandescent = result.incandescent_fixed_lighting_bulbs_count
        assert incandescent == 0
class TestFlags:
    """Boolean feature flags of the mapped first sample (all expected False)."""

    def test_solar_water_heating(self, result: EpcPropertyData) -> None:
        flag = result.solar_water_heating
        assert flag is False

    def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None:
        flag = result.has_hot_water_cylinder
        assert flag is False

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        flag = result.has_fixed_air_conditioning
        assert flag is False

    def test_hydro(self, result: EpcPropertyData) -> None:
        flag = result.hydro
        assert flag is False

    def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
        flag = result.photovoltaic_array
        assert flag is False
class TestBuildingPart:
    """Checks on ``sap_building_parts`` of the mapped first sample.

    Most tests inspect the first building part; the dimension tests inspect
    its first ``sap_floor_dimensions`` entry. Float-valued dimensions are
    compared with ``pytest.approx`` rather than exact equality.
    """

    def test_single_building_part(self, result: EpcPropertyData) -> None:
        assert len(result.sap_building_parts) == 1

    def test_identifier(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.identifier == "main"

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.construction_age_band == "1950-1966"

    def test_wall_construction(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.wall_construction == "Cavity"

    def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.wall_insulation_type == "Filled Cavity"

    def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.wall_thickness_measured is True

    def test_wall_thickness_mm(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.wall_thickness_mm == 300

    def test_roof_insulation_location(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.roof_insulation_location == "Joists"

    def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.roof_insulation_thickness == 270

    def test_floor_type(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.floor_type == "Ground floor"

    def test_floor_construction_type(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.floor_construction_type == "Suspended, not timber"

    def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.floor_insulation_type_str == "As built"

    def test_floor_u_value_known(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert part.floor_u_value_known is False

    def test_single_floor_dimension(self, result: EpcPropertyData) -> None:
        part = result.sap_building_parts[0]
        assert len(part.sap_floor_dimensions) == 1

    def test_floor_dimension_area(self, result: EpcPropertyData) -> None:
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.total_floor_area_m2 == pytest.approx(44.89)

    def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None:
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.room_height_m == pytest.approx(2.24)

    def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None:
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.heat_loss_perimeter_m == pytest.approx(20.10)

    def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None:
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.party_wall_length_m == pytest.approx(6.70)
class TestWindows:
    """Checks on ``sap_windows`` of the mapped first sample.

    Float-valued fields (sizes, frame factor, U-value, solar transmittance)
    are compared with ``pytest.approx`` rather than exact equality.
    """

    def test_window_count(self, result: EpcPropertyData) -> None:
        assert len(result.sap_windows) == 4

    def test_first_window_width(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].window_width == pytest.approx(1.30)

    def test_first_window_height(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].window_height == pytest.approx(1.10)

    def test_first_window_orientation(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].orientation == "North"

    def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].glazing_type == "Double post or during 2022"

    def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].draught_proofed is True

    def test_third_window_orientation(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[2].orientation == "South"

    def test_frame_factor(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].frame_factor == pytest.approx(0.7)

    def test_transmission_u_value(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        # Guard first so a missing details object fails with a clear assert.
        assert details is not None
        assert details.u_value == pytest.approx(1.4)

    def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.solar_transmittance == pytest.approx(0.72)

    def test_transmission_data_source(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.data_source == "Manufacturer"
class TestHeating:
    """Checks on ``sap_heating`` of the mapped first sample."""

    def test_single_heating_detail(self, result: EpcPropertyData) -> None:
        details = result.sap_heating.main_heating_details
        assert len(details) == 1

    def test_fuel_type(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.main_fuel_type == "Mains gas"

    def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.heat_emitter_type == "Radiators"

    def test_emitter_temperature(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.emitter_temperature == "Unknown"

    def test_fan_flue_present(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.fan_flue_present is True

    def test_has_fghrs(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.has_fghrs is False

    def test_main_heating_control(self, result: EpcPropertyData) -> None:
        main = result.sap_heating.main_heating_details[0]
        assert main.main_heating_control == "Programmer, room thermostat and TRVs"

    def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
        outlets = result.sap_heating.shower_outlets
        # Guard first so a missing outlets object fails with a clear assert.
        assert outlets is not None
        assert outlets.shower_outlet.shower_outlet_type == "Electric shower"

    def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.cylinder_size is None

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.has_fixed_air_conditioning is False

    def test_water_heating_code(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.water_heating_code == 901
class TestEnergySource:
    """Checks on ``sap_energy_source`` of the mapped first sample."""

    def test_mains_gas(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.mains_gas is True

    def test_meter_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.meter_type == "Single"

    def test_electricity_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.electricity_smart_meter_present is False

    def test_gas_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.gas_smart_meter_present is False

    def test_wind_turbines_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_count == 0

    def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_terrain_type == "Suburban"

    def test_pv_battery_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.pv_battery_count == 0
class TestVentilation:
    """Checks on ``sap_ventilation`` of the mapped first sample.

    Each test first asserts the ventilation object is present, then checks
    one of its fields.
    """

    def test_draught_lobby(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.draught_lobby is False

    def test_pressure_test(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.pressure_test == "Not available"

    def test_extract_fans_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.extract_fans_count == 2

    def test_open_flues_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.open_flues_count == 0
class TestDraughtproofingAndWater:
    """Draughtproofing, waste-water heat recovery and unheated-room fields."""

    def test_percent_draughtproofed(self, result: EpcPropertyData) -> None:
        percent = result.percent_draughtproofed
        assert percent == 100

    def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None:
        # The expected value is the string "None", not the ``None`` singleton.
        recovery = result.waste_water_heat_recovery
        assert recovery == "None"

    def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None:
        unheated = result.any_unheated_rooms
        assert unheated is False
class TestEnergyPerformance:
    """Energy and environmental ratings of the mapped first sample."""

    def test_energy_rating_current(self, result: EpcPropertyData) -> None:
        assert result.energy_rating_current == 69

    def test_energy_rating_potential(self, result: EpcPropertyData) -> None:
        assert result.energy_rating_potential == 77

    def test_environmental_impact_current(self, result: EpcPropertyData) -> None:
        assert result.environmental_impact_current == 76

    def test_environmental_impact_potential(self, result: EpcPropertyData) -> None:
        assert result.environmental_impact_potential == 81

    def test_co2_emissions_current(self, result: EpcPropertyData) -> None:
        # Float comparison: tolerate representation error instead of
        # requiring bit-exact equality.
        assert result.co2_emissions_current == pytest.approx(1.683)
class TestWindowFrameMaterial:
    """Frame material and glazing gap of the first window in the second sample."""

    def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first_window = result2.sap_windows[0]
        assert first_window.frame_material == "PVC"

    def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first_window = result2.sap_windows[0]
        assert first_window.glazing_gap == "16 mm or more"
class TestLowEnergyLighting:
    """Low-energy bulb count of the mapped second sample."""

    def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None:
        low_energy = result2.low_energy_fixed_lighting_bulbs_count
        assert low_energy == 5

View file

@ -0,0 +1,515 @@
import json
import os
from datetime import date
import pytest
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.surveys.elmhurst_site_notes import (
BathsAndShowers,
BuildingPartDimensions,
ElmhurstSiteNotes,
FloorDetails,
FloorDimension,
Lighting,
MainHeating,
Meters,
PropertyDetails,
Renewables,
RoofDetails,
Shower,
SurveyorInfo,
VentilationAndCooling,
WallDetails,
WaterHeating,
Window,
)
# Directory (next to this module) that holds the JSON fixtures.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

# JSON inputs loaded by the ``result`` and ``result2`` fixtures respectively.
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_2_text.json")
@pytest.fixture(scope="module")
def result() -> ElmhurstSiteNotes:
    """Return the ``ElmhurstSiteNotes`` extracted from the first sample.

    Loads the JSON at ``FIXTURE_PATH`` and runs ``ElmhurstSiteNotesExtractor``
    over it. Module-scoped, so extraction runs once for all tests here.
    """
    # Explicit encoding so the read does not depend on the platform default.
    with open(FIXTURE_PATH, encoding="utf-8") as f:
        pages = json.load(f)
    return ElmhurstSiteNotesExtractor(pages).extract()
@pytest.fixture(scope="module")
def result2() -> ElmhurstSiteNotes:
    """Return the ``ElmhurstSiteNotes`` extracted from the second sample.

    Loads the JSON at ``FIXTURE_PATH_2`` and runs
    ``ElmhurstSiteNotesExtractor`` over it. Module-scoped, so extraction runs
    once for all tests here.
    """
    # Explicit encoding so the read does not depend on the platform default.
    with open(FIXTURE_PATH_2, encoding="utf-8") as f:
        pages = json.load(f)
    return ElmhurstSiteNotesExtractor(pages).extract()
class TestSurveyorInfo:
    """Checks on ``surveyor_info`` extracted from the first sample."""

    def test_surveyor_code(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.surveyor_code == "P960-0001"

    def test_name(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.name == "Richard Matthew Ratcliff"

    def test_title(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.title == "Mr."

    def test_tel_number(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.tel_number == "07760 443 469"

    def test_survey_reference(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.survey_reference == "001573"

    def test_my_reference_none(self, result: ElmhurstSiteNotes) -> None:
        info = result.surveyor_info
        assert info.my_reference is None
class TestPropertyDetails:
    """Checks on ``property_details`` extracted from the first sample."""

    def test_rdsap_version(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.rdsap_version == "RdSAP10"

    def test_reference_number(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.reference_number == "P960-0001-001573"

    def test_lodgement_required(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.lodgement_required is False

    def test_regs_region(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.regs_region == "England"

    def test_epc_language(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.epc_language == "English"

    def test_uprn_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.uprn is None

    def test_postcode(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.postcode == "BB10 1XX"

    def test_region(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.region == "West Pennines"

    def test_house_name_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.house_name is None

    def test_house_number(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.house_number == "19"

    def test_street(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.street == "Queens Road"

    def test_locality_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.locality is None

    def test_town(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.town == "BURNLEY"

    def test_county_none(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.county is None

    def test_tenure(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.tenure == "Rented (social)"

    def test_transaction_type(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.transaction_type == "Grant scheme"

    def test_inspection_date(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.inspection_date == date(2026, 3, 6)

    def test_process_date(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.process_date == date(2026, 3, 6)

    def test_epc_exists(self, result: ElmhurstSiteNotes) -> None:
        details = result.property_details
        assert details.epc_exists is False
class TestPropertyDescription:
    """Top-level property-description fields extracted from the first sample."""

    def test_property_type(self, result: ElmhurstSiteNotes) -> None:
        property_type = result.property_type
        assert property_type == "B Bungalow"

    def test_attachment(self, result: ElmhurstSiteNotes) -> None:
        attachment = result.attachment
        assert attachment == "E End-Terrace"

    def test_number_of_storeys(self, result: ElmhurstSiteNotes) -> None:
        storeys = result.number_of_storeys
        assert storeys == 1

    def test_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
        rooms = result.habitable_rooms
        assert rooms == 2

    def test_heated_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
        heated_rooms = result.heated_habitable_rooms
        assert heated_rooms == 2

    def test_construction_age_band(self, result: ElmhurstSiteNotes) -> None:
        age_band = result.construction_age_band
        assert age_band == "D 1950-1966"

    def test_has_conservatory(self, result: ElmhurstSiteNotes) -> None:
        has_conservatory = result.has_conservatory
        assert has_conservatory is False
class TestDimensions:
    """Checks on ``dimensions`` extracted from the first sample.

    Float-valued measurements are compared with ``pytest.approx`` rather than
    exact equality.
    """

    def test_dimension_type(self, result: ElmhurstSiteNotes) -> None:
        assert result.dimensions.dimension_type == "Internal"

    def test_floor_count(self, result: ElmhurstSiteNotes) -> None:
        assert len(result.dimensions.floors) == 1

    def test_floor_name(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.name == "Lowest Floor"

    def test_floor_area(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.area_m2 == pytest.approx(44.89)

    def test_floor_room_height(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.room_height_m == pytest.approx(2.24)

    def test_floor_heat_loss_perimeter(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.heat_loss_perimeter_m == pytest.approx(20.10)

    def test_floor_party_wall_length(self, result: ElmhurstSiteNotes) -> None:
        floor = result.dimensions.floors[0]
        assert floor.party_wall_length_m == pytest.approx(6.70)
class TestWalls:
    """Checks on ``walls`` extracted from the first sample."""

    def test_wall_type(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.wall_type == "CA Cavity"

    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.insulation == "F Filled Cavity"

    def test_thickness_unknown(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.thickness_unknown is False

    def test_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.thickness_mm == 300

    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.u_value_known is False

    def test_party_wall_type(self, result: ElmhurstSiteNotes) -> None:
        walls = result.walls
        assert walls.party_wall_type == "U Unable to determine"
class TestRoof:
    """Checks on ``roof`` extracted from the first sample."""

    def test_roof_type(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.roof_type == "PA Pitched (slates/tiles), access to loft"

    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.insulation == "J Joists"

    def test_insulation_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.insulation_thickness_mm == 270

    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
        roof = result.roof
        assert roof.u_value_known is False
class TestFloor:
    """Checks on ``floor`` extracted from the first sample."""

    def test_location(self, result: ElmhurstSiteNotes) -> None:
        assert result.floor.location == "G Ground floor"

    def test_floor_type(self, result: ElmhurstSiteNotes) -> None:
        assert result.floor.floor_type == "N Suspended, not timber"

    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
        assert result.floor.insulation == "A As built"

    def test_default_u_value(self, result: ElmhurstSiteNotes) -> None:
        # Float comparison: tolerate representation error instead of
        # requiring bit-exact equality.
        assert result.floor.default_u_value == pytest.approx(0.69)

    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
        assert result.floor.u_value_known is False
class TestDoors:
    """Door counts extracted from the first sample."""

    def test_door_count(self, result: ElmhurstSiteNotes) -> None:
        doors = result.door_count
        assert doors == 0

    def test_insulated_door_count(self, result: ElmhurstSiteNotes) -> None:
        insulated_doors = result.insulated_door_count
        assert insulated_doors == 0
class TestWindows:
    """Checks on ``windows`` extracted from the first sample.

    Float-valued fields (sizes, areas, frame factor, U-value, g-value) are
    compared with ``pytest.approx`` rather than exact equality.
    """

    def test_window_count(self, result: ElmhurstSiteNotes) -> None:
        assert len(result.windows) == 4

    def test_draught_proofing_percent(self, result: ElmhurstSiteNotes) -> None:
        assert result.draught_proofing_percent == 100

    def test_first_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
        w = result.windows[0]
        assert w.width_m == pytest.approx(1.30)
        assert w.height_m == pytest.approx(1.10)
        assert w.area_m2 == pytest.approx(1.43)

    def test_first_window_glazing(self, result: ElmhurstSiteNotes) -> None:
        w = result.windows[0]
        assert w.glazing_type == "Double post or during 2022"
        assert w.frame_factor == pytest.approx(0.70)

    def test_first_window_location(self, result: ElmhurstSiteNotes) -> None:
        w = result.windows[0]
        assert w.building_part == "Main"
        assert w.location == "External wall"
        assert w.orientation == "North"

    def test_first_window_performance(self, result: ElmhurstSiteNotes) -> None:
        w = result.windows[0]
        assert w.data_source == "Manufacturer"
        assert w.u_value == pytest.approx(1.40)
        assert w.g_value == pytest.approx(0.72)
        assert w.draught_proofed is True
        # The expected value is the string "None", not the ``None`` singleton.
        assert w.permanent_shutters == "None"

    def test_third_window_orientation(self, result: ElmhurstSiteNotes) -> None:
        assert result.windows[2].orientation == "South"

    def test_fourth_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
        w = result.windows[3]
        assert w.width_m == pytest.approx(0.70)
        assert w.height_m == pytest.approx(1.30)
        assert w.area_m2 == pytest.approx(0.91)
class TestVentilation:
    """Checks on ``ventilation`` extracted from the first sample."""

    def test_open_chimneys(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.open_chimneys_count == 0

    def test_open_flues(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.open_flues_count == 0

    def test_open_chimneys_closed_fire(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.open_chimneys_closed_fire_count == 0

    def test_solid_fuel_boiler_flues(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.solid_fuel_boiler_flues_count == 0

    def test_other_heater_flues(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.other_heater_flues_count == 0

    def test_blocked_chimneys(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.blocked_chimneys_count == 0

    def test_extract_fans(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.extract_fans_count == 2

    def test_passive_vents(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.passive_vents_count == 0

    def test_flueless_gas_fires(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.flueless_gas_fires_count == 0

    def test_fixed_space_cooling(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.fixed_space_cooling is False

    def test_draught_lobby(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.draught_lobby == "Not present"

    def test_mechanical_ventilation(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.mechanical_ventilation is False

    def test_pressure_test_method(self, result: ElmhurstSiteNotes) -> None:
        ventilation = result.ventilation
        assert ventilation.pressure_test_method == "Not available"
class TestLighting:
    """Checks on ``lighting`` extracted from the first sample."""

    def test_total_bulbs(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.total_bulbs == 8

    def test_led_cfl_count_known(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.led_cfl_count_known is True

    def test_led_count(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.led_count == 4

    def test_cfl_count(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.cfl_count == 4

    def test_incandescent_count(self, result: ElmhurstSiteNotes) -> None:
        lighting = result.lighting
        assert lighting.incandescent_count == 0
class TestMainHeating:
    """Checks on ``main_heating`` extracted from the first sample."""

    def test_pcdf_boiler_reference(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        expected = "17742 Potterton, Promax 33 Combi ErP, 88.30%"
        assert heating.pcdf_boiler_reference == expected

    def test_heat_emitter(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.heat_emitter == "Radiators"

    def test_heat_pump_age(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.heat_pump_age == "Unknown"

    def test_fuel_type(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.fuel_type == "Mains gas"

    def test_flue_type(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.flue_type == "Balanced"

    def test_fan_assisted_flue(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.fan_assisted_flue is True

    def test_design_flow_temperature(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.design_flow_temperature == "Unknown"

    def test_heating_controls_ees(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.heating_controls_ees == "CBE"

    def test_heating_controls_sap(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        expected = "SAP code 2106, Programmer, room thermostat and TRVs"
        assert heating.heating_controls_sap == expected

    def test_percentage_of_heat(self, result: ElmhurstSiteNotes) -> None:
        heating = result.main_heating
        assert heating.percentage_of_heat == 100
class TestMeters:
    """Checks on ``meters`` extracted from the first sample."""

    def test_electricity_meter_type(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.electricity_meter_type == "Single"

    def test_main_gas(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.main_gas is True

    def test_electricity_smart_meter(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.electricity_smart_meter is False

    def test_gas_smart_meter(self, result: ElmhurstSiteNotes) -> None:
        meters = result.meters
        assert meters.gas_smart_meter is False
class TestWaterHeating:
    """Checks on ``water_heating`` extracted from the first sample."""

    def test_water_heating_code(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.water_heating_code == "HWP"

    def test_water_heating_sap_code(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.water_heating_sap_code == 901

    def test_water_heating_fuel_type(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.water_heating_fuel_type == "Mains gas"

    def test_hot_water_cylinder_present(self, result: ElmhurstSiteNotes) -> None:
        water = result.water_heating
        assert water.hot_water_cylinder_present is False
class TestBathsAndShowers:
    """Checks on ``baths_and_showers`` extracted from the first sample."""

    def test_number_of_baths(self, result: ElmhurstSiteNotes) -> None:
        baths = result.baths_and_showers
        assert baths.number_of_baths == 0

    def test_number_of_baths_connected(self, result: ElmhurstSiteNotes) -> None:
        baths = result.baths_and_showers
        assert baths.number_of_baths_connected == 0

    def test_shower_count(self, result: ElmhurstSiteNotes) -> None:
        showers = result.baths_and_showers.showers
        assert len(showers) == 1

    def test_shower_number(self, result: ElmhurstSiteNotes) -> None:
        shower = result.baths_and_showers.showers[0]
        assert shower.shower_number == 1

    def test_shower_outlet_type(self, result: ElmhurstSiteNotes) -> None:
        shower = result.baths_and_showers.showers[0]
        assert shower.outlet_type == "Electric shower"

    def test_shower_connected(self, result: ElmhurstSiteNotes) -> None:
        shower = result.baths_and_showers.showers[0]
        # The expected value is the string "None", not the ``None`` singleton.
        assert shower.connected == "None"
class TestRenewables:
    """Checks on ``renewables`` extracted from the first sample."""

    def test_solar_water_heating(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.solar_water_heating is False

    def test_wwhrs_present(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.wwhrs_present is False

    def test_flue_gas_heat_recovery_present(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.flue_gas_heat_recovery_present is False

    def test_photovoltaic_panel(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        # The expected value is the string "None", not the ``None`` singleton.
        assert renewables.photovoltaic_panel == "None"

    def test_export_capable_meter(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.export_capable_meter is False

    def test_wind_turbine_present(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.wind_turbine_present is False

    def test_wind_turbines_terrain_type(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.wind_turbines_terrain_type == "Suburban"

    def test_hydro_electricity_generated_kwh(self, result: ElmhurstSiteNotes) -> None:
        renewables = result.renewables
        assert renewables.hydro_electricity_generated_kwh == 0.0
class TestEnergyPerformance:
    """SAP / EI ratings and CO2 emissions extracted from the first sample."""

    def test_current_sap_rating(self, result: ElmhurstSiteNotes) -> None:
        assert result.current_sap_rating == 69

    def test_potential_sap_rating(self, result: ElmhurstSiteNotes) -> None:
        assert result.potential_sap_rating == 77

    def test_current_ei_rating(self, result: ElmhurstSiteNotes) -> None:
        assert result.current_ei_rating == 76

    def test_potential_ei_rating(self, result: ElmhurstSiteNotes) -> None:
        assert result.potential_ei_rating == 81

    def test_co2_emissions_current_t(self, result: ElmhurstSiteNotes) -> None:
        # Float comparison: tolerate representation error instead of
        # requiring bit-exact equality.
        assert result.co2_emissions_current_t == pytest.approx(1.683)
class TestWindowsWithFrameDetails:
    """Checks on ``windows`` extracted from the second sample.

    Covers the frame type and glazing gap fields in addition to the basic
    window fields. Float-valued fields are compared with ``pytest.approx``
    rather than exact equality.
    """

    def test_window_count(self, result2: ElmhurstSiteNotes) -> None:
        assert len(result2.windows) == 8

    def test_draught_proofing_percent(self, result2: ElmhurstSiteNotes) -> None:
        assert result2.draught_proofing_percent == 90

    def test_first_window_glazing_type_excludes_frame_type(self, result2: ElmhurstSiteNotes) -> None:
        assert result2.windows[0].glazing_type == "Double with unknown install date"

    def test_first_window_frame_type(self, result2: ElmhurstSiteNotes) -> None:
        assert result2.windows[0].frame_type == "PVC"

    def test_first_window_frame_factor(self, result2: ElmhurstSiteNotes) -> None:
        assert result2.windows[0].frame_factor == pytest.approx(0.70)

    def test_first_window_glazing_gap(self, result2: ElmhurstSiteNotes) -> None:
        assert result2.windows[0].glazing_gap == "16 mm or more"

    def test_first_window_location(self, result2: ElmhurstSiteNotes) -> None:
        w = result2.windows[0]
        assert w.building_part == "Main"
        assert w.location == "External wall"
        assert w.orientation == "East"

    def test_first_window_performance(self, result2: ElmhurstSiteNotes) -> None:
        w = result2.windows[0]
        assert w.data_source == "Manufacturer"
        assert w.u_value == pytest.approx(2.70)
        assert w.g_value == pytest.approx(0.76)
        assert w.draught_proofed is True
        # The expected value is the string "None", not the ``None`` singleton.
        assert w.permanent_shutters == "None"

    def test_fourth_window_orientation(self, result2: ElmhurstSiteNotes) -> None:
        assert result2.windows[3].orientation == "South"
class TestLightingLedCflUnknown:
    """Checks on ``lighting`` extracted from the second sample.

    In this sample ``led_cfl_count_known`` is expected to be False, with the
    LED and CFL counts both zero and a separate low-energy count.
    """

    def test_total_bulbs(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.total_bulbs == 10

    def test_led_cfl_count_known_false(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.led_cfl_count_known is False

    def test_low_energy_count(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.low_energy_count == 5

    def test_incandescent_count(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.incandescent_count == 5

    def test_led_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.led_count == 0

    def test_cfl_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None:
        lighting = result2.lighting
        assert lighting.cfl_count == 0

View file

@ -20,9 +20,9 @@ from datatypes.epc.domain.epc_property_data import (
)
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
PDF_PATH_2 = os.path.join(
os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_2.pdf"
os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_2.pdf"
)
@ -71,7 +71,7 @@ class TestPdfToEpcPropertyData:
),
sap_windows=[
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -84,7 +84,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -97,7 +97,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@ -110,7 +110,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North",
window_type="Window",
@ -123,7 +123,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@ -136,7 +136,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -149,7 +149,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -162,7 +162,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@ -302,7 +302,7 @@ class TestPdfToEpcPropertyDataFixture2:
# Third site-notes PDF fixture; only the renamed PasHub file is referenced.
PDF_PATH_3 = os.path.join(
    os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_3.pdf"
)
@ -339,7 +339,7 @@ class TestPdfToEpcPropertyDataFixture3:
# Fourth site-notes PDF fixture; only the renamed PasHub file is referenced.
PDF_PATH_4 = os.path.join(
    os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_4.pdf"
)
@ -369,7 +369,7 @@ class TestPdfToEpcPropertyDataFixture4:
# Fifth site-notes PDF fixture; only the renamed PasHub file is referenced.
PDF_PATH_5 = os.path.join(
    os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_5.pdf"
)
@ -401,7 +401,7 @@ class TestPdfToEpcPropertyDataFixture5:
# Sixth site-notes PDF fixture; only the renamed PasHub file is referenced.
PDF_PATH_6 = os.path.join(
    os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_6.pdf"
)

View file

@ -37,32 +37,37 @@ FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
def load_text_fixture() -> list[str]:
    """Load the JSON text fixture for PasHub site notes 1.

    Returns:
        The JSON-decoded content of ``pashub_site_notes_1_text.json`` in ``FIXTURES``.
    """
    # Only the renamed fixture is opened; the superseded
    # ``site_notes_example_text.json`` open has been removed.
    with open(os.path.join(FIXTURES, "pashub_site_notes_1_text.json")) as f:
        return json.load(f)
def load_text_fixture_2() -> list[str]:
    """Load the JSON text fixture for PasHub site notes 2.

    Returns:
        The JSON-decoded content of ``pashub_site_notes_2_text.json`` in ``FIXTURES``.
    """
    # Only the renamed fixture is opened; the superseded
    # ``site_notes_example_2_text.json`` open has been removed.
    with open(os.path.join(FIXTURES, "pashub_site_notes_2_text.json")) as f:
        return json.load(f)
def load_text_fixture_3() -> list[str]:
    """Load the JSON text fixture for PasHub site notes 3.

    Returns:
        The JSON-decoded content of ``pashub_site_notes_3_text.json`` in ``FIXTURES``.
    """
    # Only the renamed fixture is opened; the superseded
    # ``site_notes_example_3_text.json`` open has been removed.
    with open(os.path.join(FIXTURES, "pashub_site_notes_3_text.json")) as f:
        return json.load(f)
def load_text_fixture_4() -> list[str]:
    """Load the JSON text fixture for PasHub site notes 4.

    Returns:
        The JSON-decoded content of ``pashub_site_notes_4_text.json`` in ``FIXTURES``.
    """
    # Only the renamed fixture is opened; the superseded
    # ``site_notes_example_4_text.json`` open has been removed.
    with open(os.path.join(FIXTURES, "pashub_site_notes_4_text.json")) as f:
        return json.load(f)
def load_text_fixture_5() -> list[str]:
    """Load the JSON text fixture for PasHub site notes 5.

    Returns:
        The JSON-decoded content of ``pashub_site_notes_5_text.json`` in ``FIXTURES``.
    """
    # Only the renamed fixture is opened; the superseded
    # ``site_notes_example_5_text.json`` open has been removed.
    with open(os.path.join(FIXTURES, "pashub_site_notes_5_text.json")) as f:
        return json.load(f)
def load_text_fixture_6() -> list[str]:
    """Load the JSON text fixture for PasHub site notes 6.

    Returns:
        The JSON-decoded content of ``pashub_site_notes_6_text.json`` in ``FIXTURES``.
    """
    # Only the renamed fixture is opened; the superseded
    # ``site_notes_example_6_text.json`` open has been removed.
    with open(os.path.join(FIXTURES, "pashub_site_notes_6_text.json")) as f:
        return json.load(f)
def load_text_fixture_7() -> list[str]:
    """Return the JSON-decoded content of ``pashub_site_notes_7_text.json`` from ``FIXTURES``."""
    fixture_path = os.path.join(FIXTURES, "pashub_site_notes_7_text.json")
    with open(fixture_path) as handle:
        return json.load(handle)
@ -785,6 +790,38 @@ class TestElectricShowerExtraction:
assert wu.showers[0].outlet_type == "Electric Shower"
# --- fixture 7: maisonette, 2 extensions, no property photo ---
class TestExtractNoPropertyPhoto:
    """Extraction checks against text fixture 7."""

    def test_address_extracted_when_no_property_photo(self) -> None:
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture_7())
        result = extractor.extract()

        metadata = result.inspection_metadata
        assert metadata.property_address == "Flat 3, 29 Watcombe Circus, NOTTINGHAM, NG5 2DU"
        assert metadata.property_photo is False

        general = result.general
        assert general.property_type == "Maisonette"
        assert general.number_of_extensions == 2
class TestWallThicknessExtraction:
    """Unit checks for ``PasHubRdSapSiteNotesExtractor._wall_thickness_in``."""

    def _extractor(self) -> PasHubRdSapSiteNotesExtractor:
        # Built with an empty list: each test hands its own lines to the helper.
        return PasHubRdSapSiteNotesExtractor([])

    def test_numeric_value_returns_int(self) -> None:
        lines = ["Wall thickness:", "310 mm"]
        thickness = self._extractor()._wall_thickness_in(lines)
        assert thickness == 310

    def test_unmeasurable_returns_none(self) -> None:
        lines = ["Wall thickness:", "Unmeasurable"]
        thickness = self._extractor()._wall_thickness_in(lines)
        assert thickness is None

    def test_unmeasurable_lowercase_returns_none(self) -> None:
        lines = ["Wall thickness:", "unmeasurable"]
        thickness = self._extractor()._wall_thickness_in(lines)
        assert thickness is None

    def test_unmeasurable_uppercase_returns_none(self) -> None:
        lines = ["Wall thickness:", "UNMEASURABLE"]
        thickness = self._extractor()._wall_thickness_in(lines)
        assert thickness is None

    def test_missing_field_returns_none(self) -> None:
        thickness = self._extractor()._wall_thickness_in([])
        assert thickness is None
class TestSolidMasonryPartyWall:
@pytest.fixture
def bc(self) -> BuildingConstruction:

View file

@ -5,8 +5,8 @@ import pytest
from backend.documents_parser.pdf import pdf_to_text_list
# Source PDF and its stored text extraction, both under this module's "fixtures" directory.
# Only the renamed PasHub files are referenced; the superseded Example/site_notes_example paths are dropped.
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "pashub_site_notes_1_text.json")
@pytest.fixture

View file

@ -0,0 +1,257 @@
import os
from typing import Dict
from playwright.sync_api import Browser, BrowserContext, Locator, Page, sync_playwright
from backend.app.db.connection import db_session
from backend.app.db.functions.uploaded_files_functions import (
get_uploaded_file_by_listing_type_and_source,
)
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.ecmk_fetcher.address_list import (
PropertyRow,
extract_addresses_from_spreadsheet,
)
from backend.ecmk_fetcher.browser import (
attach_debug_listeners,
download_with_retry,
go_to_assessment_details,
go_to_assessments,
go_to_next_page,
login,
)
from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
FileDownloadButtonType,
build_property_id,
map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.upload import (
upload_excel_to_sharepoint,
upload_file_to_s3_and_record,
upload_file_to_sharepoint,
)
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
# Module-level logger obtained from the project's setup_logger helper.
logger = setup_logger()
# File name of the dimensions workbook; appended to the SharePoint Excel folder path when downloading.
DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
class EcmkService:
    """Download submitted ECMK assessment reports and publish them downstream.

    The service walks the ECMK assessments table with Playwright, matches each
    row against a property spreadsheet and, for every report type that is not
    already recorded for the listing, downloads the file and hands it to the
    XML or PDF pipeline (SharePoint, S3 and, for RdSAP site notes, EPC
    extraction into the database).
    """

    def __init__(
        self,
        sharepoint_client: DomnaSharepointClient,
        s3_bucket: str,
        property_list_filepath: str,
        sharepoint_base_path: str,
        sharepoint_excel_path: str,
        local_dimensions_path: str,
    ) -> None:
        """Store the configuration and load the property map from the spreadsheet.

        Args:
            sharepoint_client: Client used for the Dimensions download and all SharePoint uploads.
            s3_bucket: Bucket name passed to ``upload_file_to_s3_and_record``.
            property_list_filepath: Spreadsheet path given to ``extract_addresses_from_spreadsheet``.
            sharepoint_base_path: Base folder for per-property PDF uploads.
            sharepoint_excel_path: Folder that holds ``Dimensions.xlsx``.
            local_dimensions_path: Local path of the downloaded/updated ``Dimensions.xlsx``.
        """
        self._sharepoint_client = sharepoint_client
        self._s3_bucket = s3_bucket
        self._sharepoint_base_path = sharepoint_base_path
        self._sharepoint_excel_path = sharepoint_excel_path
        self._local_dimensions_path = local_dimensions_path
        self._property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(
            property_list_filepath
        )

    def run(self) -> None:
        """Download ``Dimensions.xlsx``, then process assessments in a headless Chromium session."""
        self._sharepoint_client.download_file(
            sharepoint_path=f"{self._sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
            local_path=self._local_dimensions_path,
        )

        with sync_playwright() as p:
            browser: Browser = p.chromium.launch(headless=True)
            # Nested try/finally so the browser is closed even when creating or
            # closing the context raises.
            try:
                context: BrowserContext = browser.new_context()
                try:
                    page: Page = context.new_page()
                    self._run_browser_session(page)
                finally:
                    context.close()
            finally:
                browser.close()

    def _run_browser_session(self, page: Page) -> None:
        """Log in, then process every row of every page of the assessments table.

        Raises:
            Exception: ``"Row processing failed: ..."`` chained to the original
                error when handling any row fails; the run stops at that row.
        """
        username: str = ""  # TODO: get from github secrets
        password: str = ""

        attach_debug_listeners(page)
        login(page, username, password)
        go_to_assessments(page)

        while True:
            rows: Locator = page.locator("#assessmentDatatable tbody tr")
            row_count: int = rows.count()
            for i in range(row_count):
                row: Locator = rows.nth(i)
                try:
                    self._process_row(page, row)
                except Exception as e:
                    raise Exception(f"Row processing failed: {str(e)}") from e
            if not go_to_next_page(page):
                break

    def _process_row(self, page: Page, row: Locator) -> None:
        """Handle one table row: filter it, then download and process its reports."""
        cells: Locator = row.locator("td")
        first_name: str = cells.nth(1).inner_text().strip()
        last_name: str = cells.nth(2).inner_text().strip()
        address: str = cells.nth(5).inner_text().strip()
        postcode: str = cells.nth(7).inner_text().strip()
        status: str = cells.nth(9).inner_text().strip()

        # Rows whose name columns read "Oliver Stephens" are skipped; the
        # reason is not stated in this module.
        if first_name == "Oliver" and last_name == "Stephens":
            return
        if status != "Submitted (not Lodged)":
            return

        property_id: str = build_property_id(address, postcode)
        property_row: PropertyRow | None = self._property_map.get(property_id)
        if not property_row:
            return

        logger.info(f"Match found for property {address}")

        go_to_assessment_details(page, row)
        for report_type in REPORT_TYPES:
            self._fetch_report(page, report_type, property_row)

        # Return to the table and wait for it to render before the next row is read.
        page.go_back()
        page.wait_for_selector("#assessmentDatatable tbody tr", timeout=15000)

    def _fetch_report(
        self, page: Page, report_type: int, property_row: PropertyRow
    ) -> None:
        """Download one report type for a matched property and process it, unless already recorded."""
        hubspot_listing_id: str = property_row.listing_id

        try:
            db_file_type: FileTypeEnum = map_report_type_to_db_file_type(report_type)
        except ValueError:
            logger.error(f"Unknown report type {report_type}, skipping file")
            return

        if get_uploaded_file_by_listing_type_and_source(
            hubspot_listing_id=int(hubspot_listing_id),
            file_type=db_file_type,
            file_source=FileSourceEnum.ECMK,
        ):
            logger.debug("File already uploaded to s3, skipping")
            return

        file_path: str | None = download_with_retry(page, report_type)
        if not file_path:
            return

        logger.info(
            f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
        )

        # Errors propagate to the caller; the downloaded file is removed either way.
        try:
            self._process_file(
                file_path=file_path,
                report_type=report_type,
                db_file_type=db_file_type,
                sharepoint_address=property_row.address,
                hubspot_listing_id=hubspot_listing_id,
            )
        finally:
            if os.path.exists(file_path):
                os.remove(file_path)

    def _process_file(
        self,
        file_path: str,
        report_type: int,
        db_file_type: FileTypeEnum,
        sharepoint_address: str,
        hubspot_listing_id: str,
    ) -> None:
        """Route a downloaded file: ``RAW_XML`` goes to the XML pipeline, anything else to the PDF pipeline."""
        if report_type == FileDownloadButtonType.RAW_XML.value:
            self._process_xml_file(
                file_path=file_path,
                db_file_type=db_file_type,
                hubspot_listing_id=hubspot_listing_id,
            )
        else:
            self._process_pdf_file(
                file_path=file_path,
                file_type=db_file_type,
                sharepoint_address=sharepoint_address,
                hubspot_listing_id=hubspot_listing_id,
            )

    def _process_xml_file(
        self,
        file_path: str,
        db_file_type: FileTypeEnum,
        hubspot_listing_id: str,
    ) -> None:
        """Append the XML's flattened data to the local dimensions workbook, re-upload it, then store the XML in S3."""
        with open(file_path, "r", encoding="utf-8") as f:
            xml_string: str = f.read()

        details = parse_rdsap(xml_string)
        row_data = flatten_sap_property(details)
        write_row(self._local_dimensions_path, row_data)

        upload_excel_to_sharepoint(
            client=self._sharepoint_client,
            file_path=self._local_dimensions_path,
            sharepoint_path=self._sharepoint_excel_path,
        )
        upload_file_to_s3_and_record(
            bucket=self._s3_bucket,
            file_path=file_path,
            hubspot_listing_id=hubspot_listing_id,
            file_type=db_file_type,
        )

    def _process_pdf_file(
        self,
        file_path: str,
        file_type: FileTypeEnum,
        sharepoint_address: str,
        hubspot_listing_id: str,
    ) -> None:
        """Upload a PDF to SharePoint and S3; for RdSAP site notes also try EPC extraction.

        EPC extraction is best-effort: a failure is logged as a warning (with
        traceback) and does not propagate, so the uploaded file record stays.
        """
        upload_file_to_sharepoint(
            client=self._sharepoint_client,
            file_path=file_path,
            base_path=self._sharepoint_base_path,
            subpath=sharepoint_address,
        )
        uploaded_file_id: int = upload_file_to_s3_and_record(
            bucket=self._s3_bucket,
            file_path=file_path,
            hubspot_listing_id=hubspot_listing_id,
            file_type=file_type,
        )

        if file_type != FileTypeEnum.ECMK_RD_SAP_SITE_NOTE:
            return

        try:
            epc_data = parse_site_notes_pdf(file_path)
            with db_session() as session:
                save_epc_property_data(
                    session=session,
                    data=epc_data,
                    uploaded_file_id=uploaded_file_id,
                )
        except Exception:
            # exc_info keeps the cause in the log; without it the failure is undiagnosable.
            logger.warning(
                f"EPC extraction failed for {os.path.basename(file_path)} — file record retained",
                exc_info=True,
            )

View file

@ -1,14 +1,32 @@
import os
from typing import Any, Mapping
from backend.ecmk_fetcher.processor import run_job
from backend.ecmk_fetcher.ecmk_service import EcmkService
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
# Module-level logger obtained from the project's setup_logger helper.
logger = setup_logger()
# File name of the property-list spreadsheet; joined onto _BASE_DIR when the service is built.
_PROPERTY_LIST_FILE: str = (
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
)
# Parent of the directory that contains this module.
_BASE_DIR: str = os.path.dirname(os.path.dirname(__file__))
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Entry point: build an ``EcmkService`` with the Southern Housing settings and run it.

    Args:
        event: Invocation payload; not read by this function.
        context: Invocation context; not read by this function.
    """
    logger.info("Entered handler")
    # The job runs only through EcmkService; the earlier ``run_job()`` call
    # from the processor module is no longer made.
    service = EcmkService(
        sharepoint_client=DomnaSharepointClient(
            sharepoint_location=DomnaSites.PRIVATE_PAY
        ),
        s3_bucket="retrofit-energy-assessments-dev",
        property_list_filepath=os.path.join(_BASE_DIR, _PROPERTY_LIST_FILE),
        sharepoint_base_path="/Projects/Southern Housing/SH-SURV-26-001/Assessments",
        sharepoint_excel_path="/Projects/Southern Housing/SH-SURV-26-001/Modelling",
        local_dimensions_path=os.path.join(_BASE_DIR, "Dimensions.xlsx"),
    )
    service.run()
if __name__ == "__main__":

View file

@ -1,209 +0,0 @@
import os
from typing import Dict
from playwright.sync_api import (
sync_playwright,
Locator,
Page,
Browser,
BrowserContext,
)
from backend.app.db.functions.uploaded_files_functions import (
get_uploaded_file_by_listing_type_and_source,
)
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.ecmk_fetcher.address_list import (
PropertyRow,
extract_addresses_from_spreadsheet,
)
from backend.ecmk_fetcher.browser import (
attach_debug_listeners,
download_with_retry,
go_to_assessment_details,
go_to_assessments,
go_to_next_page,
login,
)
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
FileDownloadButtonType,
build_property_id,
map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.upload import (
upload_excel_to_sharepoint,
upload_file_to_s3_and_update_db,
upload_file_to_sharepoint,
)
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
# Single-function ECMK job: loads the property map from the spreadsheet next to
# this module, downloads Dimensions.xlsx from SharePoint, then walks the ECMK
# assessments table in a headless Chromium session and uploads every report
# that is not already recorded for the matched listing.
def run_job() -> None:
# Credentials are empty placeholders (see TODO).
username: str = "" # TODO: get from github secrets
password: str = ""
property_list_file: str = (
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
)
BASE_DIR: str = os.path.dirname(__file__)
filepath: str = os.path.join(BASE_DIR, property_list_file)
property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(filepath)
sharepoint_client: DomnaSharepointClient = DomnaSharepointClient(
sharepoint_location=DomnaSites.PRIVATE_PAY
)
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
# Fetch the current workbook first so rows written later land in the existing file.
sharepoint_client.download_file(
sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
local_path=local_dimensions_path,
)
s3_bucket: str = "retrofit-energy-assessments-dev"
with sync_playwright() as p:
browser: Browser = p.chromium.launch(headless=True)
context: BrowserContext = browser.new_context()
page: Page = context.new_page()
attach_debug_listeners(page)
try:
login(page, username, password)
go_to_assessments(page)
# One iteration per page of the assessments table; ends when go_to_next_page returns falsy.
while True:
rows: Locator = page.locator("#assessmentDatatable tbody tr")
row_count: int = rows.count()
for i in range(row_count):
row: Locator = rows.nth(i)
try:
cells: Locator = row.locator("td")
first_name: str = cells.nth(1).inner_text().strip()
last_name: str = cells.nth(2).inner_text().strip()
address: str = cells.nth(5).inner_text().strip()
postcode: str = cells.nth(7).inner_text().strip()
status: str = cells.nth(9).inner_text().strip()
# Skip rows named "Oliver Stephens" and rows not in the "Submitted (not Lodged)" status.
if first_name == "Oliver" and last_name == "Stephens":
continue
if status != "Submitted (not Lodged)":
continue
property_id: str = build_property_id(address, postcode)
property_row: PropertyRow | None = property_map.get(property_id)
# Rows with no entry in the property map are ignored.
if not property_row:
continue
logger.info(f"Match found for property {address}")
sharepoint_address: str = property_row.address
go_to_assessment_details(page, row)
for report_type in REPORT_TYPES:
hubspot_listing_id: str = property_row.listing_id
try:
db_file_type: FileTypeEnum = (
map_report_type_to_db_file_type(report_type)
)
except ValueError:
logger.error(
f"Unknown report type {report_type}, skipping file"
)
continue
# Skip report types already recorded for this listing with source ECMK.
if get_uploaded_file_by_listing_type_and_source(
hubspot_listing_id=int(hubspot_listing_id),
file_type=db_file_type,
file_source=FileSourceEnum.ECMK,
):
logger.debug("File already uploaded to s3, skipping")
continue
file_path: str | None = download_with_retry(
page, report_type
)
if not file_path:
continue
logger.info(
f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
)
try:
# RAW_XML reports add a row to the local Dimensions workbook, which is re-uploaded;
# every other report is uploaded to the property's SharePoint folder.
if report_type == FileDownloadButtonType.RAW_XML.value:
with open(file_path, "r", encoding="utf-8") as f:
xml_string = f.read()
details = parse_rdsap(xml_string)
row_data = flatten_sap_property(details)
write_row(local_dimensions_path, row_data)
upload_excel_to_sharepoint(
client=sharepoint_client,
file_path=local_dimensions_path,
sharepoint_path=sharepoint_excel_path,
)
logger.info(
f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
)
else:
upload_file_to_sharepoint(
client=sharepoint_client,
file_path=file_path,
base_path=sharepoint_base_path,
subpath=sharepoint_address,
)
logger.info(
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
)
# Upload to s3 and update db
upload_file_to_s3_and_update_db(
bucket=s3_bucket,
file_path=file_path,
hubspot_listing_id=hubspot_listing_id,
file_type=db_file_type,
)
# Re-raises unchanged; the finally below still removes the downloaded file.
except Exception:
raise
finally:
if os.path.exists(file_path):
os.remove(file_path)
# Return to the assessments table and wait for its rows before continuing.
page.go_back()
page.wait_for_selector(
"#assessmentDatatable tbody tr", timeout=15000
)
# Any failure while handling a row stops the job with a wrapped, chained exception.
except Exception as e:
raise Exception(f"Row processing failed: {str(e)}") from e
if not go_to_next_page(page):
break
finally:
context.close()
browser.close()

View file

@ -0,0 +1,594 @@
from typing import Dict
from unittest.mock import MagicMock, call, patch
from backend.app.db.models.uploaded_file import FileTypeEnum
from backend.ecmk_fetcher.address_list import PropertyRow
from backend.ecmk_fetcher.ecmk_service import EcmkService
from backend.ecmk_fetcher.reports import FileDownloadButtonType
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
# Property map returned by the patched extract_addresses_from_spreadsheet in these tests.
FAKE_PROPERTY_MAP: Dict[str, PropertyRow] = {
"10 FAKE ST SW1A 1AA": PropertyRow(
row_index=2, address="10 Fake St SW1A 1AA", listing_id="hs-001"
)
}
def make_service(
    sharepoint_client: DomnaSharepointClient | None = None,
    s3_bucket: str = "test-bucket",
    property_list_filepath: str = "/fake/properties.xlsx",
    sharepoint_base_path: str = "/base",
    sharepoint_excel_path: str = "/excel",
    local_dimensions_path: str = "/fake/Dimensions.xlsx",
) -> EcmkService:
    """Build an ``EcmkService`` with test defaults.

    When no (truthy) client is supplied, a ``MagicMock`` specced to
    ``DomnaSharepointClient`` stands in for it.
    """
    client = sharepoint_client
    if not client:
        client = MagicMock(spec=DomnaSharepointClient)

    service = EcmkService(
        sharepoint_client=client,
        s3_bucket=s3_bucket,
        property_list_filepath=property_list_filepath,
        sharepoint_base_path=sharepoint_base_path,
        sharepoint_excel_path=sharepoint_excel_path,
        local_dimensions_path=local_dimensions_path,
    )
    return service
# ---------------------------------------------------------------------------
# __init__: loads property map from spreadsheet filepath
# ---------------------------------------------------------------------------
def test_init_loads_property_map_from_filepath() -> None:
    """The constructor hands ``property_list_filepath`` to the spreadsheet loader exactly once."""
    loader_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    )
    with loader_patch as loader:
        make_service(property_list_filepath="/some/props.xlsx")

    loader.assert_called_once_with("/some/props.xlsx")
# ---------------------------------------------------------------------------
# run(): downloads Dimensions.xlsx before Playwright browser launches
# ---------------------------------------------------------------------------
def _make_playwright_mocks() -> tuple[MagicMock, MagicMock, MagicMock, MagicMock]:
mock_page = MagicMock()
mock_context = MagicMock()
mock_context.new_page.return_value = mock_page
mock_browser = MagicMock()
mock_browser.new_context.return_value = mock_context
mock_playwright = MagicMock()
mock_playwright.chromium.launch.return_value = mock_browser
return mock_page, mock_context, mock_browser, mock_playwright
def test_run_downloads_dimensions_before_browser_launch() -> None:
    """``run`` downloads the dimensions workbook before Chromium is launched."""
    events: list[str] = []
    client = MagicMock(spec=DomnaSharepointClient)
    _, _, browser, playwright = _make_playwright_mocks()

    def record_download(**_: object) -> None:
        events.append("download")

    def record_launch(**_: object) -> MagicMock:
        events.append("browser")
        return browser

    client.download_file.side_effect = record_download
    playwright.chromium.launch.side_effect = record_launch

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as sync_pw,
    ):
        sync_pw.return_value.__enter__.return_value = playwright
        service = make_service(
            sharepoint_client=client,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        # The browser session itself is out of scope for this test.
        with patch.object(service, "_run_browser_session"):
            service.run()

    assert events == ["download", "browser"]
def test_run_downloads_dimensions_with_correct_paths() -> None:
    """``run`` asks SharePoint for ``<excel path>/Dimensions.xlsx`` and saves it to the local path."""
    client = MagicMock(spec=DomnaSharepointClient)
    playwright = _make_playwright_mocks()[3]

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as sync_pw,
    ):
        sync_pw.return_value.__enter__.return_value = playwright
        service = make_service(
            sharepoint_client=client,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        with patch.object(service, "_run_browser_session"):
            service.run()

    client.download_file.assert_called_once_with(
        sharepoint_path="/excel/Dimensions.xlsx",
        local_path="/fake/Dimensions.xlsx",
    )
# ---------------------------------------------------------------------------
# run(): passes the Playwright Page into _run_browser_session
# ---------------------------------------------------------------------------
def test_run_passes_page_to_run_browser_session() -> None:
    """``run`` hands the page created from the browser context to ``_run_browser_session``."""
    page, _, _, playwright = _make_playwright_mocks()

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as sync_pw,
    ):
        sync_pw.return_value.__enter__.return_value = playwright
        service = make_service()
        with patch.object(service, "_run_browser_session") as session_spy:
            service.run()

    session_spy.assert_called_once_with(page)
# ---------------------------------------------------------------------------
# _process_file: dispatches based on report_type
# ---------------------------------------------------------------------------
def test_process_file_dispatches_to_xml_for_raw_xml() -> None:
    """A ``RAW_XML`` report is routed to ``_process_xml_file`` with the expected arguments."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch.object(service, "_process_xml_file") as mock_xml,
        patch.object(service, "_process_pdf_file") as mock_pdf,
    ):
        service._process_file(
            file_path="/tmp/file.xml",
            report_type=FileDownloadButtonType.RAW_XML.value,
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    # Check the forwarded arguments too, not just that a call happened.
    mock_xml.assert_called_once_with(
        file_path="/tmp/file.xml",
        db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
        hubspot_listing_id="hs-001",
    )
    mock_pdf.assert_not_called()
def test_process_file_dispatches_to_pdf_for_non_xml() -> None:
    """A non-XML report is routed to ``_process_pdf_file`` with the expected arguments."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch.object(service, "_process_xml_file") as mock_xml,
        patch.object(service, "_process_pdf_file") as mock_pdf,
    ):
        service._process_file(
            file_path="/tmp/file.pdf",
            report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
            db_file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    # ``db_file_type`` is forwarded under the PDF pipeline's ``file_type`` keyword.
    mock_pdf.assert_called_once_with(
        file_path="/tmp/file.pdf",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
        sharepoint_address="10 Fake St",
        hubspot_listing_id="hs-001",
    )
    mock_xml.assert_not_called()
# ---------------------------------------------------------------------------
# _process_xml_file: parse → flatten → write row → upload excel → S3
# ---------------------------------------------------------------------------
def test_process_xml_file_full_chain() -> None:
    """The XML pipeline parses the file contents, writes a row, re-uploads the workbook and stores the XML."""
    fake_details = MagicMock()
    fake_row_data = MagicMock()

    # Stand-in for ``open``: the context-managed handle returns fixed XML text.
    fake_file = MagicMock()
    fake_file.read.return_value = "<xml/>"
    fake_open = MagicMock()
    fake_open.return_value.__enter__.return_value = fake_file

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_excel_path="/excel",
            local_dimensions_path="/dims/Dimensions.xlsx",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_rdsap", return_value=fake_details
        ) as mock_parse,
        patch(
            "backend.ecmk_fetcher.ecmk_service.flatten_sap_property",
            return_value=fake_row_data,
        ) as mock_flatten,
        patch("backend.ecmk_fetcher.ecmk_service.write_row") as mock_write,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_excel_to_sharepoint"
        ) as mock_upload_excel,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record"
        ) as mock_s3,
        patch("builtins.open", fake_open),
    ):
        service._process_xml_file(
            file_path="/tmp/report.xml",
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            hubspot_listing_id="hs-001",
        )

    # The parser must receive the file's contents, not its path.
    mock_parse.assert_called_once_with("<xml/>")
    mock_flatten.assert_called_once_with(fake_details)
    mock_write.assert_called_once_with("/dims/Dimensions.xlsx", fake_row_data)
    mock_upload_excel.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/dims/Dimensions.xlsx",
        sharepoint_path="/excel",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.xml",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SURVEY_XML,
    )
# ---------------------------------------------------------------------------
# _process_pdf_file: sharepoint upload → S3 upload
# ---------------------------------------------------------------------------
def test_process_pdf_file_uploads_to_sharepoint_then_s3() -> None:
    """The PDF pipeline uploads to SharePoint first and to S3 second, with the expected arguments."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_base_path="/base",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"
        ) as mock_sp,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ) as mock_s3,
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
    ):
        # A shared parent mock records both uploads in one ordered call list.
        uploads = MagicMock()
        uploads.attach_mock(mock_sp, "sharepoint")
        uploads.attach_mock(mock_s3, "s3")

        service._process_pdf_file(
            file_path="/tmp/report.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_sp.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/tmp/report.pdf",
        base_path="/base",
        subpath="10 Fake St",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SITE_NOTE,
    )
    # The "then" in the test name: SharePoint must come before S3.
    assert uploads.mock_calls == [
        call.sharepoint(
            client=service._sharepoint_client,
            file_path="/tmp/report.pdf",
            base_path="/base",
            subpath="10 Fake St",
        ),
        call.s3(
            bucket="my-bucket",
            file_path="/tmp/report.pdf",
            hubspot_listing_id="hs-001",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
        ),
    ]
# ---------------------------------------------------------------------------
# _process_pdf_file: EPC extraction conditional on file_type
# ---------------------------------------------------------------------------
def test_process_pdf_file_runs_epc_extraction_for_rd_sap_site_note() -> None:
    """An RdSAP site note is parsed and saved against the id returned by the S3 upload."""
    parsed_epc = MagicMock()
    session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=99,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
            return_value=parsed_epc,
        ) as parse_spy,
        patch(
            "backend.ecmk_fetcher.ecmk_service.save_epc_property_data"
        ) as save_spy,
        patch(
            "backend.ecmk_fetcher.ecmk_service.db_session"
        ) as db_session_spy,
    ):
        # ``with db_session() as session`` must yield the fake session.
        db_session_spy.return_value.__enter__.return_value = session
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    parse_spy.assert_called_once_with("/tmp/sitenote.pdf")
    save_spy.assert_called_once_with(
        session=session,
        data=parsed_epc,
        uploaded_file_id=99,
    )
def test_process_pdf_file_skips_epc_extraction_for_ecmk_site_note() -> None:
    """A plain ECMK site note is uploaded without parsing or opening a DB session."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"
        ) as parse_spy,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as db_session_spy,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    parse_spy.assert_not_called()
    db_session_spy.assert_not_called()
def test_process_pdf_file_epc_uses_separate_db_session_from_s3_upload() -> None:
    """EPC db_session opens only after upload_file_to_s3_and_record returns."""
    events: list[str] = []

    def record_s3(**_: object) -> int:
        events.append("s3")
        return 77

    def record_db_session() -> MagicMock:
        events.append("db_session")
        # Minimal context manager whose __exit__ does not suppress exceptions.
        manager = MagicMock()
        manager.__enter__ = MagicMock(return_value=MagicMock())
        manager.__exit__ = MagicMock(return_value=False)
        return manager

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            side_effect=record_s3,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.db_session",
            side_effect=record_db_session,
        ),
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert events == ["s3", "db_session"]
# ---------------------------------------------------------------------------
# _process_pdf_file: EPC failures swallowed with warning
# ---------------------------------------------------------------------------
def _pdf_file_patches_for_failure() -> tuple:  # type: ignore[type-arg]
    """Return un-started patchers for the SharePoint upload and the S3 upload (which yields id 1)."""
    sharepoint_patch = patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint")
    s3_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
        return_value=1,
    )
    return (sharepoint_patch, s3_patch)
def test_process_pdf_file_parse_failure_logged_as_warning_not_raised() -> None:
    """A parser error is swallowed: one warning is logged and nothing is saved."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    sharepoint_patch, s3_patch = _pdf_file_patches_for_failure()
    with (
        sharepoint_patch,
        s3_patch,
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
            side_effect=ValueError("bad pdf"),
        ),
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data") as save_spy,
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
        patch("backend.ecmk_fetcher.ecmk_service.logger") as logger_spy,
    ):
        # Must return normally even though parsing raises.
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    logger_spy.warning.assert_called_once()
    save_spy.assert_not_called()
def test_process_pdf_file_save_failure_logged_as_warning_not_raised() -> None:
    """An error while saving EPC data is logged once as a warning and not raised."""
    session_stub = MagicMock()
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        ecmk_service = make_service()

    sharepoint_upload, s3_upload = _pdf_file_patches_for_failure()
    parser_stub = patch(
        "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
        return_value=MagicMock(),
    )
    exploding_save = patch(
        "backend.ecmk_fetcher.ecmk_service.save_epc_property_data",
        side_effect=RuntimeError("db exploded"),
    )
    with (
        sharepoint_upload,
        s3_upload,
        parser_stub,
        exploding_save,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as db_session_stub,
        patch("backend.ecmk_fetcher.ecmk_service.logger") as logger_spy,
    ):
        # ``with db_session() as session`` inside the service yields our stub.
        db_session_stub.return_value.__enter__.return_value = session_stub
        ecmk_service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    logger_spy.warning.assert_called_once()
# ---------------------------------------------------------------------------
# _run_browser_session: delegates file processing to _process_file
# ---------------------------------------------------------------------------
def _make_page_mock_with_one_matching_row() -> MagicMock:
cells_nth: dict[int, MagicMock] = {n: MagicMock() for n in (1, 2, 5, 7, 9)}
cells_nth[1].inner_text.return_value = "John"
cells_nth[2].inner_text.return_value = "Doe"
cells_nth[5].inner_text.return_value = "10 FAKE ST"
cells_nth[7].inner_text.return_value = "SW1A 1AA"
cells_nth[9].inner_text.return_value = "Submitted (not Lodged)"
cells_mock = MagicMock()
cells_mock.nth.side_effect = lambda n: cells_nth[n]
row_mock = MagicMock()
row_mock.locator.return_value = cells_mock
rows_mock = MagicMock()
rows_mock.count.return_value = 1
rows_mock.nth.return_value = row_mock
page = MagicMock()
page.locator.return_value = rows_mock
return page
# Property map handed to the service in the browser-session test. Its single
# key has to equal the property id derived from the mocked table row:
# address "10 FAKE ST" + postcode "SW1A 1AA" → build_property_id → "10SW1A1AA"
_BROWSER_SESSION_PROPERTY_MAP: Dict[str, PropertyRow] = {
    "10SW1A1AA": PropertyRow(
        row_index=2, address="10 Fake St SW1A 1AA", listing_id="12345"
    )
}
def test_run_browser_session_calls_process_file_for_downloaded_file() -> None:
    """The browser session hands the one downloaded file to ``_process_file``."""
    page = _make_page_mock_with_one_matching_row()
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=_BROWSER_SESSION_PROPERTY_MAP,
    ):
        ecmk_service = make_service()

    with (
        # Navigation helpers are stubbed out; pagination stops after one page.
        patch("backend.ecmk_fetcher.ecmk_service.attach_debug_listeners"),
        patch("backend.ecmk_fetcher.ecmk_service.login"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessments"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessment_details"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_next_page", return_value=False),
        # No previously uploaded file is reported for the listing.
        patch(
            "backend.ecmk_fetcher.ecmk_service.get_uploaded_file_by_listing_type_and_source",
            return_value=None,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.download_with_retry",
            return_value="/tmp/fake.pdf",
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.map_report_type_to_db_file_type",
            return_value=FileTypeEnum.ECMK_SITE_NOTE,
        ),
        # Restrict the session to a single report type.
        patch(
            "backend.ecmk_fetcher.ecmk_service.REPORT_TYPES",
            [FileDownloadButtonType.SITENOTE_REPORT.value],
        ),
        patch.object(ecmk_service, "_process_file") as process_file_spy,
        patch("os.path.exists", return_value=False),
    ):
        ecmk_service._run_browser_session(page)

    process_file_spy.assert_called_once_with(
        file_path="/tmp/fake.pdf",
        report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
        db_file_type=FileTypeEnum.ECMK_SITE_NOTE,
        sharepoint_address="10 Fake St SW1A 1AA",
        hubspot_listing_id="12345",
    )

View file

@ -0,0 +1,59 @@
from unittest.mock import MagicMock, patch
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
def test_handler_constructs_ecmk_service_and_calls_run() -> None:
    """The handler builds exactly one ``EcmkService`` and calls ``run`` on it once."""
    service_instance = MagicMock()
    service_factory = MagicMock(return_value=service_instance)
    sharepoint_client = MagicMock(spec=DomnaSharepointClient)

    with (
        patch("backend.ecmk_fetcher.handler.handler.EcmkService", service_factory),
        patch(
            "backend.ecmk_fetcher.handler.handler.DomnaSharepointClient",
            return_value=sharepoint_client,
        ),
    ):
        from backend.ecmk_fetcher.handler.handler import handler

        handler({}, None)

    service_factory.assert_called_once()
    service_instance.run.assert_called_once()
def test_handler_passes_correct_config_to_ecmk_service() -> None:
    """The handler passes the expected bucket and paths to ``EcmkService`` as keywords."""
    service_instance = MagicMock()
    service_factory = MagicMock(return_value=service_instance)
    sharepoint_client = MagicMock(spec=DomnaSharepointClient)

    with (
        patch("backend.ecmk_fetcher.handler.handler.EcmkService", service_factory),
        patch(
            "backend.ecmk_fetcher.handler.handler.DomnaSharepointClient",
            return_value=sharepoint_client,
        ),
    ):
        from backend.ecmk_fetcher.handler.handler import handler

        handler({}, None)

    # Only the keyword arguments of the constructor call are inspected.
    _, passed = service_factory.call_args
    assert passed["s3_bucket"] == "retrofit-energy-assessments-dev"
    assert (
        passed["sharepoint_base_path"]
        == "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
    )
    assert (
        passed["sharepoint_excel_path"]
        == "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
    )
    assert passed["property_list_filepath"].endswith(
        "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
    )
    assert passed["local_dimensions_path"].endswith("Dimensions.xlsx")

View file

@ -0,0 +1,108 @@
from typing import Generator
from unittest.mock import MagicMock, call, patch
import pytest
from backend.app.db.models.uploaded_file import FileTypeEnum
from backend.ecmk_fetcher.upload import upload_file_to_s3_and_record
@pytest.fixture
def mock_uploaded_file() -> MagicMock:
    """Stand-in for an ``UploadedFile`` row whose ``id`` is 42."""
    uploaded = MagicMock()
    uploaded.id = 42
    return uploaded
@pytest.fixture
def mock_session() -> MagicMock:
    """Stand-in for the database session yielded by ``db_session()``."""
    session = MagicMock()
    return session
@pytest.fixture
def patched_deps(
    mock_uploaded_file: MagicMock, mock_session: MagicMock
) -> Generator[dict[str, MagicMock], None, None]:
    """Patch the S3 upload, ``db_session`` and ``UploadedFile`` in the upload module.

    Yields a dict of the active mocks, keyed ``s3``, ``db_ctx``, ``model``,
    ``session`` and ``uploaded_file``, for tests to assert against.
    """
    with (
        patch("backend.ecmk_fetcher.upload.upload_file_to_s3") as s3_stub,
        patch("backend.ecmk_fetcher.upload.db_session") as db_ctx_stub,
        patch(
            "backend.ecmk_fetcher.upload.UploadedFile",
            return_value=mock_uploaded_file,
        ) as model_stub,
    ):
        # Make ``with db_session() as session`` yield the session mock, and
        # have ``__exit__`` return False so exceptions are not suppressed.
        session_cm = db_ctx_stub.return_value
        session_cm.__enter__.return_value = mock_session
        session_cm.__exit__.return_value = False

        deps: dict[str, MagicMock] = {
            "s3": s3_stub,
            "db_ctx": db_ctx_stub,
            "model": model_stub,
            "session": mock_session,
            "uploaded_file": mock_uploaded_file,
        }
        yield deps
def test_returns_uploaded_file_id_as_int(
    patched_deps: dict[str, MagicMock],
) -> None:
    """The function returns the new record's id as a plain ``int``."""
    returned_id = upload_file_to_s3_and_record(
        bucket="test-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    assert isinstance(returned_id, int)
    assert returned_id == 42
def test_uploads_to_s3_with_key_derived_from_listing_id_and_filename(
    patched_deps: dict[str, MagicMock],
) -> None:
    """The S3 key is built from the listing id and the file's base name."""
    upload_file_to_s3_and_record(
        bucket="my-bucket",
        file_path="/some/path/site_note.pdf",
        hubspot_listing_id="hs-999",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    s3_stub = patched_deps["s3"]
    s3_stub.assert_called_once_with(
        "/some/path/site_note.pdf",
        "my-bucket",
        "documents/hubspot_listing_id/hs-999/site_note.pdf",
    )
def test_adds_uploaded_file_record_to_session(
    patched_deps: dict[str, MagicMock],
) -> None:
    """The new record is added to the session and the session is flushed once."""
    upload_file_to_s3_and_record(
        bucket="test-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    session = patched_deps["session"]
    session.add.assert_called_once_with(patched_deps["uploaded_file"])
    session.flush.assert_called_once()
def test_site_note_type_does_not_trigger_pdf_parsing(
    patched_deps: dict[str, MagicMock],
) -> None:
    """Uploading a site note completes without any PDF-parsing mock in place."""
    # If parsing branch still existed, this would blow up without a
    # parse_site_notes_pdf mock — test passes only when branch is absent.
    returned_id = upload_file_to_s3_and_record(
        bucket="test-bucket",
        file_path="/tmp/site_note.pdf",
        hubspot_listing_id="hs-002",
        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
    )

    assert returned_id == 42

View file

@ -1,5 +1,6 @@
from datetime import datetime, timezone
import os
from typing import cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
@ -7,9 +8,12 @@ from backend.app.db.models.uploaded_file import (
FileTypeEnum,
UploadedFile,
)
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
logger = setup_logger()
def upload_file_to_sharepoint(
client: DomnaSharepointClient,
@ -41,9 +45,9 @@ def upload_excel_to_sharepoint(
# TODO: this should be moved to somewhere common and called by pashub fetcher
def upload_file_to_s3_and_update_db(
def upload_file_to_s3_and_record(
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
) -> None:
) -> int:
filename: str = os.path.basename(file_path)
key: str = f"documents/hubspot_listing_id/{hubspot_listing_id}/{filename}"
@ -61,4 +65,7 @@ def upload_file_to_s3_and_update_db(
with db_session() as session:
# TODO: we should do multiple files at once to reduce db trips
session.add(uploaded_file)
session.commit()
session.flush()
uploaded_file_id: int = int(cast(int, uploaded_file.id))
return uploaded_file_id

View file

@ -656,6 +656,15 @@ async def model_engine(body: PlanTriggerRequest):
# address_metadata=addr Switched off to remove injecting landlord inputs
)
# Warning! The EPC API is broken and we are getting missing data for local authority and
# constituency. We're going to add some verbose handling here but there may be problems
if prepared_epc.local_authority is None:
# Fill
prepared_epc.local_authority = ""
if prepared_epc.constituency is None:
prepared_epc.constituency = ""
input_properties.append(
Property(
id=property_id,

0
backend/etl/__init__.py Normal file
View file

View file

@ -0,0 +1,14 @@
The website https://epc.opendatacommunities.org/ closed down on 30th May 2026,
so we downloaded the data and moved everything to S3 ( s3://retrofit-data-dev/histroical_epc/0_master_backup/ )
This script assumes the following:
1) You have downloaded the master copy, uncompressed it, and placed it at a local path the script can read the CSVs from (`SRC_ROOT` in the script)
The script does the following:
1) reads the certificates CSV in each local-authority folder and separates the rows by postcode
2) gzip-compresses each postcode's CSV and uploads it to S3
3) upload location: s3://retrofit-data-dev/historical_epc/<postcode>/data.csv.gz

View file

@ -0,0 +1,133 @@
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
from io import BytesIO
from pathlib import Path
from typing import Any
import boto3
import pandas as pd
from botocore.config import Config
from tqdm import tqdm
from utils.logger import setup_logger
logger = setup_logger()
SRC_ROOT = Path("/workspaces/home/epc_data")
TMP_ROOT = Path("/tmp/epc_postcodes")
S3_BUCKET = "retrofit-data-dev"
S3_PREFIX = "historical_epc"
# This script assumes you have downloaded the zip, unzipped it, and are running it locally
def sanitise(pc: pd.Series) -> pd.Series:
    """Normalise a postcode column: string dtype, upper-cased, spaces removed.

    Missing values stay missing.
    """
    as_text = pc.astype("string")
    upper_cased = as_text.str.upper()
    return upper_cased.str.replace(" ", "", regex=False)
def shard_la(la_dir: Path) -> None:
    """Split one folder's ``certificates.csv`` into per-postcode CSV shards.

    Rows are grouped by normalised postcode and appended to
    ``TMP_ROOT/<postcode>.csv``; the header is written only when the shard
    file does not exist yet. Rows with a missing or empty postcode are dropped
    and the count is logged as a warning.

    Args:
        la_dir: Folder containing a ``certificates.csv`` file.
    """
    frame = pd.read_csv(la_dir / "certificates.csv", low_memory=False)
    frame["POSTCODE_CLEAN"] = sanitise(frame["POSTCODE"])

    rows_before = len(frame)
    frame = frame.dropna(subset=["POSTCODE_CLEAN"])
    frame = frame[frame["POSTCODE_CLEAN"] != ""]
    dropped = rows_before - len(frame)
    if dropped:
        logger.warning(f"{la_dir.name}: dropped {dropped} rows with empty postcode")

    # NOTE: shards are opened in append mode, so re-running over the same
    # TMP_ROOT appends the same rows again.
    for postcode, rows in frame.groupby("POSTCODE_CLEAN", sort=False):
        shard_path = TMP_ROOT / f"{postcode}.csv"
        write_header = not shard_path.exists()
        rows.drop(columns=["POSTCODE_CLEAN"]).to_csv(
            shard_path, mode="a", header=write_header, index=False
        )
def list_existing_keys(s3: Any) -> set[str]:
    """Return every object key currently stored under ``S3_PREFIX/`` in ``S3_BUCKET``.

    Args:
        s3: S3 client providing ``get_paginator``.

    Returns:
        The set of keys found across all result pages.
    """
    pages = s3.get_paginator("list_objects_v2").paginate(
        Bucket=S3_BUCKET, Prefix=f"{S3_PREFIX}/"
    )
    # A page without "Contents" contributes no keys.
    existing: set[str] = {
        entry["Key"]
        for page in tqdm(pages, desc="list s3")
        for entry in page.get("Contents", [])
    }
    logger.info(f"Found {len(existing)} existing objects under {S3_PREFIX}/")
    return existing
def upload_postcode(path: Path, s3: Any) -> None:
    """De-duplicate one postcode shard, gzip it in memory and upload it to S3.

    Args:
        path: Local shard CSV; its stem is used as the postcode in the key.
        s3: S3 client providing ``put_object``.

    Raises:
        ValueError: If an ``LMK_KEY`` still occurs more than once after exact
            duplicate rows have been removed.
    """
    shard = pd.read_csv(path, low_memory=False).drop_duplicates()

    # Exact duplicates are gone, so a repeated key means rows that differ.
    key_counts = shard["LMK_KEY"].value_counts()
    bad = key_counts[key_counts > 1]
    if not bad.empty:
        raise ValueError(
            f"Postcode {path.stem}: LMK_KEY appears with conflicting cert data: "
            f"{bad.index.tolist()[:5]}"
        )

    payload = BytesIO()
    shard.to_csv(payload, index=False, compression="gzip")
    object_key = f"{S3_PREFIX}/{path.stem}/data.csv.gz"
    s3.put_object(
        Bucket=S3_BUCKET,
        Key=object_key,
        Body=payload.getvalue(),
        ContentType="text/csv",
        ContentEncoding="gzip",
    )
def main() -> None:
    """Shard every EPC export folder by postcode and upload the shards to S3.

    Steps:
      1. Split each ``domestic-*`` folder under ``SRC_ROOT`` into per-postcode
         CSV files in ``TMP_ROOT``.
      2. List the keys already under ``S3_PREFIX`` and skip shards whose
         target key exists.
      3. Upload the remaining shards on a thread pool, keeping at most
         ``workers * 2`` uploads submitted at a time.

    Raises:
        Exception: The first upload failure is logged and re-raised; uploads
            that have not started yet are cancelled first.
    """
    TMP_ROOT.mkdir(parents=True, exist_ok=True)

    la_dirs = sorted(
        p for p in SRC_ROOT.iterdir() if p.is_dir() and p.name.startswith("domestic-")
    )
    logger.info(f"Sharding {len(la_dirs)} LA folders -> {TMP_ROOT}")
    for la in tqdm(la_dirs, desc="shard"):
        shard_la(la)

    s3 = boto3.client(
        "s3",
        config=Config(
            max_pool_connections=512, retries={"max_attempts": 5, "mode": "standard"}
        ),
    )

    pc_files = sorted(TMP_ROOT.glob("*.csv"))
    logger.info(f"Found {len(pc_files)} local shards")

    existing = list_existing_keys(s3)
    todo = [p for p in pc_files if f"{S3_PREFIX}/{p.stem}/data.csv.gz" not in existing]
    skipped = len(pc_files) - len(todo)
    logger.info(
        f"Uploading {len(todo)} shards (skipping {skipped} already in S3) -> "
        f"s3://{S3_BUCKET}/{S3_PREFIX}/"
    )

    workers = 256
    todo_iter = iter(todo)
    inflight: dict[Any, Path] = {}

    # The bar is a context manager so it is closed even when an upload fails.
    with tqdm(total=len(todo), desc="upload") as pbar:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            # Prime the window; afterwards one new upload is submitted per
            # completed one, so the pool's queue never holds the full list.
            for _ in range(workers * 2):
                pc = next(todo_iter, None)
                if pc is None:
                    break
                inflight[pool.submit(upload_postcode, pc, s3)] = pc

            while inflight:
                done, _ = wait(inflight.keys(), return_when=FIRST_COMPLETED)
                for fut in done:
                    pc = inflight.pop(fut)
                    try:
                        fut.result()
                    except Exception as e:
                        logger.error(f"{pc.name}: {e}")
                        # Fail fast: drop queued uploads so pool shutdown does
                        # not run them before the error propagates. Uploads
                        # already running cannot be cancelled and finish.
                        for pending in inflight:
                            pending.cancel()
                        raise
                    pbar.update(1)
                    nxt = next(todo_iter, None)
                    if nxt is not None:
                        inflight[pool.submit(upload_postcode, nxt, s3)] = nxt
if __name__ == "__main__":
main()

View file

@ -282,11 +282,6 @@ def test_default_export_integration(db_session):
df["sap_points"].sum()
)
assert df.shape == (
10,
101,
), "Expected dataframe shape to be (10, 101), got {}".format(df.shape)
def test_solar_with_battery_example(db_session):
test_portfolio_id = 1
@ -337,7 +332,7 @@ def test_solar_with_battery_example(db_session):
"creation_status": "PropertyCreationStatus.READY",
"uprn": 100090438731,
"landlord_property_id": "BARR052",
"building_reference_number": 3460742868.0,
"building_reference_number": 3460742868,
"status": "PortfolioStatus.ASSESSMENT",
"address": "52, Barrack Street",
"postcode": "CO1 2LR",
@ -566,6 +561,8 @@ def test_solar_with_battery_example(db_session):
creation_status=PropertyCreationStatus[row.creation_status.split(".")[-1]],
status=PortfolioStatus[row.status.split(".")[-1]],
uprn=row.uprn,
address=row.address,
postcode=row.postcode,
property_type=row.property_type,
current_sap_points=row.current_sap_points,
current_epc_rating=Epc[row.current_epc_rating.split(".")[-1]],

View file

View file

@ -0,0 +1,46 @@
import re
from typing import Optional
from datatypes.magicplan.api.response import PlanSummary
_UK_POSTCODE_RE = re.compile(r"[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}", re.IGNORECASE)
def _extract_postcode(address: str) -> str | None:
match = _UK_POSTCODE_RE.search(address)
if match is None:
return None
return match.group().replace(" ", "").upper()
def _normalize_postcode(postcode: str) -> str:
return postcode.replace(" ", "").upper()
def find_matching_plan(plans: list[PlanSummary], address: str) -> Optional[PlanSummary]:
postcode = _extract_postcode(address)
if postcode is None:
return None
address_lower = address.lower()
for plan in plans:
if plan.address is None:
continue
plan_postcode = plan.address.postal_code
if plan_postcode is None:
continue
if _normalize_postcode(plan_postcode) != postcode:
continue
street_parts = [
p for p in [plan.address.street_number, plan.address.street] if p
]
plan_street = " ".join(street_parts).lower()
if plan_street and plan_street in address_lower:
return plan
return None

View file

@ -0,0 +1,36 @@
from typing import Any
from backend.app.config import get_settings
from backend.magic_plan.magic_plan_client import MagicPlanClient
from backend.magic_plan.magic_plan_service import MagicPlanService
from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerRequest
from datatypes.magicplan.domain.models import Plan
from backend.utils.subtasks import task_handler
from utils.logger import setup_logger
logger = setup_logger()
@task_handler()
def handler(body: dict[str, Any], context: Any) -> str:
    """Fetch and save the MagicPlan plan for the address in *body*.

    Args:
        body: Trigger payload, validated as ``MagicPlanTriggerRequest``.
        context: Not used by this function.

    Returns:
        The ``uid`` of the saved plan.
    """
    settings = get_settings()
    request = MagicPlanTriggerRequest.model_validate(body)

    magic_plan_client = MagicPlanClient(
        customer_id=settings.MAGICPLAN_CUSTOMER_ID,
        api_key=settings.MAGICPLAN_API_KEY,
    )
    service = MagicPlanService(magic_plan_client)
    saved_plan: Plan = service.run(request.address, request.uprn)

    logger.info("Saved MagicPlan plan uid=%s", saved_plan.uid)
    return saved_plan.uid
if __name__ == "__main__":
    # Local smoke run: call the handler with a "Records"-style event holding
    # one record whose body is the JSON trigger payload.
    event = {
        "Records": [
            {
                "body": '{"address": "2 Laburnum Way Bromley BR2 8BZ"}',
                "messageId": "local-test",
            }
        ]
    }
    handler(event, None)

View file

@ -0,0 +1,24 @@
import requests
from datatypes.magicplan.api.response import MagicPlanPlan, PlansListResponse
_BASE_URL = "https://cloud.magicplan.app/api/v2"
class MagicPlanClient:
    """Thin HTTP client for the MagicPlan plans endpoints.

    The customer id is sent as a ``customer`` header on every request and the
    API key as the ``key`` query parameter.
    """

    def __init__(self, customer_id: str, api_key: str) -> None:
        """Create a session carrying the ``customer`` header and store the API key."""
        self._api_key = api_key
        self._session = requests.Session()
        self._session.headers.update({"customer": customer_id})

    def get_plans(self) -> PlansListResponse:
        """Fetch the plans list and validate the ``data`` part of the response.

        Raises:
            requests.HTTPError: If the response status indicates an error.
        """
        # NOTE(review): a single request is made here; the example response
        # carries a "paging" section — confirm whether further pages must be fetched.
        response = self._session.get(f"{_BASE_URL}/plans", params={"key": self._api_key})
        response.raise_for_status()
        payload = response.json()
        return PlansListResponse.model_validate(payload["data"])

    def get_plan(self, plan_id: str) -> MagicPlanPlan:
        """Fetch one plan by id and validate the ``data`` part of the response.

        Raises:
            requests.HTTPError: If the response status indicates an error.
        """
        response = self._session.get(
            f"{_BASE_URL}/plans/{plan_id}", params={"key": self._api_key}
        )
        response.raise_for_status()
        payload = response.json()
        return MagicPlanPlan.model_validate(payload["data"])

View file

@ -0,0 +1,42 @@
from typing import Optional
from datatypes.magicplan.api.response import (
MagicPlanPlan,
PlanSummary,
PlansListResponse,
)
from datatypes.magicplan.domain.mapper import map_plan
from datatypes.magicplan.domain.models import Plan
from backend.app.db.connection import db_session
from backend.app.db.functions.magic_plan_functions import save_plan
from backend.magic_plan.address_matcher import find_matching_plan
from backend.magic_plan.magic_plan_client import MagicPlanClient
from utils.logger import setup_logger
logger = setup_logger()
class MagicPlanService:
    """Finds the MagicPlan plan for an address, maps it and saves it."""

    def __init__(self, client: MagicPlanClient) -> None:
        """Store the client used for all MagicPlan API calls."""
        self._client = client

    def run(self, address: str, uprn: Optional[str] = None) -> Plan:
        """Fetch, map and save the plan matching *address*.

        Args:
            address: Address used to pick a plan from the plans list.
            uprn: Optional UPRN; currently only logged.

        Returns:
            The mapped domain ``Plan`` that was saved.

        Raises:
            ValueError: If no plan matches *address*.
        """
        if uprn is not None:
            logger.info("MagicPlanService.run uprn=%s", uprn)

        listing: PlansListResponse = self._client.get_plans()
        # TODO: use address2UPRN instead? or create AddressMatch domain class
        summary: Optional[PlanSummary] = find_matching_plan(listing.plans, address)
        if summary is None:
            raise ValueError(f"No MagicPlan found for address: {address!r}")

        raw_plan: MagicPlanPlan = self._client.get_plan(summary.id)
        mapped: Plan = map_plan(raw_plan)

        with db_session() as session:
            save_plan(session, mapped)

        return mapped

View file

@ -0,0 +1,10 @@
from typing import Optional
from pydantic import BaseModel, ConfigDict
class MagicPlanTriggerRequest(BaseModel):
    """Payload that triggers a MagicPlan fetch for one address."""

    # Keys in the incoming body that are not declared below are ignored.
    model_config = ConfigDict(extra="ignore")

    # Required address text.
    address: str
    # Optional UPRN; None when the caller does not supply one.
    uprn: Optional[str] = None

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,39 @@
{
"data": {
"paging": {
"page": 1,
"next_page": false,
"count": 1
},
"plans": [
{
"id": "9f9889ff-793e-4e9a-a6f0-e22f5b0f5365",
"project_id": "269422e7-45b6-4582-b124-405053dcd967",
"name": "11, Br1 3lp",
"address": {
"street": "11 Station Road",
"street_number": null,
"postal_code": "BR1 3LP",
"city": "Bromley",
"country": "GB",
"longitude": 0.01593668,
"latitude": 51.40901033
},
"creation_date": "2026-04-28T09:35:44+00:00",
"update_date": "2026-05-05T12:53:36+00:00",
"thumbnail_url": "https://s3.amazonaws.com/prod.plans.sensopia.com/9f9889ff-793e-4e9a-a6f0-e22f5b0f5365/plan.thumb",
"public_url": "https://cloud.magicplan.app/plan/9f9889ff-793e-4e9a-a6f0-e22f5b0f5365",
"cloud_url": "https://cloud.magicplan.app/projects/9f9889ff-793e-4e9a-a6f0-e22f5b0f5365",
"3d_url": "https://3d.magicplan.app/#embed/?key=MmFkZDJjNGRmYWRjM2Y5ZDAwMjEyZGRlY2I3NmJjOWFjOWRmMDdkNzIxZTViZDdhNTgxZDBiYWE1YTYzZTJmY%2FJNEogVfW%2FZwVfY25qc24oCKnfVxiF%2FupeeA7vwS8FECF0L9E7DUFE%2ByzEYzYaoVc%2FbtsZ%2FqZOSPopiR4OqD3zbCziU0QTydELS32cnSFOT",
"workgroup_id": "677d01685458a",
"team_id": null,
"created_by": {
"id": "b19771e9-1aad-45a5-9a41-f01a835172ea",
"firstname": null,
"lastname": null,
"email": "archie.ratcliff@domna.homes"
}
}
]
}
}

View file

View file

@ -0,0 +1,129 @@
from datatypes.magicplan.api.response import PlanSummary
from backend.magic_plan.address_matcher import find_matching_plan, _extract_postcode
def _make_plan(
    plan_id: str,
    street: str | None = None,
    street_number: str | None = None,
    postal_code: str | None = None,
) -> PlanSummary:
    """Build a ``PlanSummary`` named "Plan <id>" with the given address parts."""
    address_fields = {
        "street": street,
        "street_number": street_number,
        "postal_code": postal_code,
    }
    raw_plan = {
        "id": plan_id,
        "name": f"Plan {plan_id}",
        "address": address_fields,
    }
    return PlanSummary.model_validate(raw_plan)
# --- _extract_postcode ---
def test_extract_postcode_standard_format() -> None:
    """A spaced postcode at the end of the address comes back without the space."""
    extracted = _extract_postcode("2 Laburnum Way Bromley BR2 8BZ")
    assert extracted == "BR28BZ"
def test_extract_postcode_no_space_in_postcode() -> None:
    """A postcode written without its inner space is still recognised."""
    extracted = _extract_postcode("123 High St London SW1A1AA")
    assert extracted == "SW1A1AA"
def test_extract_postcode_lowercase_input() -> None:
    """Lower-case input is matched and returned upper-cased."""
    extracted = _extract_postcode("2 laburnum way br2 8bz")
    assert extracted == "BR28BZ"
def test_extract_postcode_none_when_absent() -> None:
    """An address without a postcode yields ``None``."""
    extracted = _extract_postcode("123 High Street London")
    assert extracted is None
def test_extract_postcode_none_for_empty_string() -> None:
    """The empty string yields ``None``."""
    extracted = _extract_postcode("")
    assert extracted is None
# --- find_matching_plan ---
# Two plans in different postcodes, shared by the find_matching_plan tests.
PLAN_A = _make_plan(
    "plan-a", street="Laburnum Way", street_number="2", postal_code="BR2 8BZ"
)
PLAN_B = _make_plan(
    "plan-b", street="Station Road", street_number="11", postal_code="BR1 3LP"
)
def test_find_matching_plan_returns_match() -> None:
    """The plan whose street and postcode both match is returned."""
    # Arrange
    candidates = [PLAN_A, PLAN_B]
    # Act
    matched = find_matching_plan(candidates, "2 Laburnum Way Bromley BR2 8BZ")
    # Assert
    assert matched is not None
    assert matched.id == "plan-a"
def test_find_matching_plan_postcode_mismatch_returns_none() -> None:
    """Same street but a different postcode gives no match."""
    # Arrange
    candidates = [PLAN_A]
    # Act
    matched = find_matching_plan(candidates, "2 Laburnum Way Bromley SW1A 1AA")
    # Assert
    assert matched is None
def test_find_matching_plan_street_mismatch_returns_none() -> None:
    """Same postcode but a different street gives no match."""
    # Arrange
    candidates = [PLAN_A]
    # Act
    matched = find_matching_plan(candidates, "99 Other Road Bromley BR2 8BZ")
    # Assert
    assert matched is None
def test_find_matching_plan_empty_list_returns_none() -> None:
    """With no candidate plans there is nothing to match."""
    # Act
    matched = find_matching_plan([], "2 Laburnum Way Bromley BR2 8BZ")
    # Assert
    assert matched is None
def test_find_matching_plan_postcode_with_no_space_in_address() -> None:
    """An unspaced postcode in the address still matches a spaced plan postcode."""
    # Arrange - address has postcode without internal space
    candidates = [PLAN_A]
    # Act
    matched = find_matching_plan(candidates, "2 Laburnum Way Bromley BR28BZ")
    # Assert
    assert matched is not None
    assert matched.id == "plan-a"
def test_find_matching_plan_plan_postcode_with_no_space() -> None:
    """An unspaced plan postcode still matches a spaced postcode in the address."""
    # Arrange - plan has postcode without space
    unspaced_plan = _make_plan(
        "plan-c", street="Laburnum Way", street_number="2", postal_code="BR28BZ"
    )
    # Act
    matched = find_matching_plan([unspaced_plan], "2 Laburnum Way Bromley BR2 8BZ")
    # Assert
    assert matched is not None
    assert matched.id == "plan-c"
def test_find_matching_plan_no_postcode_in_address_returns_none() -> None:
    """An address without a postcode never matches, even when the street does."""
    # Act
    matched = find_matching_plan([PLAN_A], "2 Laburnum Way Bromley")
    # Assert
    assert matched is None
def test_find_matching_plan_second_plan_matches() -> None:
    """Matching is not limited to the first candidate in the list."""
    # Arrange
    candidates = [PLAN_A, PLAN_B]
    # Act
    matched = find_matching_plan(candidates, "11 Station Road Bromley BR1 3LP")
    # Assert
    assert matched is not None
    assert matched.id == "plan-b"

View file

@ -0,0 +1,103 @@
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from pydantic import ValidationError
from backend.magic_plan.handler import handler
# Address sent in the request body, and the uid the mocked plan reports.
ADDRESS = "2 Laburnum Way Bromley BR2 8BZ"
PLAN_UID = "a7285ed1-878d-47eb-8aa6-85ef9e187516"
def _make_settings(**overrides: str) -> MagicMock:
settings = MagicMock()
settings.MAGICPLAN_CUSTOMER_ID = overrides.get("customer_id", "cust-123")
settings.MAGICPLAN_API_KEY = overrides.get("api_key", "key-abc")
return settings
def _call_handler(body: dict[str, Any]) -> Any:
    """Call the undecorated handler (``handler.__wrapped__``) with *body* and no context."""
    undecorated = handler.__wrapped__  # type: ignore[attr-defined]
    return undecorated(body, None)
@pytest.fixture()
def mock_plan() -> MagicMock:
    """Plan mock whose ``uid`` is ``PLAN_UID``."""
    fake_plan = MagicMock()
    fake_plan.uid = PLAN_UID
    return fake_plan
@pytest.fixture()
def mock_service(mock_plan: MagicMock) -> MagicMock:
    """Service mock whose ``run`` returns the plan mock."""
    fake_service = MagicMock()
    fake_service.run.return_value = mock_plan
    return fake_service
# --- request validation ---
def test_handler_raises_on_missing_address(mock_plan: MagicMock) -> None:
    """A body without ``address`` fails request validation."""
    # Arrange
    empty_body: dict[str, Any] = {}
    with (
        patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()),
        patch("backend.magic_plan.handler.MagicPlanClient"),
        patch("backend.magic_plan.handler.MagicPlanService"),
    ):
        # Act / Assert
        with pytest.raises(ValidationError):
            _call_handler(empty_body)
# --- client construction ---
def test_handler_constructs_client_from_settings(mock_service: MagicMock) -> None:
    """The client is built from the customer id and API key held in settings."""
    # Arrange
    request_body = {"address": ADDRESS}
    fake_settings = _make_settings(customer_id="cust-xyz", api_key="key-xyz")
    with (
        patch("backend.magic_plan.handler.get_settings", return_value=fake_settings),
        patch("backend.magic_plan.handler.MagicPlanClient") as client_cls,
        patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service),
    ):
        # Act
        _call_handler(request_body)
    # Assert
    client_cls.assert_called_once_with(customer_id="cust-xyz", api_key="key-xyz")
# --- service orchestration ---
def test_handler_calls_service_run_with_address(mock_service: MagicMock) -> None:
    """Without a uprn in the body, ``run`` receives the address and ``None``."""
    # Arrange
    request_body = {"address": ADDRESS}
    with (
        patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()),
        patch("backend.magic_plan.handler.MagicPlanClient"),
        patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service),
    ):
        # Act
        _call_handler(request_body)
    # Assert
    mock_service.run.assert_called_once_with(ADDRESS, None)
def test_handler_passes_uprn_to_service(mock_service: MagicMock) -> None:
    """A uprn in the body is forwarded to ``run`` as the second argument."""
    # Arrange
    request_body = {"address": ADDRESS, "uprn": "100023336956"}
    with (
        patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()),
        patch("backend.magic_plan.handler.MagicPlanClient"),
        patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service),
    ):
        # Act
        _call_handler(request_body)
    # Assert
    mock_service.run.assert_called_once_with(ADDRESS, "100023336956")
def test_handler_returns_plan_uid(mock_service: MagicMock) -> None:
    """The handler returns the ``uid`` of the plan produced by the service."""
    # Arrange
    request_body = {"address": ADDRESS}
    with (
        patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()),
        patch("backend.magic_plan.handler.MagicPlanClient"),
        patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service),
    ):
        # Act
        returned_uid = _call_handler(request_body)
    # Assert
    assert returned_uid == PLAN_UID

View file

@ -0,0 +1,174 @@
import json
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
import requests
from backend.magic_plan.magic_plan_client import MagicPlanClient
from datatypes.magicplan.api.response import MagicPlanPlan, PlansListResponse
# Directory holding the example API responses read by _load_fixture.
FIXTURE_DIR = Path(__file__).parents[2] / "magic_plan"
# Expected request URL prefix asserted by the URL tests below.
BASE_URL = "https://cloud.magicplan.app/api/v2"
# Credentials handed to the client under test.
CUSTOMER_ID = "test-customer"
API_KEY = "test-key"
def _load_fixture(name: str) -> dict[str, Any]:
    """Read and parse the JSON fixture called *name* from ``FIXTURE_DIR``."""
    fixture_path = FIXTURE_DIR / name
    return json.loads(fixture_path.read_text())
def _make_client(mock_session: MagicMock) -> MagicPlanClient:
    """Build a client whose ``requests.Session`` is replaced by *mock_session*."""
    session_patcher = patch(
        "backend.magic_plan.magic_plan_client.requests.Session",
        return_value=mock_session,
    )
    # The patch only needs to be active while the constructor runs.
    with session_patcher:
        return MagicPlanClient(customer_id=CUSTOMER_ID, api_key=API_KEY)
@pytest.fixture()
def mock_session() -> MagicMock:
    """Session mock injected into the client under test."""
    session = MagicMock()
    return session
@pytest.fixture()
def client(mock_session: MagicMock) -> MagicPlanClient:
    """Client wired to the session mock."""
    magic_plan_client = _make_client(mock_session)
    return magic_plan_client
# --- constructor ---
def test_customer_header_set_on_session(mock_session: MagicMock) -> None:
    """Constructing the client sets the ``customer`` header on its session."""
    # Act
    _make_client(mock_session)
    # Assert
    expected_headers = {"customer": CUSTOMER_ID}
    mock_session.headers.update.assert_called_once_with(expected_headers)
# --- get_plans ---
def test_get_plans_calls_correct_url(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """``get_plans`` requests ``/plans`` with the API key as the ``key`` parameter."""
    # Arrange
    fixture_data = _load_fixture("magicplan_api_plans_response_example.json")["data"]
    api_response = {"message": "OK", "data": fixture_data}
    mock_session.get.return_value.json.return_value = api_response
    # Act
    client.get_plans()
    # Assert
    mock_session.get.assert_called_once_with(
        f"{BASE_URL}/plans", params={"key": API_KEY}
    )
def test_get_plans_calls_raise_for_status(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """``get_plans`` checks the HTTP status of the response exactly once."""
    # Arrange
    fixture_data = _load_fixture("magicplan_api_plans_response_example.json")["data"]
    api_response = {"message": "OK", "data": fixture_data}
    mock_session.get.return_value.json.return_value = api_response
    # Act
    client.get_plans()
    # Assert
    http_response = mock_session.get.return_value
    http_response.raise_for_status.assert_called_once()
def test_get_plans_returns_plans_list_response(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """``get_plans`` parses the payload into a ``PlansListResponse`` with one plan."""
    # Arrange
    fixture_data = _load_fixture("magicplan_api_plans_response_example.json")["data"]
    api_response = {"message": "OK", "data": fixture_data}
    mock_session.get.return_value.json.return_value = api_response
    # Act
    listing = client.get_plans()
    # Assert
    assert isinstance(listing, PlansListResponse)
    assert len(listing.plans) == 1
def test_get_plans_propagates_http_error(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """An ``HTTPError`` from ``raise_for_status`` is not swallowed by ``get_plans``."""
    # Arrange
    http_response = mock_session.get.return_value
    http_response.raise_for_status.side_effect = requests.HTTPError("404")
    # Act / Assert
    with pytest.raises(requests.HTTPError):
        client.get_plans()
# --- get_plan ---
def test_get_plan_calls_correct_url(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """``get_plan`` requests ``/plans/<id>`` with the API key as the ``key`` parameter."""
    # Arrange
    fixture_data = _load_fixture("magicplan_api_plan_response_example.json")["data"]
    api_response = {"message": "OK", "data": fixture_data}
    mock_session.get.return_value.json.return_value = api_response
    plan_id = "a7285ed1-878d-47eb-8aa6-85ef9e187516"
    # Act
    client.get_plan(plan_id)
    # Assert
    mock_session.get.assert_called_once_with(
        f"{BASE_URL}/plans/{plan_id}", params={"key": API_KEY}
    )
def test_get_plan_calls_raise_for_status(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """``get_plan`` checks the HTTP status of the response exactly once."""
    # Arrange
    fixture_data = _load_fixture("magicplan_api_plan_response_example.json")["data"]
    api_response = {"message": "OK", "data": fixture_data}
    mock_session.get.return_value.json.return_value = api_response
    # Act
    client.get_plan("a7285ed1-878d-47eb-8aa6-85ef9e187516")
    # Assert
    http_response = mock_session.get.return_value
    http_response.raise_for_status.assert_called_once()
def test_get_plan_returns_magic_plan(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """``get_plan`` parses the payload into a ``MagicPlanPlan`` carrying the plan id."""
    # Arrange
    fixture_data = _load_fixture("magicplan_api_plan_response_example.json")["data"]
    api_response = {"message": "OK", "data": fixture_data}
    mock_session.get.return_value.json.return_value = api_response
    # Act
    fetched = client.get_plan("a7285ed1-878d-47eb-8aa6-85ef9e187516")
    # Assert
    assert isinstance(fetched, MagicPlanPlan)
    assert fetched.plan.id == "a7285ed1-878d-47eb-8aa6-85ef9e187516"
def test_get_plan_propagates_http_error(
    client: MagicPlanClient, mock_session: MagicMock
) -> None:
    """An ``HTTPError`` from ``raise_for_status`` is not swallowed by ``get_plan``."""
    # Arrange
    http_response = mock_session.get.return_value
    http_response.raise_for_status.side_effect = requests.HTTPError("500")
    # Act / Assert
    with pytest.raises(requests.HTTPError):
        client.get_plan("some-id")

View file

@ -0,0 +1,146 @@
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
from datatypes.magicplan.domain.mapper import map_plan
from datatypes.magicplan.domain.models import Plan
from backend.magic_plan.magic_plan_client import MagicPlanClient
from backend.magic_plan.magic_plan_service import MagicPlanService
FIXTURE_DIR = Path(__file__).parents[2] / "magic_plan"
PLAN_ID = "a7285ed1-878d-47eb-8aa6-85ef9e187516"
@pytest.fixture(scope="module")
def domain_plan() -> Plan:
    """Domain ``Plan`` mapped from the example plan response (built once per module)."""
    fixture_path = FIXTURE_DIR / "magicplan_api_plan_response_example.json"
    raw = json.loads(fixture_path.read_text())
    api_plan = MagicPlanPlan.model_validate(raw["data"])
    return map_plan(api_plan)
@pytest.fixture(scope="module")
def api_magic_plan() -> MagicPlanPlan:
    """API-level plan parsed from the example plan response (built once per module)."""
    fixture_path = FIXTURE_DIR / "magicplan_api_plan_response_example.json"
    raw = json.loads(fixture_path.read_text())
    return MagicPlanPlan.model_validate(raw["data"])
@pytest.fixture(scope="module")
def plan_summary() -> PlanSummary:
    """The `plan` summary section of the recorded MagicPlan response fixture."""
    fixture_file = FIXTURE_DIR / "magicplan_api_plan_response_example.json"
    envelope = json.loads(fixture_file.read_text())
    api_plan = MagicPlanPlan.model_validate(envelope["data"])
    return api_plan.plan
@pytest.fixture()
def mock_client() -> MagicMock:
    """Fresh MagicPlanClient stand-in, spec'd so unknown attributes fail fast."""
    client_double = MagicMock(spec=MagicPlanClient)
    return client_double
def _make_service(mock_client: MagicMock) -> MagicPlanService:
    """Build the service under test around the supplied client double."""
    service = MagicPlanService(client=mock_client)
    return service
# --- no match ---
def test_run_raises_when_no_plan_found(mock_client: MagicMock) -> None:
    """run() must raise ValueError when the client lists no plans at all."""
    # Arrange: the plan listing comes back empty
    listing = mock_client.get_plans.return_value
    listing.plans = []
    service = _make_service(mock_client)

    # Act / Assert
    with pytest.raises(ValueError, match="No MagicPlan found"):
        service.run("99 Nowhere Road London SW1A 1AA")
# --- match found ---
def test_run_fetches_plan_with_matched_id(
    mock_client: MagicMock,
    api_magic_plan: MagicPlanPlan,
    plan_summary: PlanSummary,
    domain_plan: Plan,
) -> None:
    """run() must fetch the full plan using the id of the matched summary."""
    # Arrange
    mock_client.get_plans.return_value.plans = [plan_summary]
    mock_client.get_plan.return_value = api_magic_plan
    service = _make_service(mock_client)
    # Matching and persistence are stubbed so only the client interaction is exercised.
    matcher_patch = patch(
        "backend.magic_plan.magic_plan_service.find_matching_plan",
        return_value=plan_summary,
    )
    save_patch = patch("backend.magic_plan.magic_plan_service.save_plan")
    db_patch = patch("backend.magic_plan.magic_plan_service.db_session")

    # Act
    with matcher_patch, save_patch, db_patch:
        service.run("2 Laburnum Way Bromley BR2 8BZ")

    # Assert
    mock_client.get_plan.assert_called_once_with(plan_summary.id)
def test_run_returns_mapped_plan(
    mock_client: MagicMock,
    api_magic_plan: MagicPlanPlan,
    plan_summary: PlanSummary,
    domain_plan: Plan,
) -> None:
    """run() must return the domain Plan mapped from the fetched API plan."""
    # Arrange
    mock_client.get_plans.return_value.plans = [plan_summary]
    mock_client.get_plan.return_value = api_magic_plan
    service = _make_service(mock_client)
    # Matching and persistence are stubbed; the mapping itself runs for real.
    matcher_patch = patch(
        "backend.magic_plan.magic_plan_service.find_matching_plan",
        return_value=plan_summary,
    )
    save_patch = patch("backend.magic_plan.magic_plan_service.save_plan")
    db_patch = patch("backend.magic_plan.magic_plan_service.db_session")

    # Act
    with matcher_patch, save_patch, db_patch:
        mapped = service.run("2 Laburnum Way Bromley BR2 8BZ")

    # Assert
    assert isinstance(mapped, Plan)
    assert mapped.uid == PLAN_ID
def test_run_calls_save_plan_with_mapped_plan(
    mock_client: MagicMock,
    api_magic_plan: MagicPlanPlan,
    plan_summary: PlanSummary,
) -> None:
    """run() must hand the mapped Plan to save_plan as its second positional argument."""
    # Arrange
    mock_client.get_plans.return_value.plans = [plan_summary]
    mock_client.get_plan.return_value = api_magic_plan
    service = _make_service(mock_client)
    matcher_patch = patch(
        "backend.magic_plan.magic_plan_service.find_matching_plan",
        return_value=plan_summary,
    )
    save_patch = patch("backend.magic_plan.magic_plan_service.save_plan")
    db_patch = patch("backend.magic_plan.magic_plan_service.db_session")

    # Act
    with matcher_patch, save_patch as mock_save, db_patch:
        service.run("2 Laburnum Way Bromley BR2 8BZ")

    # Assert — save_plan called with a Plan whose uid matches
    positional_args = mock_save.call_args[0]
    persisted: Plan = positional_args[1]
    assert persisted.uid == PLAN_ID
def test_run_accepts_uprn_without_error(
    mock_client: MagicMock,
    api_magic_plan: MagicPlanPlan,
    plan_summary: PlanSummary,
) -> None:
    """run() must accept the optional `uprn` keyword; passing means no exception."""
    # Arrange
    mock_client.get_plans.return_value.plans = [plan_summary]
    mock_client.get_plan.return_value = api_magic_plan
    service = _make_service(mock_client)
    matcher_patch = patch(
        "backend.magic_plan.magic_plan_service.find_matching_plan",
        return_value=plan_summary,
    )
    save_patch = patch("backend.magic_plan.magic_plan_service.save_plan")
    db_patch = patch("backend.magic_plan.magic_plan_service.db_session")

    # Act — no explicit assertion: the call completing is the check
    with matcher_patch, save_patch, db_patch:
        service.run("2 Laburnum Way Bromley BR2 8BZ", uprn="100023336956")

View file

@ -0,0 +1,40 @@
import pytest
from pydantic import ValidationError
from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerRequest
def test_valid_payload_with_address_only() -> None:
    """A payload carrying only `address` validates and leaves `uprn` as None."""
    # Arrange
    address = "123 High St London SW1A 1AA"

    # Act
    request = MagicPlanTriggerRequest.model_validate({"address": address})

    # Assert
    assert request.address == "123 High St London SW1A 1AA"
    assert request.uprn is None
def test_valid_payload_with_uprn() -> None:
    """A supplied `uprn` is preserved verbatim on the validated request."""
    # Arrange
    raw = {"address": "123 High St London SW1A 1AA", "uprn": "100023336956"}

    # Act
    request = MagicPlanTriggerRequest.model_validate(raw)

    # Assert
    assert request.uprn == "100023336956"
def test_missing_address_raises() -> None:
    """`address` is mandatory: validating a payload without it must fail."""
    # Arrange
    raw = {"uprn": "100023336956"}

    # Act / Assert
    with pytest.raises(ValidationError):
        MagicPlanTriggerRequest.model_validate(raw)
def test_extra_fields_ignored() -> None:
    """Unknown keys in the payload must not prevent validation."""
    # Arrange
    raw = {"address": "123 High St London SW1A 1AA", "unknown_field": "whatever"}

    # Act
    request = MagicPlanTriggerRequest.model_validate(raw)

    # Assert
    assert request.address == "123 High St London SW1A 1AA"

View file

@ -1,72 +1,18 @@
from datetime import datetime, timezone
import os
import re
from typing import Any, Dict, List, Optional
from openpyxl import load_workbook
from typing import Any, Dict, List
from backend.app.config import get_settings
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
FileSourceEnum,
UploadedFile,
)
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.job import Job
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
from backend.utils.subtasks import task_handler
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
def extract_jobs(filepath: str) -> List[Job]:
wb = load_workbook(filepath, data_only=True)
# ws = wb["watford warm homes (wave 3) mai"]
ws = wb["filtered"]
HEADER_ROW = 3
headers: Dict[str, int] = {}
for col in range(1, ws.max_column + 1):
value = str(ws.cell(row=HEADER_ROW, column=col).value)
if value:
headers[value.strip()] = col
name_col = headers["Name"]
# link_col = headers["Pashub Link"]
link_col = headers["PasHub Link"]
jobs: List[Job] = []
for row in range(HEADER_ROW + 1, ws.max_row + 1):
name = ws.cell(row=row, column=name_col).value
link = ws.cell(row=row, column=link_col).value
if not name or not link:
continue
match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link))
if not match:
continue
jobs.append(
{
"id": match.group(1),
"address": str(name),
}
)
return jobs
S3_BUCKET = "retrofit-energy-assessments-dev"
def get_pashub_client(email: str, password: str) -> PashubClient:
@ -75,108 +21,6 @@ def get_pashub_client(email: str, password: str) -> PashubClient:
return PashubClient(token=token)
def upload_job_to_sharepoint(
sharepoint_client: DomnaSharepointClient,
# base_path: str,
sharepoint_link: str,
job_files: List[str],
) -> None:
# job_path = f"{base_path}/{job['address']}"
# Create main job folder
# sharepoint_client.makedir(job["address"], base_path)
# Create subfolders
# for folder in SharepointSubfolders:
# sharepoint_client.makedir(folder.value, job_path)
# Upload into assessment folder
assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"
for file_path in job_files:
filename = file_path.split("/")[-1]
sharepoint_client.upload_file(
file_path,
assessment_path,
filename,
)
def upload_job_to_s3_and_update_db(
job_files: List[str], uprn: Optional[str], hubspot_deal_id: Optional[str]
) -> None:
bucket = "retrofit-energy-assessments-dev"
if not uprn and not hubspot_deal_id:
return
base_path = (
f"documents/uprn/{uprn}"
if uprn
else f"documents/hubspot_deal_id/{hubspot_deal_id}"
)
uploaded_files: List[UploadedFile] = []
for file_path in job_files:
filename = os.path.basename(file_path)
file_key = f"{base_path}/{filename}"
upload_file_to_s3(file_path, bucket, file_key)
# load row to db
# TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does
uploaded_files.append(
UploadedFile(
s3_file_bucket=bucket,
s3_file_key=file_key,
s3_upload_timestamp=datetime.now(timezone.utc),
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=FileSourceEnum.PAS_HUB.value,
file_type=infer_file_type(filename),
)
)
with db_session() as session:
session.add_all(uploaded_files)
session.commit()
pass
def process_job(
job: PashubToAraTriggerRequest,
pashub_client: PashubClient,
sharepoint_client: DomnaSharepointClient,
) -> List[str]:
job_id = job.pashub_job_id
uprn: Optional[str] = job.uprn or pashub_client.get_uprn_by_job_id(job_id)
hubspot_deal_id: Optional[str] = job.hubspot_deal_id
if uprn:
logger.info(f"Got UPRN {uprn} for job {job_id}")
else:
logger.info(f"No UPRN found for job {job_id}")
job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
upload_job_to_s3_and_update_db(job_files, uprn, hubspot_deal_id)
# # Comment out sharepoint loading for now:
# Seems like the sharepoint link in pas hub is inconsistent in terms
# of whether it points to a property or a project
# if job.sharepoint_link:
# upload_job_to_sharepoint(sharepoint_client, job.sharepoint_link, job_files)
return job_files
@task_handler()
def handler(body: Dict[str, Any], context: Any) -> List[str]:
logger.info("Received message")
@ -189,8 +33,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
if (not pas_hub_email) or (not pas_hub_password):
raise ValueError("Pas Hub credentials not provided")
pashub_client = get_pashub_client(pas_hub_email, pas_hub_password)
sharepoint_client = DomnaSharepointClient(
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
)
@ -199,26 +41,24 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
payload = PashubToAraTriggerRequest.model_validate(body)
logger.debug("Successfully validated request body")
service = PashubService(
pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
)
try:
files: List[str] = process_job(
payload,
pashub_client,
sharepoint_client,
)
files: List[str] = service.run(payload)
except UnauthorizedError:
logger.warning("Token expired - refreshing")
pashub_client = get_pashub_client(
pas_hub_email,
pas_hub_password,
service = PashubService(
pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
sharepoint_client=sharepoint_client,
s3_bucket=S3_BUCKET,
)
# retry once
files = process_job(
payload,
pashub_client,
sharepoint_client,
)
files = service.run(payload)
logger.info(f"Saved {len(files)} files")

View file

@ -11,3 +11,4 @@ pytz
boto3==1.35.44
pandas==2.2.2
numpy<2.0
pymupdf

View file

@ -14,7 +14,7 @@ payload = {
{
"pashub_link": "https://google.co.uk",
"uprn": "123456",
"hubspot_deal_id": "498926855369",
"hubspot_deal_id": "1",
}
)
}

View file

@ -0,0 +1,158 @@
import os
from datetime import datetime, timezone
from typing import List, NamedTuple, Optional, cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
FileSourceEnum,
FileTypeEnum,
UploadedFile,
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
logger = setup_logger()
class _FileUploadRecord(NamedTuple):
    """Plain-value snapshot of one uploaded file, detached from the ORM row.

    Built by ``PashubService._upload_to_s3_and_update_db`` and consumed by
    ``PashubService._save_site_notes``.
    """

    # Local path of the downloaded file (the path that was uploaded to S3).
    file_path: str
    # Value of ``UploadedFile.file_type`` for this file; None when unset.
    file_type: Optional[str]
    # ``UploadedFile.id`` as read after the session flush.
    uploaded_file_id: int
class PashubService:
    """Fetch a PasHub job's core evidence files, archive them and parse site notes.

    For one trigger request the service downloads the job's core evidence
    files through the PasHub client, uploads them to S3 and records an
    ``UploadedFile`` row per file (when a UPRN or HubSpot deal id is known),
    parses any RdSAP site-note PDFs into EPC property data, and finally removes
    the downloaded local files.
    """

    def __init__(
        self,
        pashub_client: PashubClient,
        sharepoint_client: DomnaSharepointClient,
        s3_bucket: str,
    ) -> None:
        """Store the collaborators.

        Args:
            pashub_client: Client used to look up the UPRN and download files.
            sharepoint_client: Client used by the (currently disabled)
                SharePoint upload.
            s3_bucket: Name of the bucket the files are uploaded to.
        """
        self._pashub_client = pashub_client
        self._sharepoint_client = sharepoint_client
        self._s3_bucket = s3_bucket

    def run(self, request: PashubToAraTriggerRequest) -> List[str]:
        """Process one PasHub job end to end.

        Args:
            request: Validated trigger request identifying the job and,
                optionally, its UPRN / HubSpot deal id.

        Returns:
            The local paths of the downloaded files. The files themselves have
            already been removed (best effort) by the time this returns.

        Raises:
            Whatever the PasHub client, the S3 upload or the DB session raise;
            site-note parsing failures are logged and swallowed.
        """
        job_id = request.pashub_job_id
        # Prefer the UPRN supplied by the caller; otherwise ask PasHub for it.
        uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(
            job_id
        )
        hubspot_deal_id: Optional[str] = request.hubspot_deal_id

        if uprn:
            logger.info(f"Got UPRN {uprn} for job {job_id}")
        else:
            logger.info(f"No UPRN found for job {job_id}")

        job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(
            job_id
        )

        try:
            if uprn or hubspot_deal_id:
                logger.info("Uploading files to s3")
                upload_records = self._upload_to_s3_and_update_db(
                    job_files, uprn, hubspot_deal_id
                )
                self._save_site_notes(upload_records)

            # SharePoint upload disabled: pashub sharepoint_link is inconsistent
            # (points to property or project unpredictably)
            # if request.sharepoint_link:
            #     self._upload_to_sharepoint(request.sharepoint_link, job_files)
        finally:
            # Clean up even when the upload or DB write fails, so a failed (or
            # retried) run does not leave downloaded files behind.
            self._remove_local_files(job_files)

        return job_files

    @staticmethod
    def _remove_local_files(file_paths: List[str]) -> None:
        """Delete the downloaded files, logging (not raising) on failure."""
        for file_path in file_paths:
            try:
                os.remove(file_path)
            except OSError:
                logger.warning(f"Failed to delete temp file {file_path}")

    def _upload_to_s3_and_update_db(
        self,
        job_files: List[str],
        uprn: Optional[str],
        hubspot_deal_id: Optional[str],
    ) -> List[_FileUploadRecord]:
        """Upload every file to S3 and persist one ``UploadedFile`` row per file.

        Files are stored under ``documents/uprn/<uprn>/`` when a UPRN is known,
        otherwise under ``documents/hubspot_deal_id/<deal id>/``.

        Returns:
            One record per file, in ``job_files`` order; an empty list when
            neither identifier is available (nothing is uploaded then).
        """
        if not uprn and not hubspot_deal_id:
            return []

        base_path = (
            f"documents/uprn/{uprn}"
            if uprn
            else f"documents/hubspot_deal_id/{hubspot_deal_id}"
        )

        uploaded_files: List[UploadedFile] = []
        for file_path in job_files:
            filename = os.path.basename(file_path)
            file_key = f"{base_path}/{filename}"
            upload_file_to_s3(file_path, self._s3_bucket, file_key)
            uploaded_files.append(
                UploadedFile(
                    s3_file_bucket=self._s3_bucket,
                    s3_file_key=file_key,
                    s3_upload_timestamp=datetime.now(timezone.utc),
                    uprn=int(uprn) if uprn else None,
                    hubspot_deal_id=hubspot_deal_id,
                    file_source=FileSourceEnum.PAS_HUB.value,
                    file_type=infer_file_type(filename),
                )
            )

        with db_session() as session:
            session.add_all(uploaded_files)
            # Flush so the rows get their ids before they are read below.
            # NOTE(review): presumably db_session commits on exit — confirm.
            session.flush()
            # Copy the values out while the session is still open so callers
            # never touch the ORM objects afterwards.
            upload_records = [
                _FileUploadRecord(
                    file_path=file_path,
                    file_type=cast(Optional[str], uploaded_file.file_type),
                    uploaded_file_id=cast(int, uploaded_file.id),
                )
                for file_path, uploaded_file in zip(job_files, uploaded_files)
            ]

        return upload_records

    def _save_site_notes(self, upload_records: List[_FileUploadRecord]) -> None:
        """Parse every RdSAP site-note file and store the resulting EPC data.

        Records of any other (or unknown) file type are skipped. A failure to
        parse or save one file is logged with its traceback and does not stop
        the remaining files from being processed.
        """
        for record in upload_records:
            if (
                record.file_type is None
                or FileTypeEnum(record.file_type) != FileTypeEnum.RD_SAP_SITE_NOTE
            ):
                continue
            try:
                epc_data: EpcPropertyData = parse_site_notes_pdf(record.file_path)
                with db_session() as session:
                    save_epc_property_data(
                        session, epc_data, uploaded_file_id=record.uploaded_file_id
                    )
            except Exception:
                # Deliberate best effort: a bad PDF must not fail the whole job.
                logger.warning(
                    f"Failed to parse site notes {record.file_path}", exc_info=True
                )

    def _upload_to_sharepoint(
        self,
        sharepoint_link: str,
        job_files: List[str],
    ) -> None:
        """Upload every file into the assessment subfolder under ``sharepoint_link``.

        Currently unused: the call in ``run`` is commented out.
        """
        assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"
        for file_path in job_files:
            # Same filename derivation as the S3 upload path.
            filename = os.path.basename(file_path)
            self._sharepoint_client.upload_file(file_path, assessment_path, filename)

View file

@ -0,0 +1,43 @@
import re
from typing import Dict, List
from openpyxl import load_workbook
from backend.pashub_fetcher.job import Job
def extract_jobs(filepath: str) -> List[Job]:
    """Read PasHub jobs from the ``filtered`` sheet of an Excel workbook.

    Column titles are taken from row 3. Every following row that has both a
    ``Name`` and a ``PasHub Link`` whose URL contains ``/jobs/<id>/`` yields
    one job; all other rows are skipped.

    Args:
        filepath: Path of the workbook to read.

    Returns:
        One ``{"id": ..., "address": ...}`` entry per usable row, in sheet order.

    Raises:
        KeyError: If the ``filtered`` sheet, or the ``Name`` / ``PasHub Link``
            header, is missing.
    """
    wb = load_workbook(filepath, data_only=True)
    ws = wb["filtered"]

    HEADER_ROW = 3

    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_title = ws.cell(row=HEADER_ROW, column=col).value
        # Test the raw cell: str(None) is the truthy string "None", which would
        # otherwise register every empty header cell as a column named "None".
        if raw_title is None:
            continue
        title = str(raw_title).strip()
        if title:
            headers[title] = col

    name_col = headers["Name"]
    link_col = headers["PasHub Link"]

    # Compiled once per call rather than looked up for every row.
    job_id_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    jobs: List[Job] = []
    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value

        if not name or not link:
            continue

        match = job_id_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs

View file

@ -0,0 +1,254 @@
from typing import Optional
from unittest.mock import MagicMock, call, patch
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details"
def make_request(
    pashub_link: str = FAKE_JOB_LINK,
    uprn: Optional[str] = None,
    hubspot_deal_id: Optional[str] = None,
    sharepoint_link: Optional[str] = None,
) -> PashubToAraTriggerRequest:
    """Build a trigger request; every field defaults to a harmless test value."""
    fields = {
        "pashub_link": pashub_link,
        "uprn": uprn,
        "hubspot_deal_id": hubspot_deal_id,
        "sharepoint_link": sharepoint_link,
    }
    return PashubToAraTriggerRequest(**fields)
def make_service(
    pashub_client: Optional[PashubClient] = None,
    sharepoint_client: Optional[DomnaSharepointClient] = None,
    s3_bucket: str = "test-bucket",
) -> PashubService:
    """Build a PashubService, substituting spec'd mocks for omitted clients."""
    if not pashub_client:
        pashub_client = MagicMock(spec=PashubClient)
    if not sharepoint_client:
        sharepoint_client = MagicMock(spec=DomnaSharepointClient)
    return PashubService(
        pashub_client=pashub_client,
        sharepoint_client=sharepoint_client,
        s3_bucket=s3_bucket,
    )
# ---------------------------------------------------------------------------
# run(): returns file paths
# ---------------------------------------------------------------------------
def test_run_returns_file_paths() -> None:
    """run() returns exactly the paths the PasHub client downloaded."""
    downloaded = ["/tmp/a.pdf", "/tmp/b.pdf"]
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = downloaded
    service = make_service(pashub_client=pashub)

    # os.remove is stubbed because the paths do not exist on disk.
    with patch("backend.pashub_fetcher.pashub_service.os.remove"):
        returned = service.run(make_request())

    assert returned == ["/tmp/a.pdf", "/tmp/b.pdf"]
# ---------------------------------------------------------------------------
# run(): skips upload when neither uprn nor hubspot_deal_id
# ---------------------------------------------------------------------------
def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
    """With neither a UPRN nor a deal id, nothing may be uploaded to S3."""
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
    service = make_service(pashub_client=pashub)
    request = make_request(uprn=None, hubspot_deal_id=None)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as s3_upload,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(request)

    s3_upload.assert_not_called()
# ---------------------------------------------------------------------------
# run(): UPRN present → uploads each file to S3 with correct bucket/key
# ---------------------------------------------------------------------------
def test_run_uploads_files_to_s3_using_uprn_path() -> None:
    """Each file is uploaded, in order, under documents/uprn/<uprn>/ in the bucket."""
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/SiteNote_001.pdf",
        "/tmp/Photopack_002.pdf",
    ]
    service = make_service(pashub_client=pashub, s3_bucket="my-bucket")
    expected_uploads = [
        call(
            "/tmp/SiteNote_001.pdf",
            "my-bucket",
            "documents/uprn/12345/SiteNote_001.pdf",
        ),
        call(
            "/tmp/Photopack_002.pdf",
            "my-bucket",
            "documents/uprn/12345/Photopack_002.pdf",
        ),
    ]

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as s3_upload,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(make_request(uprn="12345"))

    s3_upload.assert_has_calls(expected_uploads, any_order=False)
# ---------------------------------------------------------------------------
# run(): UPRN present → UploadedFile records added to DB session
# ---------------------------------------------------------------------------
def test_run_persists_uploaded_file_records_to_db() -> None:
    """One UploadedFile row per file is added to the session in a single add_all."""
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = ["/tmp/SiteNote_001.pdf"]
    session = MagicMock()
    service = make_service(pashub_client=pashub)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as db_session_mock,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        # `with db_session() as s:` must yield our inspectable session.
        db_session_mock.return_value.__enter__.return_value = session
        service.run(make_request(uprn="12345"))

    session.add_all.assert_called_once()
    rows: list = session.add_all.call_args[0][0]
    assert len(rows) == 1
    only_row = rows[0]
    assert only_row.s3_file_bucket == "test-bucket"
    assert only_row.uprn == 12345
# ---------------------------------------------------------------------------
# run(): hubspot_deal_id only → uses deal_id S3 path prefix
# ---------------------------------------------------------------------------
def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
    """Without a UPRN the S3 key falls back to documents/hubspot_deal_id/<id>/."""
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = ["/tmp/SiteNote_001.pdf"]
    service = make_service(pashub_client=pashub, s3_bucket="my-bucket")
    request = make_request(uprn=None, hubspot_deal_id="deal-abc")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as s3_upload,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(request)

    s3_upload.assert_called_once_with(
        "/tmp/SiteNote_001.pdf",
        "my-bucket",
        "documents/hubspot_deal_id/deal-abc/SiteNote_001.pdf",
    )
# ---------------------------------------------------------------------------
# run(): RD_SAP_SITE_NOTE file → site notes parsed and saved to DB
# ---------------------------------------------------------------------------
def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
    """An RdSAP site-note file is parsed and saved against its UploadedFile id."""
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/RdSAP_SiteNote_001.pdf"
    ]
    parsed_epc = MagicMock()
    session = MagicMock()
    assigned_id = 99

    def assign_id(files: list) -> None:
        # Stand in for the database: give the first added row its primary key.
        setattr(files[0], "id", assigned_id)

    service = make_service(pashub_client=pashub)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch(
            "backend.pashub_fetcher.pashub_service.parse_site_notes_pdf",
            return_value=parsed_epc,
        ) as parse_mock,
        patch(
            "backend.pashub_fetcher.pashub_service.save_epc_property_data"
        ) as save_mock,
        patch("backend.pashub_fetcher.pashub_service.db_session") as db_session_mock,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        session.add_all = MagicMock(side_effect=assign_id)
        db_session_mock.return_value.__enter__.return_value = session
        service.run(make_request(uprn="12345"))

    parse_mock.assert_called_once_with("/tmp/RdSAP_SiteNote_001.pdf")
    save_mock.assert_called_once_with(
        session, parsed_epc, uploaded_file_id=assigned_id
    )
# ---------------------------------------------------------------------------
# run(): site notes parse failure → warning logged, run returns normally
# ---------------------------------------------------------------------------
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
    """A parser failure is logged as a warning; run() still returns and saves nothing."""
    pashub = MagicMock(spec=PashubClient)
    pashub.get_uprn_by_job_id.return_value = None
    pashub.get_core_evidence_files_by_job_id.return_value = [
        "/tmp/RdSAP_SiteNote_001.pdf"
    ]
    service = make_service(pashub_client=pashub)
    request = make_request(uprn="12345")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch(
            "backend.pashub_fetcher.pashub_service.parse_site_notes_pdf",
            side_effect=ValueError("corrupt pdf"),
        ),
        patch(
            "backend.pashub_fetcher.pashub_service.save_epc_property_data"
        ) as save_mock,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.logger") as logger_mock,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        returned = service.run(request)

    assert returned == ["/tmp/RdSAP_SiteNote_001.pdf"]
    logger_mock.warning.assert_called()
    save_mock.assert_not_called()

View file

@ -89,13 +89,13 @@ class SapVentilation:
@dataclass
class WindowTransmissionDetails:
u_value: float
data_source: int
data_source: Union[int, str]
solar_transmittance: float
@dataclass
class SapWindow:
pvc_frame: str
frame_material: Optional[str]
glazing_gap: Union[int, str]
orientation: Union[int, str]
window_type: Union[int, str]

View file

@ -0,0 +1,98 @@
from dataclasses import dataclass
@dataclass
class HistoricEpc:
    """Flat record of a single historic EPC entry.

    Every field is declared as ``str`` and none has a default, so all of them
    must be supplied when constructing an instance.
    NOTE(review): field names look like lower-cased column names of the
    historic EPC CSV export — confirm against the loader that builds these.
    """

    # --- identifiers and address lines ---
    lmk_key: str
    address1: str
    address2: str
    address3: str
    postcode: str
    building_reference_number: str
    # --- headline ratings ---
    current_energy_rating: str
    potential_energy_rating: str
    current_energy_efficiency: str
    potential_energy_efficiency: str
    # --- property classification, location and lodgement ---
    property_type: str
    built_form: str
    inspection_date: str
    local_authority: str
    constituency: str
    county: str
    lodgement_date: str
    transaction_type: str
    # --- environmental impact, consumption, emissions and costs ---
    environment_impact_current: str
    environment_impact_potential: str
    energy_consumption_current: str
    energy_consumption_potential: str
    co2_emissions_current: str
    co2_emiss_curr_per_floor_area: str
    co2_emissions_potential: str
    lighting_cost_current: str
    lighting_cost_potential: str
    heating_cost_current: str
    heating_cost_potential: str
    hot_water_cost_current: str
    hot_water_cost_potential: str
    # --- dwelling characteristics ---
    total_floor_area: str
    energy_tariff: str
    mains_gas_flag: str
    floor_level: str
    flat_top_storey: str
    flat_storey_count: str
    main_heating_controls: str
    multi_glaze_proportion: str
    glazed_type: str
    glazed_area: str
    extension_count: str
    number_habitable_rooms: str
    number_heated_rooms: str
    low_energy_lighting: str
    number_open_fireplaces: str
    # --- per-element description / energy / environmental fields ---
    hotwater_description: str
    hot_water_energy_eff: str
    hot_water_env_eff: str
    floor_description: str
    floor_energy_eff: str
    floor_env_eff: str
    windows_description: str
    windows_energy_eff: str
    windows_env_eff: str
    walls_description: str
    walls_energy_eff: str
    walls_env_eff: str
    secondheat_description: str
    sheating_energy_eff: str
    sheating_env_eff: str
    roof_description: str
    roof_energy_eff: str
    roof_env_eff: str
    mainheat_description: str
    mainheat_energy_eff: str
    mainheat_env_eff: str
    mainheatcont_description: str
    mainheatc_energy_eff: str
    mainheatc_env_eff: str
    lighting_description: str
    lighting_energy_eff: str
    lighting_env_eff: str
    # --- fuel, renewables and remaining attributes ---
    main_fuel: str
    wind_turbine_count: str
    heat_loss_corridor: str
    unheated_corridor_length: str
    floor_height: str
    photo_supply: str
    solar_water_heating_flag: str
    mechanical_ventilation: str
    address: str
    local_authority_label: str
    constituency_label: str
    posttown: str
    construction_age_band: str
    lodgement_datetime: str
    tenure: str
    fixed_lighting_outlets_count: str
    low_energy_fixed_light_count: str
    uprn: str
    uprn_source: str
    report_type: str

View file

@ -0,0 +1,104 @@
from dataclasses import dataclass
from typing import Optional
import pandas as pd
from botocore.exceptions import ClientError
from backend.address2UPRN.scoring import get_uprn_candidates
from backend.utils.addressMatch import AddressMatch
from datatypes.epc.domain.historic_epc import HistoricEpc
from utils.pandas_utils import pandas_cell_to_str
from utils.s3 import parse_s3_uri, read_csv_gz_from_s3
DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
_EXTRA_COLS = {"lexiscore", "lexirank"}
def _map_historic_epc_pandas_row_to_domain(row: pd.Series) -> HistoricEpc:
    """Convert one scored DataFrame row into a HistoricEpc.

    Column names are lower-cased to form the keyword names, each cell goes
    through ``pandas_cell_to_str``, and the scoring columns listed in
    ``_EXTRA_COLS`` are left out because they are not HistoricEpc fields.
    """
    field_values = {}
    for column, cell in row.items():
        field_name = column.lower()
        if field_name in _EXTRA_COLS:
            continue
        field_values[field_name] = pandas_cell_to_str(cell)
    return HistoricEpc(**field_values)
@dataclass(frozen=True)
class ScoredHistoricEpc:
    """Immutable pairing of a historic EPC record with its address-match score."""

    # The EPC record that was scored.
    record: HistoricEpc
    # Value of the row's "lexiscore" column; a score <= 0 is treated as
    # "no match" by HistoricEpcMatches.unambiguous_uprn.
    lexiscore: float
    # Value of the row's "lexirank" column; records sharing a rank are treated
    # as tied by HistoricEpcMatches.unambiguous_uprn.
    lexirank: int
@dataclass
class HistoricEpcMatches:
    """Scored historic EPC candidates for one user-supplied address.

    NOTE(review): ``top`` / ``top_n`` assume ``matches`` is ordered best-first;
    confirm that the scorer returns rows in that order.
    """

    user_address: str
    postcode: str
    matches: list[ScoredHistoricEpc]

    def top(self) -> Optional[ScoredHistoricEpc]:
        """Return the first candidate, or None when there are no candidates."""
        if not self.matches:
            return None
        return self.matches[0]

    def top_n(self, k: int) -> list[ScoredHistoricEpc]:
        """Return the first ``k`` candidates (plain slice semantics)."""
        return self.matches[:k]

    def unambiguous_uprn(self) -> Optional[str]:
        """Return the UPRN only when the leading candidates agree on it.

        The result is None when there are no candidates, when the first
        candidate's score is not positive, or when the candidates sharing the
        first candidate's rank carry zero or several distinct non-empty UPRNs.
        """
        best = self.top()
        if best is None or best.lexiscore <= 0:
            return None

        distinct_uprns = set()
        for candidate in self.matches:
            if candidate.lexirank != best.lexirank:
                continue
            if candidate.record.uprn:
                distinct_uprns.add(candidate.record.uprn)

        if len(distinct_uprns) != 1:
            return None
        return distinct_uprns.pop()
def _sanitise_postcode(postcode: str) -> str:
cleaned = (postcode or "").upper().replace(" ", "")
if not cleaned:
raise ValueError("postcode must contain non-whitespace characters")
if not AddressMatch.is_valid_postcode(cleaned):
raise ValueError(f"postcode {cleaned!r} is not a valid UK postcode")
return cleaned
def match_addresses_for_postcode(
    user_address: str,
    postcode: str,
    *,
    s3_root: str = DEFAULT_S3_ROOT,
    address_column: str = "ADDRESS",
    uprn_column: str = "UPRN",
) -> HistoricEpcMatches:
    """Score the historic EPC records of one postcode against a user address.

    The per-postcode file ``<s3_root>/<POSTCODE>/data.csv.gz`` is read from S3,
    scored with ``get_uprn_candidates`` and wrapped into domain objects.

    Args:
        user_address: Free-text address to match; must be non-empty.
        postcode: Postcode selecting the data file; sanitised before use.
        s3_root: ``s3://bucket/prefix`` under which the per-postcode files live.
        address_column: Name of the address column in the data file.
        uprn_column: Name of the UPRN column in the data file.

    Returns:
        The scored candidates in the order the scorer returned them.

    Raises:
        ValueError: If ``user_address`` is empty or the postcode is invalid.
        FileNotFoundError: If S3 reports the data file as missing.
        ClientError: For any other S3 failure.
    """
    if not user_address:
        raise ValueError("user_address must be non-empty")
    pc = _sanitise_postcode(postcode)

    bucket, root_prefix = parse_s3_uri(s3_root)
    key = f"{root_prefix.rstrip('/')}/{pc}/data.csv.gz"

    try:
        df = read_csv_gz_from_s3(bucket, key)
    except ClientError as e:
        error_code = e.response.get("Error", {}).get("Code")
        if error_code not in ("NoSuchKey", "404"):
            raise
        # A missing object is an expected condition: surface it as "no data".
        raise FileNotFoundError(
            f"No historic EPC data at s3://{bucket}/{key}"
        ) from e

    scored = get_uprn_candidates(
        df,
        user_address=user_address,
        address_column=address_column,
        uprn_column=uprn_column,
    )

    matches = []
    for _, row in scored.iterrows():
        matches.append(
            ScoredHistoricEpc(
                record=_map_historic_epc_pandas_row_to_domain(row),
                lexiscore=float(row["lexiscore"]),
                lexirank=int(row["lexirank"]),
            )
        )

    return HistoricEpcMatches(user_address=user_address, postcode=pc, matches=matches)

View file

@ -52,13 +52,17 @@ from datatypes.epc.schema.rdsap_schema_21_0_1 import (
RdSapSchema21_0_1,
EnergyElement as EnergyElement_21_0_1,
)
from datatypes.epc.surveys.elmhurst_site_notes import (
ElmhurstSiteNotes,
VentilationAndCooling as ElmhurstVentilation,
Window as ElmhurstWindow,
)
from datatypes.epc.surveys.pashub_rdsap_site_notes import (
BuildingConstruction,
BuildingMeasurements,
ExtensionConstruction,
ExtensionMeasurements,
ExtensionRoofSpace,
FloorConstruction,
FloorMeasurement,
HeatingAndHotWater,
PasHubRdSapSiteNotes,
@ -201,6 +205,80 @@ class EpcPropertyDataMapper:
sap_ventilation=_map_sap_ventilation(ventilation),
)
@staticmethod
def from_elmhurst_site_notes(survey: ElmhurstSiteNotes) -> EpcPropertyData:
    """Map a parsed Elmhurst site-notes survey onto EpcPropertyData.

    Roofs, walls, floors and main heating are left empty, and the wet-room,
    extension and PV-battery counts are fixed at 0. The dwelling is described
    by a single building part.
    """
    # Short aliases for the survey sections read repeatedly below.
    details = survey.property_details
    meters = survey.meters
    renewables = survey.renewables
    ventilation = survey.ventilation
    lighting = survey.lighting

    built_form = _strip_code(survey.attachment)
    property_type = _strip_code(survey.property_type)

    # First address line is "<number or name>, <street>", or just the street.
    prefix = details.house_number or details.house_name or ""
    if prefix:
        address_line_1 = f"{prefix}, {details.street}"
    else:
        address_line_1 = details.street

    energy_source = SapEnergySource(
        mains_gas=meters.main_gas,
        meter_type=meters.electricity_meter_type,
        pv_battery_count=0,
        wind_turbines_count=1 if renewables.wind_turbine_present else 0,
        gas_smart_meter_present=meters.gas_smart_meter,
        is_dwelling_export_capable=renewables.export_capable_meter,
        wind_turbines_terrain_type=renewables.wind_turbines_terrain_type,
        electricity_smart_meter_present=meters.electricity_smart_meter,
    )
    floor_area_total = round(sum(f.area_m2 for f in survey.dimensions.floors), 2)
    # The combined low-energy count is only reported when the separate
    # LED/CFL counts are not known.
    low_energy_count = (
        None if lighting.led_cfl_count_known else lighting.low_energy_count
    )

    return EpcPropertyData(
        dwelling_type=f"{built_form} {property_type.lower()}",
        inspection_date=details.inspection_date,
        tenure=details.tenure,
        transaction_type=details.transaction_type,
        address_line_1=address_line_1,
        post_town=details.town,
        postcode=details.postcode,
        report_reference=details.reference_number,
        roofs=[],
        walls=[],
        floors=[],
        main_heating=[],
        door_count=survey.door_count,
        sap_heating=_map_elmhurst_sap_heating(survey),
        sap_windows=[_map_elmhurst_window(w) for w in survey.windows],
        sap_energy_source=energy_source,
        sap_building_parts=[_map_elmhurst_building_part(survey)],
        solar_water_heating=renewables.solar_water_heating,
        has_hot_water_cylinder=survey.water_heating.hot_water_cylinder_present,
        has_fixed_air_conditioning=ventilation.fixed_space_cooling,
        wet_rooms_count=0,
        extensions_count=0,
        heated_rooms_count=survey.heated_habitable_rooms,
        open_chimneys_count=ventilation.open_chimneys_count,
        habitable_rooms_count=survey.habitable_rooms,
        insulated_door_count=survey.insulated_door_count,
        cfl_fixed_lighting_bulbs_count=lighting.cfl_count,
        led_fixed_lighting_bulbs_count=lighting.led_count,
        incandescent_fixed_lighting_bulbs_count=lighting.incandescent_count,
        total_floor_area_m2=floor_area_total,
        built_form=built_form,
        property_type=property_type,
        has_conservatory=survey.has_conservatory,
        blocked_chimneys_count=ventilation.blocked_chimneys_count,
        number_of_storeys=survey.number_of_storeys,
        hydro=renewables.hydro_electricity_generated_kwh > 0,
        photovoltaic_array=renewables.photovoltaic_panel != "None",
        sap_ventilation=_map_elmhurst_ventilation(ventilation),
        percent_draughtproofed=survey.draught_proofing_percent,
        waste_water_heat_recovery=(
            "Present" if renewables.wwhrs_present else "None"
        ),
        any_unheated_rooms=survey.heated_habitable_rooms < survey.habitable_rooms,
        low_energy_fixed_lighting_bulbs_count=low_energy_count,
        energy_rating_current=survey.current_sap_rating,
        energy_rating_potential=survey.potential_sap_rating,
        environmental_impact_current=survey.current_ei_rating,
        environmental_impact_potential=survey.potential_ei_rating,
        co2_emissions_current=survey.co2_emissions_current_t,
    )
@staticmethod
def from_rdsap_schema_17_0(schema: RdSapSchema17_0) -> EpcPropertyData:
es = schema.sap_energy_source
@ -867,7 +945,7 @@ class EpcPropertyDataMapper:
# 20.0.0 SapWindow lacks frame/gap/draught fields present in later schemas
sap_windows=[
SapWindow(
pvc_frame="",
frame_material=None,
glazing_gap=0,
orientation=w.orientation,
window_type=w.window_type,
@ -1044,7 +1122,7 @@ class EpcPropertyDataMapper:
),
sap_windows=[
SapWindow(
pvc_frame=w.pvc_frame,
frame_material="PVC" if w.pvc_frame == "true" else None,
glazing_gap=w.glazing_gap,
orientation=w.orientation,
window_type=w.window_type,
@ -1278,7 +1356,7 @@ class EpcPropertyDataMapper:
# SAP windows
sap_windows=[
SapWindow(
pvc_frame=w.pvc_frame,
frame_material="PVC" if w.pvc_frame == "true" else None,
glazing_gap=w.glazing_gap,
orientation=w.orientation,
window_type=w.window_type,
@ -1477,6 +1555,12 @@ class EpcPropertyDataMapper:
# ---------------------------------------------------------------------------
def _strip_code(value: str) -> str:
"""Strip leading uppercase code from Elmhurst coded strings, e.g. 'CA Cavity''Cavity'."""
parts = value.split(" ", 1)
return parts[1] if len(parts) > 1 else value
def _extract_age_band(age_range: str) -> str:
"""Return the letter code from a site-notes age range, e.g. 'I: 1996 - 2002''I'."""
return age_range.split(":")[0].strip()
@ -1521,7 +1605,7 @@ def _map_main_building_part(
construction_age_band=_extract_age_band(main.age_range),
wall_construction=main.walls_construction_type,
wall_insulation_type=main.walls_insulation_type,
wall_thickness_measured=main.wall_thickness_mm > 0,
wall_thickness_measured=main.wall_thickness_mm is not None,
party_wall_construction=main.party_wall_construction_type,
sap_floor_dimensions=_map_floor_dimensions(measurements.main_building.floors),
wall_thickness_mm=main.wall_thickness_mm,
@ -1545,7 +1629,7 @@ def _map_extension_building_part(
construction_age_band=_extract_age_band(ext_c.age_range),
wall_construction=ext_c.walls_construction_type,
wall_insulation_type=ext_c.walls_insulation_type,
wall_thickness_measured=ext_c.wall_thickness_mm > 0,
wall_thickness_measured=ext_c.wall_thickness_mm is not None,
party_wall_construction=ext_c.party_wall_construction_type,
sap_floor_dimensions=_map_floor_dimensions(ext_m.floors),
wall_thickness_mm=ext_c.wall_thickness_mm,
@ -1556,7 +1640,7 @@ def _map_extension_building_part(
def _map_sap_window(window: Window) -> SapWindow:
return SapWindow(
pvc_frame=window.frame_type,
frame_material=window.frame_type,
glazing_gap=window.glazing_gap,
orientation=window.orientation,
window_type=window.window_type,
@ -1649,3 +1733,112 @@ def _map_sap_ventilation(ventilation: Ventilation) -> SapVentilation:
flueless_gas_fires_count=ventilation.number_of_flueless_gas_fires,
ventilation_in_pcdf_database=ventilation.ventilation_in_pcdf_database,
)
def _map_elmhurst_building_part(survey: ElmhurstSiteNotes) -> SapBuildingPart:
    """Build the single "main" SapBuildingPart from an Elmhurst survey.

    Each entry of ``survey.dimensions.floors`` becomes a SapFloorDimension
    whose ``floor`` number is its position in that list (starting at 0).
    Coded Elmhurst strings go through ``_strip_code`` so that only the text
    after the leading code is stored.
    """
    walls = survey.walls
    roof = survey.roof
    ground = survey.floor

    storeys = [
        SapFloorDimension(
            room_height_m=storey.room_height_m,
            total_floor_area_m2=storey.area_m2,
            party_wall_length_m=storey.party_wall_length_m,
            heat_loss_perimeter_m=storey.heat_loss_perimeter_m,
            floor=level,
        )
        for level, storey in enumerate(survey.dimensions.floors)
    ]

    return SapBuildingPart(
        identifier="main",
        construction_age_band=_strip_code(survey.construction_age_band),
        wall_construction=_strip_code(walls.wall_type),
        wall_insulation_type=_strip_code(walls.insulation),
        # The survey records "thickness unknown"; the domain wants the inverse.
        wall_thickness_measured=not walls.thickness_unknown,
        party_wall_construction=_strip_code(walls.party_wall_type),
        sap_floor_dimensions=storeys,
        wall_thickness_mm=walls.thickness_mm,
        roof_insulation_location=_strip_code(roof.insulation),
        roof_insulation_thickness=roof.insulation_thickness_mm,
        floor_type=_strip_code(ground.location),
        floor_construction_type=_strip_code(ground.floor_type),
        floor_insulation_type_str=_strip_code(ground.insulation),
        floor_u_value_known=ground.u_value_known,
    )
def _map_elmhurst_window(w: ElmhurstWindow) -> SapWindow:
    """Translate one Elmhurst window record into a SapWindow.

    ``window_type`` is always "Window". A falsy ``frame_type`` becomes None
    and a falsy ``glazing_gap`` becomes the empty string.
    """
    transmission = WindowTransmissionDetails(
        u_value=w.u_value,
        solar_transmittance=w.g_value,
        data_source=w.data_source,
    )
    return SapWindow(
        frame_material=w.frame_type or None,
        glazing_gap=w.glazing_gap or "",
        orientation=w.orientation,
        window_type="Window",
        glazing_type=w.glazing_type,
        window_width=w.width_m,
        window_height=w.height_m,
        draught_proofed=w.draught_proofed,
        window_location=w.building_part,
        window_wall_type=w.location,
        # NOTE(review): the value is forwarded as-is; if the source field is a
        # string such as "None" it will be truthy - confirm the target type.
        permanent_shutters_present=w.permanent_shutters,
        frame_factor=w.frame_factor,
        window_transmission_details=transmission,
    )
def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
    """Assemble the SapHeating section from an Elmhurst survey.

    * The heating-control text drops a leading ``"SAP code ..., "`` prefix
      when one is present; otherwise it is used verbatim.
    * Only the first recorded shower contributes an outlet type; with no
      showers ``shower_outlets`` is None.
    * ``cylinder_size`` is filled from ``water_heating_code`` when a hot
      water cylinder is present, and is None otherwise.
    """
    heating = survey.main_heating
    hot_water = survey.water_heating

    raw_control = heating.heating_controls_sap
    _prefix, comma, remainder = raw_control.partition(", ")
    if raw_control.startswith("SAP code") and comma:
        control = remainder
    else:
        control = raw_control

    showers = survey.baths_and_showers.showers
    if showers:
        outlets = ShowerOutlets(
            shower_outlet=ShowerOutlet(shower_outlet_type=showers[0].outlet_type)
        )
    else:
        outlets = None

    main_system = MainHeatingDetail(
        has_fghrs=survey.renewables.flue_gas_heat_recovery_present,
        main_fuel_type=heating.fuel_type,
        heat_emitter_type=heating.heat_emitter,
        emitter_temperature=heating.design_flow_temperature,
        fan_flue_present=heating.fan_assisted_flue,
        main_heating_control=control,
        # NOTE(review): source field is named heat_pump_age while the target
        # is a central-heating-pump age - confirm this pairing is intended.
        central_heating_pump_age_str=heating.heat_pump_age,
    )

    return SapHeating(
        instantaneous_wwhrs=InstantaneousWwhrs(),
        main_heating_details=[main_system],
        has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling,
        shower_outlets=outlets,
        cylinder_size=(
            hot_water.water_heating_code
            if hot_water.hot_water_cylinder_present
            else None
        ),
        water_heating_code=hot_water.water_heating_sap_code,
    )
def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation:
    """Convert the Elmhurst ventilation section into a SapVentilation.

    ``ventilation_type`` and ``ventilation_in_pcdf_database`` are always set
    to None here. A draught lobby counts as present for any value other than
    the literal "Not present".
    """
    has_draught_lobby = v.draught_lobby != "Not present"
    return SapVentilation(
        ventilation_type=None,
        draught_lobby=has_draught_lobby,
        pressure_test=v.pressure_test_method,
        open_flues_count=v.open_flues_count,
        closed_flues_count=v.open_chimneys_closed_fire_count,
        boiler_flues_count=v.solid_fuel_boiler_flues_count,
        other_flues_count=v.other_heater_flues_count,
        extract_fans_count=v.extract_fans_count,
        passive_vents_count=v.passive_vents_count,
        flueless_gas_fires_count=v.flueless_gas_fires_count,
        ventilation_in_pcdf_database=None,
    )

View file

@ -481,6 +481,10 @@ class TestFromRdSapSchema21_0_1:
# draught_proofed: "true"
assert result.sap_windows[0].draught_proofed is True
def test_window_frame_material_false(self, result: EpcPropertyData) -> None:
# pvc_frame: "false" in fixture → frame_material should be None
assert result.sap_windows[0].frame_material is None
# --- sap building parts ---
def test_building_part_count(self, result: EpcPropertyData) -> None:

View file

@ -398,7 +398,7 @@ class TestFromSiteNotesExample1:
# Windows
sap_windows=[
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="South East",
window_type="Window",
@ -411,7 +411,7 @@ class TestFromSiteNotesExample1:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="South East",
window_type="Window",
@ -424,7 +424,7 @@ class TestFromSiteNotesExample1:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -437,7 +437,7 @@ class TestFromSiteNotesExample1:
permanent_shutters_present=False,
),
SapWindow(
pvc_frame="Wooden or PVC",
frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@ -694,3 +694,21 @@ class TestFromSiteNotesMiscTopLevel:
def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
# renewables.photovoltaic_array: false
assert result.photovoltaic_array is False
class TestUnmeasurableWallThickness:
    """wall_thickness_mm=None in site notes → wall_thickness_measured=False in domain."""

    @pytest.fixture
    def result(self) -> EpcPropertyData:
        """Map the 'unmeasurable wall' site-notes fixture into domain data."""
        raw_notes = load("pashub_rdsap_site_notes_example_unmeasurable_wall.json")
        site_notes = from_dict(PasHubRdSapSiteNotes, raw_notes)
        return EpcPropertyDataMapper.from_site_notes(site_notes)

    def test_wall_thickness_measured_is_false(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_measured is False

    def test_wall_thickness_mm_is_none(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_mm is None

View file

@ -0,0 +1,239 @@
from unittest.mock import patch
import numpy as np
import pandas as pd
import pytest
from botocore.exceptions import ClientError
from datatypes.epc.domain import historic_epc_matching as matcher_mod
from datatypes.epc.domain.historic_epc_matching import (
HistoricEpcMatches,
ScoredHistoricEpc,
_sanitise_postcode,
match_addresses_for_postcode,
)
# CSV column names (upper-case, as they appear in the data) for every field
# required by the HistoricEpc dataclass, whose attributes are the lower-cased
# forms. The matcher only reads ADDRESS + UPRN to score; everything else is
# filled with "" but must be present for HistoricEpc(**kwargs) to construct.
_FULL_COLUMN_FIELDS = [
"LMK_KEY", "ADDRESS1", "ADDRESS2", "ADDRESS3", "POSTCODE",
"BUILDING_REFERENCE_NUMBER", "CURRENT_ENERGY_RATING", "POTENTIAL_ENERGY_RATING",
"CURRENT_ENERGY_EFFICIENCY", "POTENTIAL_ENERGY_EFFICIENCY", "PROPERTY_TYPE",
"BUILT_FORM", "INSPECTION_DATE", "LOCAL_AUTHORITY", "CONSTITUENCY", "COUNTY",
"LODGEMENT_DATE", "TRANSACTION_TYPE", "ENVIRONMENT_IMPACT_CURRENT",
"ENVIRONMENT_IMPACT_POTENTIAL", "ENERGY_CONSUMPTION_CURRENT",
"ENERGY_CONSUMPTION_POTENTIAL", "CO2_EMISSIONS_CURRENT",
"CO2_EMISS_CURR_PER_FLOOR_AREA", "CO2_EMISSIONS_POTENTIAL",
"LIGHTING_COST_CURRENT", "LIGHTING_COST_POTENTIAL", "HEATING_COST_CURRENT",
"HEATING_COST_POTENTIAL", "HOT_WATER_COST_CURRENT", "HOT_WATER_COST_POTENTIAL",
"TOTAL_FLOOR_AREA", "ENERGY_TARIFF", "MAINS_GAS_FLAG", "FLOOR_LEVEL",
"FLAT_TOP_STOREY", "FLAT_STOREY_COUNT", "MAIN_HEATING_CONTROLS",
"MULTI_GLAZE_PROPORTION", "GLAZED_TYPE", "GLAZED_AREA", "EXTENSION_COUNT",
"NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "LOW_ENERGY_LIGHTING",
"NUMBER_OPEN_FIREPLACES", "HOTWATER_DESCRIPTION", "HOT_WATER_ENERGY_EFF",
"HOT_WATER_ENV_EFF", "FLOOR_DESCRIPTION", "FLOOR_ENERGY_EFF", "FLOOR_ENV_EFF",
"WINDOWS_DESCRIPTION", "WINDOWS_ENERGY_EFF", "WINDOWS_ENV_EFF",
"WALLS_DESCRIPTION", "WALLS_ENERGY_EFF", "WALLS_ENV_EFF",
"SECONDHEAT_DESCRIPTION", "SHEATING_ENERGY_EFF", "SHEATING_ENV_EFF",
"ROOF_DESCRIPTION", "ROOF_ENERGY_EFF", "ROOF_ENV_EFF", "MAINHEAT_DESCRIPTION",
"MAINHEAT_ENERGY_EFF", "MAINHEAT_ENV_EFF", "MAINHEATCONT_DESCRIPTION",
"MAINHEATC_ENERGY_EFF", "MAINHEATC_ENV_EFF", "LIGHTING_DESCRIPTION",
"LIGHTING_ENERGY_EFF", "LIGHTING_ENV_EFF", "MAIN_FUEL", "WIND_TURBINE_COUNT",
"HEAT_LOSS_CORRIDOR", "UNHEATED_CORRIDOR_LENGTH", "FLOOR_HEIGHT",
"PHOTO_SUPPLY", "SOLAR_WATER_HEATING_FLAG", "MECHANICAL_VENTILATION",
"ADDRESS", "LOCAL_AUTHORITY_LABEL", "CONSTITUENCY_LABEL", "POSTTOWN",
"CONSTRUCTION_AGE_BAND", "LODGEMENT_DATETIME", "TENURE",
"FIXED_LIGHTING_OUTLETS_COUNT", "LOW_ENERGY_FIXED_LIGHT_COUNT", "UPRN",
"UPRN_SOURCE", "REPORT_TYPE",
]
def _row(address: str, uprn) -> dict:
    """Return a row dict with every known column blank except ADDRESS and UPRN."""
    blank_row = dict.fromkeys(_FULL_COLUMN_FIELDS, "")
    blank_row.update(ADDRESS=address, UPRN=uprn)
    return blank_row
def _build_df(rows: list[dict]) -> pd.DataFrame:
    """Turn row dicts into a DataFrame whose columns follow _FULL_COLUMN_FIELDS."""
    frame = pd.DataFrame(data=rows, columns=_FULL_COLUMN_FIELDS)
    return frame
@pytest.fixture
def patch_postcode_valid():
    """Make AddressMatch.is_valid_postcode return True while the test runs."""
    with patch.object(
        matcher_mod.AddressMatch,
        "is_valid_postcode",
        return_value=True,
    ) as always_valid:
        yield always_valid
@pytest.fixture
def patch_read():
    """Replace the matcher module's read_csv_gz_from_s3 with a mock and yield it."""
    with patch.object(matcher_mod, "read_csv_gz_from_s3") as fake_reader:
        yield fake_reader
# ---------- _sanitise_postcode ----------
class TestSanitisePostcode:
    """Behaviour of _sanitise_postcode for valid, blank and invalid input."""

    def test_uppercases_and_strips_spaces(self, patch_postcode_valid):
        cleaned = _sanitise_postcode("ab33 8al")
        assert cleaned == "AB338AL"

    def test_empty_raises(self, patch_postcode_valid):
        with pytest.raises(ValueError, match="non-whitespace"):
            _sanitise_postcode("")

    def test_whitespace_only_raises(self, patch_postcode_valid):
        with pytest.raises(ValueError, match="non-whitespace"):
            _sanitise_postcode(" ")

    def test_invalid_postcode_raises(self):
        # Here the validator is patched to reject, so the fixture is not used.
        reject_all = patch.object(
            matcher_mod.AddressMatch, "is_valid_postcode", return_value=False
        )
        with reject_all, pytest.raises(ValueError, match="not a valid UK postcode"):
            _sanitise_postcode("NONSENSE")
# ---------- match_addresses_for_postcode ----------
class TestMatchAddressesForPostcode:
    """End-to-end checks of match_addresses_for_postcode with the S3 read mocked."""

    def test_preserves_row_count_including_zero_score_rows(
        self, patch_read, patch_postcode_valid
    ):
        # The second row shares no numbers with the query (hard zero) yet
        # must still appear in the matches.
        candidates = [
            _row("47 GORDON ROAD", "100"),
            _row("999 SOMEWHERE ELSE", "200"),
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert isinstance(result, HistoricEpcMatches)
        assert len(result.matches) == 2

    def test_top_has_lexirank_one_and_lexiscore_monotone(
        self, patch_read, patch_postcode_valid
    ):
        candidates = [
            _row("48 GORDON ROAD", "200"),  # near miss
            _row("47 GORDON ROAD", "100"),  # exact (after normalisation)
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert result.top().lexirank == 1
        scores = [match.lexiscore for match in result.matches]
        assert scores == sorted(scores, reverse=True)

    def test_s3_key_built_from_default_root(self, patch_read, patch_postcode_valid):
        patch_read.return_value = _build_df([_row("47 GORDON ROAD", "100")])

        match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        patch_read.assert_called_once_with(
            "retrofit-data-dev", "historical_epc/AB338AL/data.csv.gz"
        )

    def test_s3_key_respects_custom_root_with_trailing_slash(
        self, patch_read, patch_postcode_valid
    ):
        patch_read.return_value = _build_df([_row("47 GORDON ROAD", "100")])

        match_addresses_for_postcode(
            "47 Gordon Road",
            "AB33 8AL",
            s3_root="s3://my-bucket/some/prefix/",
        )

        patch_read.assert_called_once_with(
            "my-bucket", "some/prefix/AB338AL/data.csv.gz"
        )

    def test_no_such_key_translates_to_filenotfound(
        self, patch_read, patch_postcode_valid
    ):
        missing_key = ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "missing"}}, "GetObject"
        )
        patch_read.side_effect = missing_key

        with pytest.raises(FileNotFoundError):
            match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

    def test_other_client_error_propagates(self, patch_read, patch_postcode_valid):
        access_denied = ClientError(
            {"Error": {"Code": "AccessDenied", "Message": "nope"}}, "GetObject"
        )
        patch_read.side_effect = access_denied

        with pytest.raises(ClientError):
            match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

    def test_empty_user_address_raises(self, patch_postcode_valid):
        with pytest.raises(ValueError, match="user_address"):
            match_addresses_for_postcode("", "AB33 8AL")
# ---------- unambiguous_uprn ----------
class TestUnambiguousUprn:
    """HistoricEpcMatches.unambiguous_uprn under exact, tied, zero-score and NaN cases."""

    def test_exact_match_returns_uprn(self, patch_read, patch_postcode_valid):
        candidates = [
            _row("47 GORDON ROAD", "100"),
            _row("48 GORDON ROAD", "200"),
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert result.unambiguous_uprn() == "100"

    def test_ambiguous_tie_returns_none(self, patch_read, patch_postcode_valid):
        # Identical addresses carrying different UPRNs tie for rank 1.
        candidates = [
            _row("47 GORDON ROAD", "100"),
            _row("47 GORDON ROAD", "200"),
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert result.unambiguous_uprn() is None

    def test_all_zero_score_returns_none_even_when_uprn_unique(
        self, patch_read, patch_postcode_valid
    ):
        # The query's building number is 47 and no row has it -> all hard-zero.
        candidates = [
            _row("999 ELSEWHERE", "100"),
            _row("888 ELSEWHERE", "200"),
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert all(match.lexiscore == 0.0 for match in result.matches)
        assert result.unambiguous_uprn() is None

    def test_nan_uprn_becomes_empty_string_not_nan(
        self, patch_read, patch_postcode_valid
    ):
        # The UPRN cell of the best-matching row holds a genuine NaN.
        candidates = [
            _row("47 GORDON ROAD", np.nan),
            _row("48 GORDON ROAD", "200"),
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
        best = result.top()

        # pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string
        # "nan"), so unambiguous_uprn's truthiness check correctly drops the row.
        assert best.record.uprn == ""
# ---------- top / top_n ----------
class TestTopHelpers:
    """The top / top_n convenience accessors on HistoricEpcMatches."""

    def test_top_n_returns_first_k(self, patch_read, patch_postcode_valid):
        candidates = [
            _row("47 GORDON ROAD", "100"),
            _row("48 GORDON ROAD", "200"),
            _row("49 GORDON ROAD", "300"),
        ]
        patch_read.return_value = _build_df(candidates)

        result = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
        first_two = result.top_n(2)

        assert len(first_two) == 2
        assert all(isinstance(match, ScoredHistoricEpc) for match in first_two)

    def test_top_on_empty_matches_returns_none(self):
        no_matches = HistoricEpcMatches(
            user_address="x", postcode="AB338AL", matches=[]
        )
        assert no_matches.top() is None
        assert no_matches.top_n(5) == []
        assert no_matches.unambiguous_uprn() is None

View file

View file

@ -0,0 +1,18 @@
import csv
from datatypes.epc.domain.historic_epc import HistoricEpc
def _normalise(value: str | None) -> str:
if value is None:
return ""
return value.replace("\xa0", " ")
def read_historic_epc_csv(path: str) -> list[HistoricEpc]:
    """Load every row of the CSV at *path* as a HistoricEpc.

    Column headers are lower-cased to form the keyword arguments and each
    cell is passed through ``_normalise`` before construction.
    """
    with open(path, newline="", encoding="utf-8") as handle:
        return [
            HistoricEpc(
                **{header.lower(): _normalise(cell) for header, cell in record.items()}
            )
            for record in csv.DictReader(handle)
        ]

View file

@ -0,0 +1,49 @@
import os
import pytest
from datatypes.epc.loaders.historic_epc import read_historic_epc_csv
from datatypes.epc.domain.historic_epc import HistoricEpc
FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
class TestHistoricEpcLoading:
    """Field-by-field checks on the first record of the historic_epc.csv fixture."""

    @pytest.fixture
    def epc(self) -> HistoricEpc:
        """Return the first row parsed from the fixture CSV."""
        csv_path = os.path.join(FIXTURES, "historic_epc.csv")
        records = read_historic_epc_csv(csv_path)
        return records[0]

    def test_returns_historic_epc_instance(self, epc: HistoricEpc) -> None:
        assert isinstance(epc, HistoricEpc)

    def test_lmk_key(self, epc: HistoricEpc) -> None:
        expected = "9292c3bf26a8876ce59274401ea73e3de5bd0b3e52a507c2162a46e57db8ea2f"
        assert epc.lmk_key == expected

    def test_address1(self, epc: HistoricEpc) -> None:
        assert epc.address1 == "47 GORDON ROAD"

    def test_postcode(self, epc: HistoricEpc) -> None:
        assert epc.postcode == "AB33 8AL"

    def test_current_energy_rating(self, epc: HistoricEpc) -> None:
        assert epc.current_energy_rating == "E"

    def test_property_type(self, epc: HistoricEpc) -> None:
        assert epc.property_type == "House"

    def test_built_form(self, epc: HistoricEpc) -> None:
        assert epc.built_form == "Semi-Detached"

    def test_inspection_date(self, epc: HistoricEpc) -> None:
        assert epc.inspection_date == "2021-04-11"

    def test_uprn(self, epc: HistoricEpc) -> None:
        # Values stay as the raw CSV text, including the trailing ".0".
        assert epc.uprn == "151020766.0"

    def test_uprn_source(self, epc: HistoricEpc) -> None:
        assert epc.uprn_source == "Energy Assessor"

    def test_report_type(self, epc: HistoricEpc) -> None:
        assert epc.report_type == "100"

View file

@ -0,0 +1,247 @@
from dataclasses import dataclass
from datetime import date
from typing import List, Optional
@dataclass
class SurveyorInfo:
    """Surveyor identity and reference fields; ``my_reference`` is optional."""

    surveyor_code: str
    name: str
    title: str
    tel_number: str
    survey_reference: str
    my_reference: Optional[str] = None
@dataclass
class PropertyDetails:
    """Address, assessment and lodgement details for the surveyed property.

    The trailing address-related fields are optional and default to None.
    """

    rdsap_version: str
    reference_number: str
    lodgement_required: bool
    regs_region: str
    epc_language: str
    postcode: str
    region: str
    street: str
    town: str
    tenure: str
    transaction_type: str
    inspection_date: date
    process_date: date
    epc_exists: bool
    uprn: Optional[str] = None
    house_name: Optional[str] = None
    house_number: Optional[str] = None
    locality: Optional[str] = None
    county: Optional[str] = None
@dataclass
class FloorDimension:
    """Measurements recorded for one named floor."""

    name: str  # e.g. "Lowest Floor"
    area_m2: float
    room_height_m: float
    heat_loss_perimeter_m: float
    party_wall_length_m: float
@dataclass
class BuildingPartDimensions:
    """The measurement basis plus the list of per-floor dimensions."""

    dimension_type: str  # e.g. "Internal"
    floors: List[FloorDimension]
@dataclass
class WallDetails:
    """Wall construction details; coded strings keep their leading code."""

    wall_type: str  # e.g. "CA Cavity"
    insulation: str  # e.g. "F Filled Cavity"
    thickness_unknown: bool
    u_value_known: bool
    party_wall_type: str  # e.g. "U Unable to determine"
    thickness_mm: Optional[int] = None
@dataclass
class RoofDetails:
    """Roof type and insulation details; the thickness is optional."""

    roof_type: str  # e.g. "PA Pitched (slates/tiles), access to loft"
    insulation: str  # e.g. "J Joists"
    u_value_known: bool
    insulation_thickness_mm: Optional[int] = None
@dataclass
class FloorDetails:
    """Floor location, construction and insulation details."""

    location: str  # e.g. "G Ground floor"
    floor_type: str  # e.g. "N Suspended, not timber"
    insulation: str  # e.g. "A As built"
    u_value_known: bool
    default_u_value: Optional[float] = None
@dataclass
class Window:
    """One window record: size, glazing, placement and thermal values.

    ``frame_type`` and ``glazing_gap`` are optional and default to None.
    """

    width_m: float
    height_m: float
    area_m2: float
    glazing_type: str
    frame_factor: float
    building_part: str
    location: str
    orientation: str
    data_source: str
    u_value: float
    g_value: float
    draught_proofed: bool
    permanent_shutters: str  # e.g. "None"
    frame_type: Optional[str] = None
    glazing_gap: Optional[str] = None
@dataclass
class VentilationAndCooling:
    """Counts of chimneys, flues, fans and vents plus cooling/ventilation flags."""

    open_chimneys_count: int
    open_flues_count: int
    open_chimneys_closed_fire_count: int
    solid_fuel_boiler_flues_count: int
    other_heater_flues_count: int
    blocked_chimneys_count: int
    extract_fans_count: int
    passive_vents_count: int
    flueless_gas_fires_count: int
    fixed_space_cooling: bool
    draught_lobby: str  # e.g. "Not present"
    mechanical_ventilation: bool
    pressure_test_method: str  # e.g. "Not available"
@dataclass
class Lighting:
    """Fixed-lighting bulb counts; ``low_energy_count`` defaults to 0."""

    total_bulbs: int
    led_cfl_count_known: bool
    led_count: int
    cfl_count: int
    incandescent_count: int
    low_energy_count: int = 0
@dataclass
class MainHeating:
    """Main heating system details: emitter, fuel, flue and controls.

    ``pcdf_boiler_reference`` and ``heat_pump_age`` are optional.
    """

    heat_emitter: str  # e.g. "Radiators"
    fuel_type: str  # e.g. "Mains gas"
    flue_type: str  # e.g. "Balanced"
    fan_assisted_flue: bool
    design_flow_temperature: str  # e.g. "Unknown"
    heating_controls_ees: str  # e.g. "CBE"
    # e.g. "SAP code 2106, Programmer, room thermostat and TRVs"
    heating_controls_sap: str
    percentage_of_heat: int
    # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%"
    pcdf_boiler_reference: Optional[str] = None
    heat_pump_age: Optional[str] = None
@dataclass
class Meters:
    """Electricity and gas meter details."""

    electricity_meter_type: str  # e.g. "Single"
    main_gas: bool
    electricity_smart_meter: bool
    gas_smart_meter: bool
@dataclass
class WaterHeating:
    """Water heating codes, fuel type and whether a cylinder is present."""

    water_heating_code: str  # e.g. "HWP"
    water_heating_sap_code: int
    water_heating_fuel_type: str
    hot_water_cylinder_present: bool
@dataclass
class Shower:
    """A single shower entry: its number, outlet type and connection."""

    shower_number: int
    outlet_type: str
    connected: str  # e.g. "None"
@dataclass
class BathsAndShowers:
    """Bath counts together with the list of recorded showers."""

    number_of_baths: int
    number_of_baths_connected: int
    showers: List[Shower]
@dataclass
class Renewables:
    """Renewable and heat-recovery technologies recorded for the dwelling."""

    solar_water_heating: bool
    wwhrs_present: bool
    flue_gas_heat_recovery_present: bool
    photovoltaic_panel: str  # e.g. "None"
    export_capable_meter: bool
    wind_turbine_present: bool
    wind_turbines_terrain_type: str
    hydro_electricity_generated_kwh: float
@dataclass
class ElmhurstSiteNotes:
    """Top-level container for one set of Elmhurst site notes.

    Fields are grouped by the numbered report section they come from; the
    comments below mark those sections.
    """

    surveyor_info: SurveyorInfo
    property_details: PropertyDetails

    # Summary Information
    current_sap_rating: int
    potential_sap_rating: int
    current_ei_rating: int
    potential_ei_rating: int
    co2_emissions_current_t: float

    # Section 1.0
    property_type: str  # e.g. "B Bungalow"
    attachment: str  # e.g. "E End-Terrace"

    # Section 2.0
    number_of_storeys: int
    habitable_rooms: int
    heated_habitable_rooms: int

    # Section 3.0
    construction_age_band: str  # e.g. "D 1950-1966"

    # Section 4.0
    dimensions: BuildingPartDimensions

    # Section 5.0
    has_conservatory: bool

    # Sections 7.0-9.0
    walls: WallDetails
    roof: RoofDetails
    floor: FloorDetails

    # Section 10.0
    door_count: int
    insulated_door_count: int

    # Section 11.0
    windows: List[Window]
    draught_proofing_percent: int

    # Section 12.0
    ventilation: VentilationAndCooling

    # Section 13.0
    lighting: Lighting

    # Section 14.0-14.2
    main_heating: MainHeating
    meters: Meters

    # Section 15.0
    water_heating: WaterHeating

    # Section 1x.0
    baths_and_showers: BathsAndShowers

    # Sections 16.0-22.0
    renewables: Renewables

View file

@ -44,7 +44,7 @@ class MainBuildingConstruction:
walls_insulation_type: str
thermal_conductivity_of_wall_insulation: str
wall_u_value_known: bool
wall_thickness_mm: int
wall_thickness_mm: Optional[int]
party_wall_construction_type: str
filled_cavity_indicators: Optional[str] = None
@ -59,7 +59,7 @@ class ExtensionConstruction:
walls_insulation_type: str
thermal_conductivity_of_wall_insulation: str
wall_u_value_known: bool
wall_thickness_mm: int
wall_thickness_mm: Optional[int]
party_wall_construction_type: str
filled_cavity_indicators: Optional[str] = None

View file

@ -0,0 +1,190 @@
{
"inspection_metadata": {
"inspection_surveyor": "test",
"email_address": "test@test.com",
"report_reference": "49D422A9-0779-44DD-9665-464D35DFF1A8",
"created_on": "2026-03-31",
"date_of_inspection": "2026-03-31",
"property_address": "1, Test Street, Test Town, Test County, TE1 1ST"
},
"general": {
"epc_checked_before_assessment": true,
"epc_exists_at_point_of_assessment": false,
"inspection_date": "2026-03-31",
"transaction_type": "None of the Above",
"tenure": "Rented Social",
"property_type": "House",
"detachment_type": "Mid-terrace",
"number_of_storeys": 2,
"terrain_type": "Suburban",
"number_of_extensions": 0,
"electricity_smart_meter": true,
"electric_meter_type": "Single",
"dwelling_export_capable": true,
"mains_gas_available": true,
"gas_smart_meter": true,
"gas_meter_accessible": true,
"measurements_location": "Internal"
},
"building_construction": {
"main_building": {
"age_range": "I: 1996 - 2002",
"age_indicators": "local knowledge",
"walls_construction_type": "Cavity",
"cavity_construction_indicators": "stretcher bond",
"walls_insulation_type": "As built",
"thermal_conductivity_of_wall_insulation": "Unknown",
"wall_u_value_known": false,
"wall_thickness_mm": null,
"party_wall_construction_type": "Cavity Masonry, Unfilled"
},
"floor": {
"floor_type": "Ground Floor",
"floor_construction": "Suspended, not timber",
"floor_insulation_type": "As Built",
"floor_u_value_known": false
}
},
"building_measurements": {
"main_building": {
"floors": [
{
"name": "Floor 1",
"area_m2": 24.78,
"height_m": 2.37,
"heat_loss_perimeter_m": 14.21,
"pwl_m": 6.15
},
{
"name": "Floor 0",
"area_m2": 24.78,
"height_m": 2.35,
"heat_loss_perimeter_m": 14.21,
"pwl_m": 6.15
}
]
}
},
"roof_space": {
"main_building": {
"construction_type": "Pitched roof (Slates or tiles), Access to loft",
"insulation_at": "Joists",
"roof_u_value_known": false,
"insulation_thickness_mm": 100,
"cavity_wall_construction_indicators": "No indicator of construction visible",
"rooms_in_roof": false
}
},
"windows": [
{
"id": 1,
"location": "Main Building",
"wall_type": "External wall",
"glazing_type": "Double glazing, Unknown install date",
"window_type": "Window",
"frame_type": "Wooden or PVC",
"glazing_gap": "16 mm or more",
"draught_proofed": true,
"permanent_shutters": false,
"height_m": 1.36,
"width_m": 1.0,
"orientation": "South East"
}
],
"heating_and_hot_water": {
"main_heating": {
"selection_method": "PCDF Search",
"system_type": "Boiler with radiators or underfloor heating",
"product_id": 18400,
"manufacturer": "Vaillant",
"model": "ecoFIT sustain 415",
"orig_manufacturer": "Vaillant",
"fuel": "Mains gas",
"summer_efficiency": 0,
"type": "Regular",
"condensing": true,
"year": "2018 - current",
"mount": "Wall",
"open_flue": "Room-sealed",
"fan_assist": true,
"status": "Normal status for an actual product",
"central_heating_pump_age": "Unknown",
"controls": "Programmer, room thermostat and TRVs",
"flue_gas_heat_recovery_system": false,
"weather_compensator": false,
"emitter": "Radiators",
"emitter_temperature": "Unknown"
},
"secondary_heating": {
"secondary_fuel": "No Secondary Heating"
},
"water_heating": {
"type": "Regular",
"system": "From main heating 1",
"cylinder_size": "Normal (90-130 litres)",
"cylinder_measured_heat_loss": "Not known",
"insulation_type": "Factory fitted",
"insulation_thickness_mm": 12,
"has_thermostat": true
}
},
"ventilation": {
"ventilation_type": "Natural",
"has_fixed_air_conditioning": false,
"number_of_open_flues": 0,
"number_of_closed_flues": 0,
"number_of_boiler_flues": 0,
"number_of_other_flues": 0,
"number_of_extract_fans": 2,
"number_of_passive_vents": 0,
"number_of_flueless_gas_fires": 0,
"pressure_test": "No test",
"draught_lobby": false
},
"conservatories": {
"has_conservatory": false
},
"renewables": {
"wind_turbines": false,
"solar_hot_water": false,
"photovoltaic_array": false,
"number_of_pv_batteries": 0,
"hydro": false
},
"room_count_elements": {
"number_of_habitable_rooms": 2,
"any_unheated_rooms": true,
"number_of_heated_rooms": 0,
"number_of_external_doors": 2,
"number_of_insulated_external_doors": 0,
"number_of_draughtproofed_external_doors": 2,
"number_of_open_chimneys": 0,
"number_of_blocked_chimneys": 0,
"number_of_fixed_incandescent_bulbs": 0,
"exact_led_cfl_count_known": true,
"number_of_fixed_led_bulbs": 5,
"number_of_fixed_cfl_bulbs": 4,
"waste_water_heat_recovery": "None"
},
"water_use": {
"number_of_baths": 1,
"number_of_special_features": 0,
"showers": [
{
"id": 1,
"outlet_type": "Non-Electric Shower"
}
]
},
"customer_response": {
"customer_present": true,
"willing_to_answer_satisfaction_survey": false
},
"addendum": {
"addendum": "None",
"related_party_disclosure": "No related party",
"hard_to_treat_cavity_access_issues": false,
"hard_to_treat_cavity_high_exposure": false,
"hard_to_treat_cavity_narrow_cavities": false
}
}

View file

View file

View file

@ -0,0 +1,292 @@
from typing import Any, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
# Shared pydantic model configs. Both ignore unknown input keys
# (extra="ignore"); the second additionally sets populate_by_name=True.
_IGNORE = ConfigDict(extra="ignore")
_IGNORE_POPULATE = ConfigDict(extra="ignore", populate_by_name=True)
class Vec3(BaseModel):
    # Three float components (x, y, z); used below for size, position and
    # rotation. Unknown input keys are ignored via the shared config.
    model_config = _IGNORE
    x: float
    y: float
    z: float
class Symbol(BaseModel):
    # Symbol descriptor attached to items, walls, rooms and floors.
    # Only `description` is optional; unknown input keys are ignored.
    model_config = _IGNORE
    id: str
    name: str
    description: Optional[str] = None
    valid: bool
class FieldValue(BaseModel):
    # Value payload of a SurveyField.
    model_config = _IGNORE
    index: int
    has_value: bool
    is_array: bool
    # Either a single string or a list of strings.
    # NOTE(review): presumably a list when is_array is true - confirm against data.
    value: Union[str, list[str]]
class SurveyField(BaseModel):
    # One form field with its metadata and current value.
    model_config = _IGNORE
    id: str
    # The field type is carried both as an int and as a string.
    type: int
    type_as_string: str
    is_required: bool
    label: str
    description: Optional[str] = None
    # Defaults to an empty list when the key is absent.
    list_values: list[str] = []
    value: FieldValue
class ImageMapEntry(BaseModel):
    # One entry of a Room's `image_map`: ids plus a list of integer coordinates.
    model_config = _IGNORE
    symbol_id: str
    uid: str
    owner_uid: str
    type: int
    coordinates: list[int]
class FormattedDimensions(BaseModel):
    # String-typed width/depth/height, all optional; used as ItemBase.formatted.
    model_config = _IGNORE
    width: Optional[str] = None
    depth: Optional[str] = None
    height: Optional[str] = None
class FormattedMeasures(BaseModel):
    # String-typed measures (dimensions, areas, surfaces, perimeters, volume);
    # every field is optional. Used as Room.formatted.
    model_config = _IGNORE
    width: Optional[str] = None
    depth: Optional[str] = None
    height: Optional[str] = None
    area: Optional[str] = None
    area_without_walls: Optional[str] = None
    area_with_interior_walls_only: Optional[str] = None
    area_with_walls: Optional[str] = None
    doors_surface: Optional[str] = None
    walls_surface: Optional[str] = None
    walls_surface_without_openings: Optional[str] = None
    windows_surface: Optional[str] = None
    perimeter: Optional[str] = None
    ground_perimeter: Optional[str] = None
    living_area: Optional[str] = None
    below_grade_living_area: Optional[str] = None
    above_grade_living_area: Optional[str] = None
    exterior_perimeter: Optional[str] = None
    volume: Optional[str] = None
class Address(BaseModel):
    # Postal address parts plus optional coordinates; every field is optional.
    model_config = _IGNORE
    street: Optional[str] = None
    street_number: Optional[str] = None
    postal_code: Optional[str] = None
    city: Optional[str] = None
    country: Optional[str] = None
    longitude: Optional[float] = None
    latitude: Optional[float] = None
class CreatedBy(BaseModel):
    # Creator record: id and email are required, the name parts are optional.
    model_config = _IGNORE
    id: str
    email: str
    firstname: Optional[str] = None
    lastname: Optional[str] = None
class Location(BaseModel):
    # Geographic position with a validity flag; all four fields are required.
    model_config = _IGNORE
    valid: bool
    longitude: float
    latitude: float
    altitude: float
class ItemBase(BaseModel):
    # Common shape shared by WallItem, Furniture and SymbolInstance:
    # identity, symbol, placement vectors and attached survey fields.
    model_config = _IGNORE
    uid: str
    symbol: Symbol
    size: Vec3
    position: Vec3
    rotation: Vec3
    formatted: Optional[FormattedDimensions] = None
    # List fields default to empty when the key is absent.
    images: list[Any] = []
    notes: Optional[str] = None
    displayable_fields: list[SurveyField] = []
    custom_displayable_fields: list[SurveyField] = []
class WallItem(ItemBase):
    # Adds nothing to ItemBase; a distinct type for Room.wall_items entries.
    pass
class Furniture(ItemBase):
    # Adds nothing to ItemBase; a distinct type for Room.furnitures entries.
    pass
class SymbolInstance(ItemBase):
    # Adds nothing to ItemBase; exists as a separately named item type.
    pass
class Wall(BaseModel):
    # One wall of a room: identity, symbol, length and attached survey fields.
    model_config = _IGNORE
    uid: str
    symbol: Symbol
    length: float
    # List fields default to empty when the key is absent.
    images: list[Any] = []
    notes: Optional[str] = None
    displayable_fields: list[SurveyField] = []
    custom_displayable_fields: list[SurveyField] = []
class Room(BaseModel):
    """One room of a floor, with its measures and contained elements.

    Required fields: ``name``, ``uid``, ``size``, ``position``,
    ``rotation`` and ``area``. All other measures are optional, and the
    nested collections default to empty lists.
    """

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    # Identity.
    name: str
    uid: str
    symbol: Optional[Symbol] = None

    # Geometry, each expressed as a ``Vec3``.
    size: Vec3
    position: Vec3
    rotation: Vec3

    # Areas and perimeters (``area`` is the only required measure).
    area: float
    perimeter: Optional[float] = None
    ground_perimeter: Optional[float] = None
    area_without_walls: Optional[float] = None
    area_with_interior_walls_only: Optional[float] = None
    area_with_walls: Optional[float] = None

    # Element counts.
    wall_count: Optional[int] = None
    wall_count_with_interior_walls: Optional[int] = None
    corner_count_with_interior_walls: Optional[int] = None
    door_count: Optional[int] = None
    window_count: Optional[int] = None

    # Remaining numeric measures.
    height: Optional[float] = None
    volume: Optional[float] = None
    width: Optional[float] = None
    doors_surface: Optional[float] = None
    walls_surface: Optional[float] = None
    walls_surface_without_openings: Optional[float] = None
    windows_surface: Optional[float] = None

    # Descriptive attributes.
    dimensions: Optional[str] = None
    room_type: Optional[str] = None
    furniture_count: Optional[int] = None

    # Imagery, notes and string renderings of the measures.
    image: Optional[str] = None
    image_map: list[ImageMapEntry] = []
    images: list[Any] = []
    notes: Optional[str] = None
    formatted: Optional[FormattedMeasures] = None

    # Survey fields, built-in and custom.
    displayable_fields: list[SurveyField] = []
    custom_displayable_fields: list[SurveyField] = []

    # Nested elements belonging to this room.
    wall_items: list[WallItem] = []
    furnitures: list[Furniture] = []
    walls: list[Wall] = []
class Floor(BaseModel):
    """One floor of a plan, with its measures, rooms and loose items.

    Required fields: ``uid``, ``size``, ``position`` and ``rotation``.
    Unlike ``Room``, both ``name`` and ``area`` are optional here.
    """

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    # Identity.
    uid: str
    symbol: Optional[Symbol] = None

    # Geometry, each expressed as a ``Vec3``.
    size: Vec3
    position: Vec3
    rotation: Vec3

    name: Optional[str] = None

    # Areas and perimeters.
    area: Optional[float] = None
    perimeter: Optional[float] = None
    ground_perimeter: Optional[float] = None
    area_without_walls: Optional[float] = None
    area_with_interior_walls_only: Optional[float] = None
    area_with_walls: Optional[float] = None

    # Element counts.
    wall_count: Optional[int] = None
    wall_count_with_interior_walls: Optional[int] = None
    corner_count_with_interior_walls: Optional[int] = None
    door_count: Optional[int] = None
    window_count: Optional[int] = None
    bathrooms_count: Optional[int] = None
    bedrooms_count: Optional[int] = None

    doors_surface: Optional[float] = None
    # Accepts either an integer or a string value.
    floor_type: Optional[Union[int, str]] = None
    furniture_count: Optional[int] = None
    height: Optional[float] = None
    level: Optional[int] = None
    room_count: Optional[int] = None
    volume: Optional[float] = None
    walls_surface: Optional[float] = None
    walls_surface_without_openings: Optional[float] = None
    windows_surface: Optional[float] = None

    # Imagery, notes and string renderings of the measures.
    image: Optional[str] = None
    image_map: list[ImageMapEntry] = []
    images: list[Any] = []
    notes: Optional[str] = None
    formatted: Optional[FormattedMeasures] = None

    # Survey fields, built-in and custom.
    displayable_fields: list[SurveyField] = []
    custom_displayable_fields: list[SurveyField] = []

    # Nested elements belonging to this floor.
    rooms: list[Room] = []
    furnitures: list[Furniture] = []
    symbol_instances: list[SymbolInstance] = []
class PlanBody(BaseModel):
    """Root object of a plan: geometry, location and the list of floors.

    Required fields: ``uid``, ``size``, ``position``, ``rotation`` and
    ``location``. Used as the ``plan`` field of ``PlanDetail``.
    """

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    # Identity.
    uid: str
    name: Optional[str] = None
    symbol: Optional[Symbol] = None

    # Geometry, each expressed as a ``Vec3``.
    size: Vec3
    position: Vec3
    rotation: Vec3

    area: Optional[float] = None
    location: Location
    floors: list[Floor] = []

    # Attachments, notes and string renderings of the measures.
    images: list[Any] = []
    notes: Optional[str] = None
    formatted: Optional[FormattedMeasures] = None

    # Survey fields, built-in and custom.
    displayable_fields: list[SurveyField] = []
    custom_displayable_fields: list[SurveyField] = []

    # Untyped lists: element shapes are not modelled here.
    customer: list[Any] = []
    custom_attributes: list[Any] = []
class PlanDetail(BaseModel):
    """Envelope around a ``PlanBody`` plus optional version metadata.

    Only ``plan`` is required.
    """

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    # Version metadata.
    extension_version: Optional[str] = None
    wrapper_version: Optional[str] = None
    document_version: Optional[str] = None

    # Accepts either an integer or a string value.
    last_modification_date: Optional[Union[int, str]] = None

    plan: PlanBody
class PlanSummary(BaseModel):
    """Summary record of a plan as listed in ``PlansListResponse.plans``.

    ``id`` and ``name`` are required; every other field is optional.
    """

    # Uses a different shared configuration from the sibling models.
    # NOTE(review): presumably it also allows populating ``url_3d`` by its
    # field name as well as by its alias -- confirm where it is defined.
    model_config = _IGNORE_POPULATE

    # Identity.
    id: str
    project_id: Optional[str] = None
    name: str

    address: Optional[Address] = None

    # Timestamps, kept as strings.
    creation_date: Optional[str] = None
    update_date: Optional[str] = None

    # Links.
    thumbnail_url: Optional[str] = None
    public_url: Optional[str] = None
    cloud_url: Optional[str] = None
    # The external key "3d_url" starts with a digit and so cannot be a
    # Python identifier; it is mapped onto ``url_3d`` through an alias.
    url_3d: Optional[str] = Field(default=None, alias="3d_url")

    # Ownership.
    workgroup_id: Optional[str] = None
    team_id: Optional[str] = None
    created_by: Optional[CreatedBy] = None
class Paging(BaseModel):
    """Pagination block of a plans listing; all three fields are required."""

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    page: int
    # Boolean flag (presumably "another page exists" -- confirm with the API).
    next_page: bool
    count: int
class PlansListResponse(BaseModel):
    """A page of plan summaries together with its pagination block.

    ``paging`` is required; ``plans`` defaults to an empty list.
    """

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    paging: Paging
    plans: list[PlanSummary] = []
class MagicPlanPlan(BaseModel):
    """Pairs a plan's summary record with its full detail document.

    Both fields are required.
    """

    # Shared model configuration declared elsewhere in this module
    # (presumably tells the model to ignore unknown keys -- confirm there).
    model_config = _IGNORE

    plan: PlanSummary
    plan_detail: PlanDetail

Some files were not shown because too many files have changed in this diff Show more