diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile
index ebe405a0..9a75ffc7 100644
--- a/.devcontainer/backend/Dockerfile
+++ b/.devcontainer/backend/Dockerfile
@@ -18,15 +18,6 @@ RUN curl -fsSL https://github.com/neovim/neovim/releases/latest/download/nvim-li
| tar -xz -C /opt \
&& ln -s /opt/nvim-linux-x86_64/bin/nvim /usr/local/bin/nvim
-# # 2) Build and install libpostal from source
-# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
-# && cd /tmp/libpostal \
-# && ./bootstrap.sh \
-# && ./configure --datadir=/usr/local/share/libpostal \
-# && make -j"$(nproc)" \
-# && make install \
-# && ldconfig \
-# && rm -rf /tmp/libpostal
# 3) Create the user and grant sudo privileges
RUN groupadd -g ${USER_GID} ${USER} \
@@ -34,10 +25,7 @@ RUN groupadd -g ${USER_GID} ${USER} \
&& echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
&& chmod 0440 /etc/sudoers.d/${USER}
-# # 4) Python deps - if you want to run assest list
-# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
-# ADD asset_list/requirements.txt requirements.txt
-# RUN pip install -r requirements.txt
+
#
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
@@ -75,26 +63,27 @@ RUN wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key
RUN apt update
RUN apt install -y postgresql-14
-# Install Node.js + backlog.md
+# Install Node.js
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
- && npm install -g backlog.md \
&& rm -rf /var/lib/apt/lists/*
+# GitHub CLI — used by the postCreate skill installer to authenticate against
+# private Hestia-Homes repos via the host's mounted ~/.config/gh.
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+ | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
+ && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+ > /etc/apt/sources.list.d/github-cli.list \
+ && apt update && apt install -y gh \
+ && rm -rf /var/lib/apt/lists/*
+
USER ${USER}
# Bootstrap LazyVim starter config
RUN git clone https://github.com/LazyVim/starter /home/${USER}/.config/nvim \
&& rm -rf /home/${USER}/.config/nvim/.git
-# Install Claude + plugins + skills
-RUN curl -fsSL https://claude.ai/install.sh | bash \
- && export PATH="/home/${USER}/.local/bin:${PATH}" \
- && claude plugin marketplace add JuliusBrussee/caveman \
- && claude plugin install caveman@caveman \
- && npx skills@latest add --global --yes mattpocock/skills/grill-me \
- && npx skills@latest add --global --yes mattpocock/skills/to-prd \
- && npx skills@latest add --global --yes mattpocock/skills/ubiquitous-language \
- && npx skills@latest add --global --yes mattpocock/skills/tdd \
- && npx skills@latest add --global --yes mattpocock/skills/improve-codebase-architecture
+# Install Claude Code CLI (skills are installed via postCreate from Hestia-Homes/agentic-toolkit)
+RUN curl -fsSL https://claude.ai/install.sh | bash
ENV PATH="/home/vscode/.local/bin:${PATH}"
USER root
diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json
index 54e45095..24949770 100644
--- a/.devcontainer/backend/devcontainer.json
+++ b/.devcontainer/backend/devcontainer.json
@@ -4,7 +4,8 @@
"service": "model-backend",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
- "initializeCommand": "docker network create shared-dev 2>/dev/null || true",
+ "initializeCommand": "docker network create shared-dev 2>/dev/null || true; test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
+ "postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind",
@@ -41,12 +42,8 @@
"containerEnv": {
"PYTHONFLAGS": "-Xfrozen_modules=off"
},
- "forwardPorts": [6421, 8000],
+ "forwardPorts": [8000],
"portsAttributes": {
- "6421": {
- "label": "Backlog.md",
- "onAutoForward": "notify"
- },
"8000": {
"label": "FastAPI",
"onAutoForward": "notify"
diff --git a/.devcontainer/backend/docker-compose.yml b/.devcontainer/backend/docker-compose.yml
index 757cfbe0..cf3bb2c0 100644
--- a/.devcontainer/backend/docker-compose.yml
+++ b/.devcontainer/backend/docker-compose.yml
@@ -14,8 +14,13 @@ services:
volumes:
- ../../:/workspaces/model
- ~/.gitconfig:/home/vscode/.gitconfig:ro
+ # GitHub CLI auth from host (created by `gh auth login`). Used by the
+ # postCreate skill installer to clone private Hestia-Homes repos.
+ - ~/.config/gh:/home/vscode/.config/gh:ro
environment:
- SSH_AUTH_SOCK=${SSH_AUTH_SOCK:-}
+ # Fallback HTTPS auth if ~/.config/gh isn't present on the host.
+ - GITHUB_TOKEN=${GITHUB_TOKEN:-}
networks:
- backend-net
- shared-dev
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index dede3162..573c4f7c 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -31,17 +31,19 @@ from recommendations.recommendation_utils import (
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
-from dotenv import load_dotenv
+# from dotenv import load_dotenv
logger = setup_logger()
-load_dotenv(dotenv_path="../backend/.env")
+# load_dotenv(dotenv_path="../backend/.env")
# OpenAI API Key (set this in your environment variables for security)
-OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+# OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
class DataRemapper:
- def __init__(self, standard_values, standard_map=None, max_tokens=1000):
+ def __init__(
+ self, standard_values, standard_map=None, max_tokens=1000, api_key=None
+ ):
"""
Initialize the remapper with standard values and a predefined mapping.
@@ -75,7 +77,8 @@ class DataRemapper:
"gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
}
- self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
+        # Deliberately no print/log of api_key here: it is a secret.
+        self.openai_client = OpenAI(api_key=api_key)
@staticmethod
def clean_string(text):
@@ -136,12 +139,20 @@ class DataRemapper:
raise ValueError("Input tokens exceed the maximum limit.")
logger.info("Calling OpenAI API for standardization...")
- response = self.openai_client.chat.completions.create(
- model=self.ai_model,
- messages=[{"role": "user", "content": prompt}],
- max_tokens=self.max_tokens,
- temperature=0.1,
- )
+        try:
+            response = self.openai_client.chat.completions.create(
+                model=self.ai_model,
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=self.max_tokens,
+                temperature=0.1,
+            )
+        except Exception as e:
+            # Log through the module logger (traceback included) instead of
+            # print(); the error is re-raised unchanged for the caller.
+            status = getattr(e, "status_code", None)
+            body = getattr(getattr(e, "response", None), "text", None)
+            logger.exception("OpenAI %s failed: status=%s body=%s", self.ai_model, status, body)
+            raise
output_text = response.choices[0].message.content.strip()
output_tokens = self.count_tokens(output_text) # Count output tokens
@@ -504,6 +515,7 @@ class AssetList:
landlord_block_reference=None,
phase=False,
header=0,
+ openai_api_key=None,
):
self.local_filepath = local_filepath
self.sheet_name = sheet_name
@@ -529,6 +541,7 @@ class AssetList:
self.ecosurv = None
self.ecosurv_no_match = pd.DataFrame()
self.geographical_areas = pd.DataFrame()
+ self.openai_api_key = openai_api_key
# When this is True, we intend to break the programme into multiple phases. We may need to review
# how this is structured in the future, as depending on how we get future data, we may need to
@@ -1107,6 +1120,7 @@ class AssetList:
remapper = DataRemapper(
standard_values=config["standard_values"],
standard_map=config["standard_map"],
+ api_key=self.openai_api_key,
)
remap_dictionary = remapper.standardize_list(
values_to_remap=values_to_remap.tolist()
@@ -1296,8 +1310,8 @@ class AssetList:
self.standardised_asset_list[
self.ATTRIBUTE_HAS_SOLAR
] = self.standardised_asset_list[
- self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]
- ] | ~self.standardised_asset_list[
+ self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]
+ ] | ~self.standardised_asset_list[
self.EPC_API_DATA_NAMES["photo-supply"]
].isin(
["0.0", 0, None, "", np.nan]
@@ -1315,7 +1329,7 @@ class AssetList:
property_type=(
str(x[self.STANDARD_PROPERTY_TYPE]).title()
if str(x[self.STANDARD_PROPERTY_TYPE]).title()
- in accepted_epc_property_types
+ in accepted_epc_property_types
else (
x[self.EPC_API_DATA_NAMES["property-type"]]
if not pd.isnull(
@@ -1373,9 +1387,9 @@ class AssetList:
self.standardised_asset_list.apply(
lambda x: estimate_perimeter(
floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]]
- / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+ / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]]
- / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+ / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
),
axis=1,
)
@@ -1460,7 +1474,7 @@ class AssetList:
year_lower_bound = (
2007
if x[self.EPC_API_DATA_NAMES["construction-age-band"]]
- == "England and Wales: 2007 onwards"
+ == "England and Wales: 2007 onwards"
else 2012
)
@@ -1515,7 +1529,7 @@ class AssetList:
age_band_matches = (
"EPC Age Band Matches Year Built"
if x[self.STANDARD_YEAR_BUILT]
- == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]])
+ == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]])
else "EPC Age Band is different from Year Built"
)
@@ -1545,7 +1559,7 @@ class AssetList:
age_band_matches = (
"EPC Age Band Matches Year Built"
if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date))
- and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date))
+ and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date))
else (
"EPC Age Band is older than Year Built"
if x[self.STANDARD_YEAR_BUILT] > float(upper_date)
@@ -1717,22 +1731,22 @@ class AssetList:
if self.non_intrusives_present:
if self.new_format_non_insturives_present_v2:
non_intrusives_wall_filter = (
- self.standardised_asset_list["non-intrusives: Construction"]
- == "CAVITY"
- ) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
+ self.standardised_asset_list["non-intrusives: Construction"]
+ == "CAVITY"
+ ) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
["EMPTY", "PARTIAL", "EMPTY CAVITY"]
)
else:
non_intrusives_wall_filter = (
- self.standardised_asset_list["non-intrusives: Construction"]
- == "CAVITY"
- ) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
+ self.standardised_asset_list["non-intrusives: Construction"]
+ == "CAVITY"
+ ) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
["EMPTY", "PARTIAL"]
)
elif self.old_format_non_intrusives_present:
non_intrusives_wall_filter = self.standardised_asset_list[
- "non-intrusives: WFT Findings"
- ].str.lower().str.strip().isin(
+ "non-intrusives: WFT Findings"
+ ].str.lower().str.strip().isin(
[
"empty cavity",
"partial fill",
@@ -1742,18 +1756,18 @@ class AssetList:
"empty cav",
]
) | (
- (
- self.standardised_asset_list["non-intrusives: WFT Findings"]
- .str.lower()
- .str.strip()
- .str.contains("empty cavity|partial fill")
- & ~self.standardised_asset_list["non-intrusives: WFT Findings"]
- .astype(str)
- .str.lower()
- .str.strip()
- .str.contains("major access issues")
- )
- )
+ (
+ self.standardised_asset_list["non-intrusives: WFT Findings"]
+ .str.lower()
+ .str.strip()
+ .str.contains("empty cavity|partial fill")
+ & ~self.standardised_asset_list["non-intrusives: WFT Findings"]
+ .astype(str)
+ .str.lower()
+ .str.strip()
+ .str.contains("major access issues")
+ )
+ )
else:
# We set the filter to False, as we have no non-intrusives
non_intrusives_wall_filter = False
@@ -1765,12 +1779,12 @@ class AssetList:
)
else:
year_built_filter = (
- self.standardised_asset_list[self.STANDARD_YEAR_BUILT]
- <= self.EMPTY_CAVITY_YEAR_THRESHOLD
- ) | (
- self.standardised_asset_list["epc_year_upper_bound"]
- <= self.EMPTY_CAVITY_YEAR_THRESHOLD
- )
+ self.standardised_asset_list[self.STANDARD_YEAR_BUILT]
+ <= self.EMPTY_CAVITY_YEAR_THRESHOLD
+ ) | (
+ self.standardised_asset_list["epc_year_upper_bound"]
+ <= self.EMPTY_CAVITY_YEAR_THRESHOLD
+ )
# Criteria:
# The property isn't a bedsit
@@ -1811,8 +1825,8 @@ class AssetList:
] = (
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]
& ~self.standardised_asset_list[
- "non_intrusive_indicates_empty_cavity_has_solar"
- ]
+ "non_intrusive_indicates_empty_cavity_has_solar"
+ ]
& (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(
["bedsit"]
@@ -1888,8 +1902,8 @@ class AssetList:
.str.lower()
.isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS)
| self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
- ["uninsulated cavity"]
- )
+ ["uninsulated cavity"]
+ )
)
######################################################
@@ -1926,8 +1940,8 @@ class AssetList:
extraction_wall_filter = (
extraction_wall_filter
& ~self.standardised_asset_list[
- "non-intrusives: Eligibility (Red/Yellow/Green)"
- ].isin(["RED"])
+ "non-intrusives: Eligibility (Red/Yellow/Green)"
+ ].isin(["RED"])
)
self.standardised_asset_list[
@@ -2023,26 +2037,26 @@ class AssetList:
self.standardised_asset_list[
"solar_epc_data_indicates_correct_heating_system"
] = (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheat-description"]
- ]
- .str.lower()
- .str.contains(
- "air source heat pump|ground source heat pump|boiler and radiators, electric"
- )
- ) | (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheat-description"]
- ]
- .str.lower()
- .str.contains("electric storage heaters")
- & (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheatcont-description"]
- ]
- == "Controls for high heat retention storage heaters"
- )
+ self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["mainheat-description"]
+ ]
+ .str.lower()
+ .str.contains(
+ "air source heat pump|ground source heat pump|boiler and radiators, electric"
)
+ ) | (
+ self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["mainheat-description"]
+ ]
+ .str.lower()
+ .str.contains("electric storage heaters")
+ & (
+ self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["mainheatcont-description"]
+ ]
+ == "Controls for high heat retention storage heaters"
+ )
+ )
# If the landlord has given us the heating system, we default to that on heating upgrades. Because of the
# poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the
@@ -2050,25 +2064,25 @@ class AssetList:
self.standardised_asset_list[
"solar_epc_data_indicates_requires_heating_upgrade"
] = (
+ self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["mainheat-description"]
+ ]
+ .str.lower()
+ .str.contains("electric storage heaters|room heaters")
+ & (
self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheat-description"]
+ self.EPC_API_DATA_NAMES["mainheatcont-description"]
]
- .str.lower()
- .str.contains("electric storage heaters|room heaters")
- & (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheatcont-description"]
- ]
- != "Controls for high heat retention storage heaters"
- )
- ) & (
- ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
- ["district heating", "communal heating", "communal gas boiler"]
- )
- & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
- .astype(str)
- .str.contains("gas ")
+ != "Controls for high heat retention storage heaters"
)
+ ) & (
+ ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
+ ["district heating", "communal heating", "communal gas boiler"]
+ )
+ & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
+ .astype(str)
+ .str.contains("gas ")
+ )
# Basic check - both of the previous two shouldn't be true simultaneously
if (
@@ -2148,8 +2162,8 @@ class AssetList:
self.standardised_asset_list[
"solar_non_intrusives_walls_insulated"
] = self.standardised_asset_list[
- "non-intrusives: WFT Findings"
- ].str.lower().str.strip().isin(
+ "non-intrusives: WFT Findings"
+ ].str.lower().str.strip().isin(
[
"retro drilled",
"retro filled",
@@ -2158,8 +2172,8 @@ class AssetList:
"retro drilled and filled",
]
) | self.standardised_asset_list[
- "non-intrusives: WFT Findings"
- ].str.lower().str.strip().str.contains(
+ "non-intrusives: WFT Findings"
+ ].str.lower().str.strip().str.contains(
"retro drilled"
)
else:
@@ -2176,19 +2190,14 @@ class AssetList:
)
self.standardised_asset_list["solar_epc_walls_insulated"] = (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES[
- "walls-description"]]
- .str.lower()
- .str.contains("|".join(
- self.EPC_INSULATED_WALLS_SUBSTRINGS))
- ) | (
- self.standardised_asset_list[
- "walls_u_value"].apply(
- lambda x: x <= 0.7 if not pd.isnull(
- x) else False
- )
- )
+ self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]]
+ .str.lower()
+ .str.contains("|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS))
+ ) | (
+ self.standardised_asset_list["walls_u_value"].apply(
+ lambda x: x <= 0.7 if not pd.isnull(x) else False
+ )
+ )
roof_data = []
for desc in self.standardised_asset_list[
@@ -2230,20 +2239,20 @@ class AssetList:
self.standardised_asset_list[
"solar_epc_loft_needs_topup"
] = self.standardised_asset_list[
- self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
- ].apply(
+ self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
+ ].apply(
lambda x: int(x) < 200 if str(x).isdigit() else False
) | (
- (
- self.standardised_asset_list["is_loft"]
- | self.standardised_asset_list["is_pitched"]
- )
- & (
- self.standardised_asset_list[
- self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
- ].isin(["below average", "none"])
- )
+ (
+ self.standardised_asset_list["is_loft"]
+ | self.standardised_asset_list["is_pitched"]
)
+ & (
+ self.standardised_asset_list[
+ self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
+ ].isin(["below average", "none"])
+ )
+ )
self.standardised_asset_list["epc_has_floor_recommendation"] = (
self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False)
@@ -2252,16 +2261,15 @@ class AssetList:
# Check if the boiler is electric
# We check if it contains both the terms boiler & electric
self.standardised_asset_list["has_electric_boiler"] = (
- self.standardised_asset_list[
- self.EPC_API_DATA_NAMES["mainheat-description"]
- ]
- .str.lower()
- .isin(["boiler and radiators, electric"])
- ) | (
- self.standardised_asset_list[
- self.STANDARD_HEATING_SYSTEM]
- == "electric boiler"
- )
+ self.standardised_asset_list[
+ self.EPC_API_DATA_NAMES["mainheat-description"]
+ ]
+ .str.lower()
+ .isin(["boiler and radiators, electric"])
+ ) | (
+ self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
+ == "electric boiler"
+ )
####################################
# Check solar eligibility
@@ -2399,11 +2407,11 @@ class AssetList:
empty_cavity_map = {
"non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE
- + ": ",
+ + ": ",
"non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property "
- "already has solar: ",
+ "already has solar: ",
"non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, "
- f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
+ f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
}
for variable, description in empty_cavity_map.items():
self.standardised_asset_list["cavity_reason"] = np.where(
@@ -2419,8 +2427,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
- "non_intrusive_indicates_empty_cavity"
- ]
+ "non_intrusive_indicates_empty_cavity"
+ ]
& (
self.standardised_asset_list["non-intrusives: WFT Findings"]
.str.lower()
@@ -2445,8 +2453,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
- "non_intrusive_indicates_empty_cavity"
- ]
+ "non_intrusive_indicates_empty_cavity"
+ ]
& self.standardised_asset_list[
"non_intrusive_indicates_cavity_extraction"
]
@@ -2461,8 +2469,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
- "non_intrusive_indicates_empty_cavity"
- ]
+ "non_intrusive_indicates_empty_cavity"
+ ]
& (
self.standardised_asset_list["non-intrusives: Insulated"]
== "RETRO DRILLED"
@@ -2478,8 +2486,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
- "non_intrusive_indicates_empty_cavity"
- ]
+ "non_intrusive_indicates_empty_cavity"
+ ]
& (
self.standardised_asset_list["non-intrusives: Insulated"]
== "FILLED AT BUILD"
@@ -2495,8 +2503,8 @@ class AssetList:
(
self.standardised_asset_list["epc_indicates_empty_cavity"]
& ~self.standardised_asset_list[
- "non_intrusive_indicates_empty_cavity"
- ]
+ "non_intrusive_indicates_empty_cavity"
+ ]
& pd.isnull(self.standardised_asset_list["cavity_reason"])
),
f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"],
@@ -2640,7 +2648,7 @@ class AssetList:
identified_work = self.standardised_asset_list[
~pd.isnull(self.standardised_asset_list["cavity_reason"])
| ~pd.isnull(self.standardised_asset_list["solar_reason"])
- ][self.DOMNA_PROPERTY_ID].values
+ ][self.DOMNA_PROPERTY_ID].values
if self.DOMNA_PROPERTY_ID in self.outcomes.columns:
self.outcomes_for_output = self.outcomes[
@@ -2675,12 +2683,12 @@ class AssetList:
blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
== "block of flats"
- ]
+ ]
non_blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
!= "block of flats"
- ]
+ ]
# Produce some aggregate figures
self.work_type_figures = {
@@ -2723,7 +2731,7 @@ class AssetList:
blocks = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
== "block of flats"
- ].copy()
+ ].copy()
if blocks.empty:
return
@@ -2860,7 +2868,7 @@ class AssetList:
self.standardised_asset_list = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
!= "block of flats"
- ]
+ ]
self.standardised_asset_list = pd.concat(
[self.standardised_asset_list, expanded_blocks], ignore_index=True
@@ -2940,7 +2948,7 @@ class AssetList:
# find any block refs with more than 50% emptires
viable_empty_blocks = self.block_analysis_df[
self.block_analysis_df["Percentage of Empties"] >= 0.50
- ]
+ ]
if not viable_empty_blocks.empty:
project_code_lookup = viable_empty_blocks[["Block Reference"]].copy()
@@ -3179,7 +3187,7 @@ class AssetList:
contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[
[self.contact_detail_fields["landlord_property_id"]] + details_colnames
- ]
+ ]
contact_details = contact_details[
~pd.isnull(
contact_details[self.contact_detail_fields["landlord_property_id"]]
@@ -3572,13 +3580,10 @@ class AssetList:
"Non-Intrusives: Date Checked
": date_of_inspections,
"Non-Intrusives: Wall Type ": non_intrusives_construction,
"Non-intrusives: Insulation ": non_intrusives_insulated,
- "Non-intrusives: Insulation Material ":
- non_intrusives_insulation_material,
- "Non-Intrusives: CIGA Check Required ":
- non_intrusives_ciga_check_required,
+ "Non-intrusives: Insulation Material ": non_intrusives_insulation_material,
+ "Non-Intrusives: CIGA Check Required ": non_intrusives_ciga_check_required,
"Non-Intrusives: PV Access Issues ": non_intrusives_pv_access,
- "Non-Intrusives: Roof Orientation ":
- non_intrusives_roof_orientation,
+ "Non-Intrusives: Roof Orientation ": non_intrusives_roof_orientation,
"Non-Intrusives: Surveyor Notes ": non_intrusives_surveyor_notes,
"Non-Intrusives: Surveyor Name ": non_intrusives_surveyor_name,
"CIGA: Date Requested ": None, # TODO: Don't have this for the moment
@@ -3755,8 +3760,8 @@ class AssetList:
# We compare address line 1 to full address
if any(
df[self.STANDARD_FULL_ADDRESS]
- .str.lower()
- .str.contains(row["Address Line 1"].lower(), na=False)
+ .str.lower()
+ .str.contains(row["Address Line 1"].lower(), na=False)
):
df = df[
df[self.STANDARD_FULL_ADDRESS]
@@ -3996,7 +4001,7 @@ class AssetList:
matched = matched[
matched["houseno"].astype(str) == house_no_to_match
- ]
+ ]
if matched.shape[0] == 1:
lookup_i.append(
{
@@ -4021,7 +4026,7 @@ class AssetList:
)[0]
matched = matched[
matched[self.STANDARD_FULL_ADDRESS] == best_match
- ]
+ ]
lookup_i.append(
{
"row_id": x["row_id"],
@@ -4332,7 +4337,7 @@ class AssetList:
df = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID]
== row[master_id_colnames[idx]]
- ]
+ ]
if df.shape[0] == 1:
matched.append(
{
@@ -4438,7 +4443,7 @@ class AssetList:
)[1]
)
> 90
- ]
+ ]
if df.shape[0] == 0:
unmatched.append(row["row_id"])
@@ -4446,8 +4451,8 @@ class AssetList:
if any(
df[self.STANDARD_FULL_ADDRESS]
- .str.lower()
- .str.contains(
+ .str.lower()
+ .str.contains(
" ".join(
[row[house_no_col], row["Street / Block Name"]]
).lower()
@@ -4474,7 +4479,7 @@ class AssetList:
row[property_type_col].split(" ")[-1].lower()
)
& (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
- ]
+ ]
if df.shape[0] != 1:
# We have multiple matches - it's likely because the landlord has a duplicate
diff --git a/asset_list/app.py b/asset_list/app.py
index 49ec48a0..7413c7cb 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -21,6 +21,11 @@ EPC_AUTH_TOKEN = os.getenv(
OPENAI_API_KEY = os.getenv(
"OPENAI_API_KEY",
)
+# Report only whether a key is configured: os.getenv returns None when the
+# variable is unset (so it must not be sliced), and no part of the secret
+# should ever be written to stdout.
+_openai_key_state = "set" if OPENAI_API_KEY else "NONE"
+print(f"[debug] OPENAI_API_KEY loaded: {_openai_key_state}")
def extract_address1(
@@ -74,23 +79,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
- data_filename = "2026-04-22T08_22_00.779745_61049fd3.xlsx"
- sheet_name = "in"
- postcode_column = "postcode_clean"
- address1_column = "address2uprn_address"
+ data_filename = "input.xlsx"
+ sheet_name = "Handovers"
+ postcode_column = "POSTCODE"
+ address1_column = "Full Addres"
address1_method = None
- fulladdress_column = "address2uprn_address"
+ fulladdress_column = "Full Addres"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
- landlord_os_uprn = "address2uprn_uprn"
- landlord_property_type = "Property Type" # Good to include if landlord gave
- landlord_built_form = "Built Form" # Good to include if landlord gave
+ landlord_os_uprn = "domna_found_uprn"
+ landlord_property_type = "PROPERTY TYPE" # Good to include if landlord gave
+ landlord_built_form = "Type Description" # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
- landlord_property_id = "UPRN"
+ landlord_property_id = "PROP REF"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@@ -131,6 +136,7 @@ def app():
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase,
+ openai_api_key=OPENAI_API_KEY,
)
asset_list.init_standardise()
diff --git a/backend/address2UPRN/scoring.py b/backend/address2UPRN/scoring.py
new file mode 100644
index 00000000..d31b9aea
--- /dev/null
+++ b/backend/address2UPRN/scoring.py
@@ -0,0 +1,57 @@
+import pandas as pd
+
+from backend.utils.addressMatch import AddressMatch
+
+
+def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
+    """
+    Check that `column` holds exactly one distinct non-null UPRN equal to `uprn`.
+
+    Null entries are ignored and both sides are compared as stripped strings.
+    Returns False when the column is missing, has no non-null values, or
+    contains any other UPRN.
+    """
+
+    if column not in df.columns:
+        return False
+
+    uprns = df[column].dropna().astype(str).str.strip().unique()
+
+    return len(uprns) == 1 and uprns[0] == str(uprn).strip()
+
+
+def get_uprn_candidates(
+    df: pd.DataFrame,
+    user_address: str,
+    address_column: str = "address",
+    uprn_column: str = "uprn",
+) -> pd.DataFrame:
+    """
+    Score and rank every row of `df` against `user_address`.
+
+    Works on a copy of `df`. Adds `lexiscore` (AddressMatch.levenshtein of the
+    normalised user address against each row's address) and `lexirank` (dense
+    rank, highest score first), casts the UPRN column to text without a
+    trailing ".0", and returns the rows best rank first. No UPRN is chosen.
+    Raises ValueError when either column is absent.
+    """
+    for required in (address_column, uprn_column):
+        if required not in df.columns:
+            raise ValueError(f"Missing column: {required}")
+
+    ranked = df.copy()
+    query = AddressMatch.normalise_address(user_address)
+
+    # NOTE(review): row addresses are passed to levenshtein as-is; only the
+    # user address is normalised here - confirm that is intended.
+    scores = ranked[address_column].apply(
+        lambda candidate: AddressMatch.levenshtein(query, candidate)
+    )
+    ranked["lexiscore"] = scores
+
+    uprn_text = ranked[uprn_column].astype(str)
+    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)
+
+    ranks = ranked["lexiscore"].rank(method="dense", ascending=False)
+    ranked["lexirank"] = ranks.astype(int)
+    return ranked.sort_values(["lexirank", "lexiscore"], ascending=[True, False])
diff --git a/backend/addresses/Addresses.py b/backend/addresses/Addresses.py
index 4c046554..44083b86 100644
--- a/backend/addresses/Addresses.py
+++ b/backend/addresses/Addresses.py
@@ -110,6 +110,8 @@ class Addresses:
landlord_multi_glaze_proportion=float(row["landlord_multi_glaze_proportion"]) if row.get(
"landlord_multi_glaze_proportion") else None,
landlord_construction_age_band=row.get("landlord_construction_age_band"),
+ lmk_key=None,
+ epc_certificate_number=None,
)
@staticmethod
diff --git a/backend/app/config.py b/backend/app/config.py
index 44826d24..9d8b064e 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -46,6 +46,8 @@ class Settings(BaseSettings):
EPC_AUTH_TOKEN: str = "changeme"
OPEN_EPC_API_TOKEN: str = "changeme"
GOOGLE_SOLAR_API_KEY: str = "changeme"
+ MAGICPLAN_CUSTOMER_ID: str = "changeme"
+ MAGICPLAN_API_KEY: str = "changeme"
# Database settings
DB_HOST: str = "changeme"
@@ -78,6 +80,7 @@ class Settings(BaseSettings):
OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None
PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None
+ OPENAI_API_KEY: Optional[str] = None
# Pas Hub
PASHUB_EMAIL: Optional[str] = None
diff --git a/backend/app/db/functions/magic_plan_functions.py b/backend/app/db/functions/magic_plan_functions.py
new file mode 100644
index 00000000..9400f36f
--- /dev/null
+++ b/backend/app/db/functions/magic_plan_functions.py
@@ -0,0 +1,141 @@
+from typing import Any, cast
+
+from sqlalchemy import delete, select
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlmodel import Session, col
+
+from datatypes.magicplan.domain.models import Floor, Plan
+from backend.app.db.models.magic_plan import (
+ MagicPlanDoorModel,
+ MagicPlanFloorModel,
+ MagicPlanPlanModel,
+ MagicPlanRoomModel,
+ MagicPlanWindowModel,
+)
+
+
+def save_plan(session: Session, plan: Plan) -> None:
+    """Persist ``plan`` together with its floors, rooms, windows and doors.
+
+    The plan row is upserted, every existing child row for it is deleted,
+    and the children are re-inserted from ``plan.floors``. No commit is
+    issued here or in the helpers it calls.
+    """
+    persisted_plan_id: int = _upsert_plan(session, plan)
+    # Clear the previous children first so a re-save never duplicates them.
+    _delete_children(session, persisted_plan_id)
+    new_floor_ids: list[int] = _insert_floors(
+        session, plan.floors, persisted_plan_id
+    )
+    new_room_ids: list[int] = _insert_rooms(session, plan.floors, new_floor_ids)
+    _insert_windows_and_doors(session, plan.floors, new_room_ids)
+
+
+def _upsert_plan(session: Session, plan: Plan) -> int:
+    """Insert the plan row, or update it when its ``magic_plan_uid`` exists.
+
+    Returns the ``id`` of the inserted or updated row.
+    """
+    # The same three values are written on insert and on conflict.
+    mutable_fields: dict[str, Any] = {
+        "name": plan.name,
+        "address": plan.address,
+        "postcode": plan.postcode,
+    }
+    insert_stmt = pg_insert(MagicPlanPlanModel).values(
+        magic_plan_uid=plan.uid, **mutable_fields
+    )
+    upsert_stmt = insert_stmt.on_conflict_do_update(
+        index_elements=["magic_plan_uid"],
+        set_=mutable_fields,
+    ).returning(col(MagicPlanPlanModel.id))
+    plan_row_id: int = session.execute(upsert_stmt).scalar_one()
+    return plan_row_id
+
+
+def _delete_children(session: Session, plan_id: int) -> None:
+    """Delete every floor, room, window and door belonging to ``plan_id``.
+
+    Statements run leaf-first: windows and doors, then rooms, then floors.
+    """
+    plan_floor_ids = (
+        select(col(MagicPlanFloorModel.id))
+        .where(col(MagicPlanFloorModel.magic_plan_plan_id) == plan_id)
+        .scalar_subquery()
+    )
+    plan_room_ids = (
+        select(col(MagicPlanRoomModel.id))
+        .where(col(MagicPlanRoomModel.magic_plan_floor_id).in_(plan_floor_ids))
+        .scalar_subquery()
+    )
+    deletions = (
+        delete(MagicPlanWindowModel).where(
+            col(MagicPlanWindowModel.magic_plan_room_id).in_(plan_room_ids)
+        ),
+        delete(MagicPlanDoorModel).where(
+            col(MagicPlanDoorModel.magic_plan_room_id).in_(plan_room_ids)
+        ),
+        delete(MagicPlanRoomModel).where(
+            col(MagicPlanRoomModel.magic_plan_floor_id).in_(plan_floor_ids)
+        ),
+        delete(MagicPlanFloorModel).where(
+            col(MagicPlanFloorModel.magic_plan_plan_id) == plan_id
+        ),
+    )
+    for deletion in deletions:
+        session.execute(deletion)
+
+
+def _insert_floors(session: Session, floors: list[Floor], plan_id: int) -> list[int]:
+    """Insert one ``magic_plan_floor`` row per floor and return the new ids.
+
+    Returns an empty list without executing anything when ``floors`` is
+    empty: an empty list passed to ``values()`` does not produce a zero-row
+    INSERT, so the statement must not be issued at all in that case.
+    """
+    rows: list[dict[str, Any]] = [
+        {"magic_plan_plan_id": plan_id, "level": floor.level} for floor in floors
+    ]
+    if not rows:
+        return []
+    # NOTE(review): save_plan pairs these ids with ``floors`` by position,
+    # which assumes RETURNING yields rows in VALUES order — confirm.
+    result = session.execute(
+        pg_insert(MagicPlanFloorModel)
+        .values(rows)
+        .returning(col(MagicPlanFloorModel.id))
+    )
+    return cast(list[int], list(result.scalars().all()))
+
+
+def _insert_rooms(
+    session: Session, floors: list[Floor], floor_ids: list[int]
+) -> list[int]:
+    """Insert one ``magic_plan_room`` row per room and return the new ids.
+
+    ``floors`` and ``floor_ids`` are paired by position. Returns an empty
+    list without executing anything when there are no rooms, because an
+    empty list passed to ``values()`` does not produce a zero-row INSERT.
+    """
+    rows: list[dict[str, Any]] = [
+        {
+            "magic_plan_floor_id": floor_id,
+            "name": room.name,
+            "width_m": room.width_m,
+            "length_m": room.length_m,
+            "area_m2": room.area_m2,
+        }
+        for floor, floor_id in zip(floors, floor_ids)
+        for room in floor.rooms
+    ]
+    if not rows:
+        return []
+    # NOTE(review): the caller pairs these ids with the flattened rooms by
+    # position, which assumes RETURNING yields rows in VALUES order — confirm.
+    result = session.execute(
+        pg_insert(MagicPlanRoomModel).values(rows).returning(col(MagicPlanRoomModel.id))
+    )
+    return cast(list[int], list(result.scalars().all()))
+
+
+def _insert_windows_and_doors(
+    session: Session, floors: list[Floor], room_ids: list[int]
+) -> None:
+    """Insert window and door rows for every room in ``floors``.
+
+    Rooms are flattened floor by floor and paired with ``room_ids`` by
+    position. Each insert is skipped when it has no rows.
+    """
+    flattened_rooms = [room for floor in floors for room in floor.rooms]
+
+    window_rows: list[dict[str, Any]] = []
+    door_rows: list[dict[str, Any]] = []
+    for room, room_id in zip(flattened_rooms, room_ids):
+        for window in room.windows:
+            window_rows.append(
+                {
+                    "magic_plan_room_id": room_id,
+                    "width_m": window.width_m,
+                    "height_m": window.height_m,
+                    "area_m2": window.area_m2,
+                    "opening_type": window.opening_type,
+                }
+            )
+        # NOTE(review): only ``width_mm`` is written for doors; the door
+        # table's ``type`` column is left unset here — confirm intended.
+        for door in room.doors:
+            door_rows.append(
+                {
+                    "magic_plan_room_id": room_id,
+                    "width_mm": door.width_mm,
+                }
+            )
+
+    if window_rows:
+        session.execute(pg_insert(MagicPlanWindowModel).values(window_rows))
+    if door_rows:
+        session.execute(pg_insert(MagicPlanDoorModel).values(door_rows))
diff --git a/backend/app/db/functions/tests/__init__.py b/backend/app/db/functions/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/app/db/functions/tests/conftest.py b/backend/app/db/functions/tests/conftest.py
new file mode 100644
index 00000000..3f97e92b
--- /dev/null
+++ b/backend/app/db/functions/tests/conftest.py
@@ -0,0 +1,41 @@
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from sqlmodel import SQLModel
+
+import backend.app.db.models.magic_plan # noqa: F401 — registers MagicPlan models with SQLModel.metadata
+
+# TODO: promote to backend/app/db/conftest.py once a second DB-touching test directory appears under this tree
+
+
+@pytest.fixture(scope="function")
+def engine(postgresql):
+    """Yield an engine connected to the ``postgresql`` fixture's database.
+
+    Every table registered on ``SQLModel.metadata`` is created before the
+    test and dropped afterwards, and the engine is then disposed.
+    """
+    info = postgresql.info
+    credentials = f"{info.user}:{info.password}"
+    location = f"{info.host}:{info.port}/{info.dbname}"
+    db_engine = create_engine(f"postgresql+psycopg://{credentials}@{location}")
+    SQLModel.metadata.create_all(db_engine)
+
+    yield db_engine
+
+    SQLModel.metadata.drop_all(db_engine)
+    db_engine.dispose()
+
+
+@pytest.fixture(scope="function")
+def db_session(engine):
+    """Yield a session whose work is rolled back once the test finishes.
+
+    The session is bound to a connection with an open outer transaction;
+    teardown closes the session, rolls that transaction back and closes
+    the connection.
+    """
+    conn = engine.connect()
+    outer_transaction = conn.begin()
+    session_factory = sessionmaker(bind=conn)
+    test_session = session_factory()
+
+    yield test_session
+
+    test_session.close()
+    outer_transaction.rollback()
+    conn.close()
diff --git a/backend/app/db/functions/tests/test_magic_plan_functions.py b/backend/app/db/functions/tests/test_magic_plan_functions.py
new file mode 100644
index 00000000..e58d0528
--- /dev/null
+++ b/backend/app/db/functions/tests/test_magic_plan_functions.py
@@ -0,0 +1,95 @@
+import json
+from pathlib import Path
+
+import pytest
+from sqlalchemy import func, select
+from sqlalchemy.orm import Session
+from sqlmodel import SQLModel
+
+from datatypes.magicplan.api.response import MagicPlanPlan
+from datatypes.magicplan.domain.mapper import map_plan
+from datatypes.magicplan.domain.models import Plan
+
+from backend.app.db.functions.magic_plan_functions import save_plan
+from backend.app.db.models.magic_plan import (
+ MagicPlanDoorModel,
+ MagicPlanFloorModel,
+ MagicPlanPlanModel,
+ MagicPlanRoomModel,
+ MagicPlanWindowModel,
+)
+
+FIXTURE_DIR = Path(__file__).parents[4] / "magic_plan"
+
+
+@pytest.fixture(scope="module")
+def domain_plan() -> Plan:
+    """Load the example MagicPlan API response and map it to a domain ``Plan``."""
+    fixture_path = FIXTURE_DIR / "magicplan_api_plan_response_example.json"
+    payload = json.loads(fixture_path.read_text())
+    api_plan = MagicPlanPlan.model_validate(payload["data"])
+    return map_plan(api_plan)
+
+
+def _count(session: Session, model: type[SQLModel]) -> int:
+    """Return the number of rows currently stored for ``model``."""
+    count_query = select(func.count()).select_from(model)
+    return session.execute(count_query).scalar_one()
+
+
+def test_plan_row_present_after_save(db_session: Session, domain_plan: Plan) -> None:
+    """Saving a plan leaves exactly one plan row."""
+    # Act
+    save_plan(db_session, domain_plan)
+    stored_plans = _count(db_session, MagicPlanPlanModel)
+    # Assert
+    assert stored_plans == 1
+
+
+def test_floor_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
+    """One floor row is stored per domain floor."""
+    # Arrange
+    floor_total = len(domain_plan.floors)
+    # Act
+    save_plan(db_session, domain_plan)
+    stored_floors = _count(db_session, MagicPlanFloorModel)
+    # Assert
+    assert stored_floors == floor_total
+
+
+def test_room_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
+    """One room row is stored per domain room."""
+    # Arrange
+    room_total = sum(len(floor.rooms) for floor in domain_plan.floors)
+    # Act
+    save_plan(db_session, domain_plan)
+    stored_rooms = _count(db_session, MagicPlanRoomModel)
+    # Assert
+    assert stored_rooms == room_total
+
+
+def test_window_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
+    """One window row is stored per domain window."""
+    # Arrange
+    window_total = sum(
+        len(room.windows) for floor in domain_plan.floors for room in floor.rooms
+    )
+    # Act
+    save_plan(db_session, domain_plan)
+    stored_windows = _count(db_session, MagicPlanWindowModel)
+    # Assert
+    assert stored_windows == window_total
+
+
+def test_door_count_matches_domain(db_session: Session, domain_plan: Plan) -> None:
+    """One door row is stored per domain door."""
+    # Arrange
+    door_total = sum(
+        len(room.doors) for floor in domain_plan.floors for room in floor.rooms
+    )
+    # Act
+    save_plan(db_session, domain_plan)
+    stored_doors = _count(db_session, MagicPlanDoorModel)
+    # Assert
+    assert stored_doors == door_total
+
+
+def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
+    """Saving the same plan twice leaves the row counts of a single save."""
+    # Arrange — expected counts derived from the domain plan
+    rooms = [room for floor in domain_plan.floors for room in floor.rooms]
+    expected_counts = [
+        (MagicPlanPlanModel, 1),
+        (MagicPlanFloorModel, len(domain_plan.floors)),
+        (MagicPlanRoomModel, len(rooms)),
+        (MagicPlanWindowModel, sum(len(room.windows) for room in rooms)),
+        (MagicPlanDoorModel, sum(len(room.doors) for room in rooms)),
+    ]
+    # Act — call twice within the same session
+    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan)
+    # Assert — same row counts as a single call
+    for model, expected in expected_counts:
+        assert _count(db_session, model) == expected
diff --git a/backend/app/db/models/epc_property.py b/backend/app/db/models/epc_property.py
index 0bbf2add..50523fbb 100644
--- a/backend/app/db/models/epc_property.py
+++ b/backend/app/db/models/epc_property.py
@@ -18,8 +18,9 @@ class EpcPropertyModel(SQLModel, table=True):
__tablename__ = "epc_property"
id: Optional[int] = Field(default=None, primary_key=True)
- property_id: int = Field(foreign_key="property.id", nullable=False)
- portfolio_id: int = Field(foreign_key="portfolio.id", nullable=False)
+ property_id: Optional[int] = Field(default=None)
+ portfolio_id: Optional[int] = Field(default=None)
+ uploaded_file_id: Optional[int] = Field(default=None)
# Identity / admin
uprn: Optional[int] = Field(default=None)
@@ -148,8 +149,8 @@ class EpcPropertyModel(SQLModel, table=True):
def from_epc_property_data(
cls,
data: EpcPropertyData,
- property_id: int,
- portfolio_id: int,
+ property_id: Optional[int] = None,
+ portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
es = data.sap_energy_source
h = data.sap_heating
@@ -593,7 +594,7 @@ class EpcWindowModel(SQLModel, table=True):
id: Optional[int] = Field(default=None, primary_key=True)
epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False)
- pvc_frame: str
+ frame_material: Optional[str] = Field(default=None)
glazing_gap: str
orientation: str
window_type: str
@@ -607,7 +608,7 @@ class EpcWindowModel(SQLModel, table=True):
frame_factor: Optional[float] = Field(default=None)
permanent_shutters_insulated: Optional[str] = Field(default=None)
transmission_u_value: Optional[float] = Field(default=None)
- transmission_data_source: Optional[int] = Field(default=None)
+ transmission_data_source: Optional[str] = Field(default=None)
transmission_solar_transmittance: Optional[float] = Field(default=None)
@classmethod
@@ -615,7 +616,7 @@ class EpcWindowModel(SQLModel, table=True):
td = window.window_transmission_details
return cls(
epc_property_id=epc_property_id,
- pvc_frame=str(window.pvc_frame),
+ frame_material=window.frame_material,
glazing_gap=str(window.glazing_gap),
orientation=str(window.orientation),
window_type=str(window.window_type),
diff --git a/backend/app/db/models/hubspot_deal_data.py b/backend/app/db/models/hubspot_deal_data.py
index fa508fbe..0ee58d54 100644
--- a/backend/app/db/models/hubspot_deal_data.py
+++ b/backend/app/db/models/hubspot_deal_data.py
@@ -67,6 +67,17 @@ class HubspotDealData(SQLModel, table=True):
surveyed_date: Optional[datetime] = Field(default=None)
design_type: Optional[str] = Field(default=None)
+ survey_type: Optional[str] = Field(default=None)
+ measures_for_pibi_ordered: Optional[str] = Field(default=None)
+ pibi_order_date: Optional[datetime] = Field(default=None)
+ pibi_completed_date: Optional[datetime] = Field(default=None)
+ property_halted_date: Optional[datetime] = Field(default=None)
+ property_halted_reason: Optional[str] = Field(default=None)
+ technical_approved_measures_for_install: Optional[str] = Field(default=None)
+ sent_to_installer_for_pricing: Optional[datetime] = Field(default=None)
+ domna_survey_required: Optional[bool] = Field(default=None)
+ domna_survey_date: Optional[datetime] = Field(default=None)
+
created_at: Optional[datetime] = Field(
sa_column=Column(
DateTime(timezone=True),
diff --git a/backend/app/db/models/hubspot_user.py b/backend/app/db/models/hubspot_user.py
new file mode 100644
index 00000000..424a0c17
--- /dev/null
+++ b/backend/app/db/models/hubspot_user.py
@@ -0,0 +1,13 @@
+from sqlmodel import SQLModel, Field
+from datetime import datetime
+from typing import Optional
+
+
+class HubspotUser(SQLModel, table=True):
+    """Row of the ``hubspot_users`` table, keyed by ``hubspot_owner_id``."""
+
+    __tablename__ = "hubspot_users"
+
+    hubspot_owner_id: str = Field(primary_key=True)
+    first_name: Optional[str] = None
+    last_name: Optional[str] = None
+    email: Optional[str] = None
+    # No default is declared, so a value must always be supplied.
+    updated_at: datetime
diff --git a/backend/app/db/models/magic_plan.py b/backend/app/db/models/magic_plan.py
new file mode 100644
index 00000000..38e9de18
--- /dev/null
+++ b/backend/app/db/models/magic_plan.py
@@ -0,0 +1,52 @@
+from typing import Optional
+
+from sqlmodel import Field, SQLModel
+
+
+class MagicPlanPlanModel(SQLModel, table=True):
+    """Row of ``magic_plan_plan``; ``magic_plan_uid`` is unique and indexed."""
+
+    __tablename__ = "magic_plan_plan"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    magic_plan_uid: Optional[str] = Field(default=None, unique=True, index=True)
+    name: Optional[str] = Field(default=None)
+    address: Optional[str] = Field(default=None)
+    postcode: Optional[str] = Field(default=None)
+
+
+class MagicPlanFloorModel(SQLModel, table=True):
+    """Row of ``magic_plan_floor``, linked to a plan via ``magic_plan_plan_id``."""
+
+    __tablename__ = "magic_plan_floor"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    magic_plan_plan_id: int = Field(foreign_key="magic_plan_plan.id")
+    level: Optional[int] = Field(default=None)
+
+
+class MagicPlanRoomModel(SQLModel, table=True):
+    """Row of ``magic_plan_room``, linked to a floor via ``magic_plan_floor_id``."""
+
+    __tablename__ = "magic_plan_room"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    magic_plan_floor_id: int = Field(foreign_key="magic_plan_floor.id")
+    name: Optional[str] = Field(default=None)
+    width_m: Optional[float] = Field(default=None)
+    length_m: Optional[float] = Field(default=None)
+    area_m2: Optional[float] = Field(default=None)
+
+
+class MagicPlanWindowModel(SQLModel, table=True):
+    """Row of ``magic_plan_window``, linked to a room via ``magic_plan_room_id``."""
+
+    __tablename__ = "magic_plan_window"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    magic_plan_room_id: int = Field(foreign_key="magic_plan_room.id")
+    width_m: Optional[float] = Field(default=None)
+    height_m: Optional[float] = Field(default=None)
+    area_m2: Optional[float] = Field(default=None)
+    opening_type: Optional[str] = Field(default=None)
+
+
+class MagicPlanDoorModel(SQLModel, table=True):
+    """Row of ``magic_plan_door``, linked to a room via ``magic_plan_room_id``."""
+
+    __tablename__ = "magic_plan_door"
+
+    id: Optional[int] = Field(default=None, primary_key=True)
+    magic_plan_room_id: int = Field(foreign_key="magic_plan_room.id")
+    # NOTE(review): presumably millimetres given the name — confirm the unit.
+    width_mm: Optional[float] = Field(default=None)
+    type: Optional[str] = Field(default=None)
diff --git a/backend/app/db/models/tests/test_magic_plan_models.py b/backend/app/db/models/tests/test_magic_plan_models.py
new file mode 100644
index 00000000..0830b184
--- /dev/null
+++ b/backend/app/db/models/tests/test_magic_plan_models.py
@@ -0,0 +1,134 @@
+from backend.app.db.models.magic_plan import (
+ MagicPlanDoorModel,
+ MagicPlanFloorModel,
+ MagicPlanPlanModel,
+ MagicPlanRoomModel,
+ MagicPlanWindowModel,
+)
+
+# --- MagicPlanPlan ---
+
+
+def test_plan_table_name() -> None:
+    """The plan model maps to the ``magic_plan_plan`` table."""
+    expected_table = "magic_plan_plan"
+    assert MagicPlanPlanModel.__tablename__ == expected_table
+
+
+def test_plan_has_magic_plan_uid_column() -> None:
+    """The plan table exposes a ``magic_plan_uid`` column."""
+    plan_columns = MagicPlanPlanModel.__table__.columns
+    assert "magic_plan_uid" in plan_columns
+
+
+def test_plan_magic_plan_uid_is_unique() -> None:
+    """``magic_plan_uid`` is unique via a table constraint or the column flag."""
+    table = MagicPlanPlanModel.__table__
+    uid_column = table.columns["magic_plan_uid"]
+    # Only constraints that carry columns and include magic_plan_uid count.
+    unique_by_constraint = any(
+        constraint.unique
+        for constraint in table.constraints
+        if hasattr(constraint, "columns")
+        and "magic_plan_uid" in [member.name for member in constraint.columns]
+    )
+    assert unique_by_constraint or uid_column.unique
+
+
+def test_plan_instantiation() -> None:
+    """Constructor keyword values are readable back from the plan model."""
+    field_values = {
+        "magic_plan_uid": "uid-123",
+        "name": "Test",
+        "address": "1 High St",
+        "postcode": "SW1A 1AA",
+    }
+    plan = MagicPlanPlanModel(**field_values)
+    assert plan.magic_plan_uid == "uid-123"
+    assert plan.name == "Test"
+    assert plan.postcode == "SW1A 1AA"
+
+
+# --- MagicPlanFloor ---
+
+
+def test_floor_table_name() -> None:
+    """The floor model maps to the ``magic_plan_floor`` table."""
+    expected_table = "magic_plan_floor"
+    assert MagicPlanFloorModel.__tablename__ == expected_table
+
+
+def test_floor_fk_column_name() -> None:
+    """The floor table has a ``magic_plan_plan_id`` column."""
+    floor_columns = MagicPlanFloorModel.__table__.columns
+    assert "magic_plan_plan_id" in floor_columns
+
+
+def test_floor_has_level() -> None:
+    """A floor keeps the ``level`` it was constructed with."""
+    ground_floor = MagicPlanFloorModel(magic_plan_plan_id=1, level=0)
+    stored_level = ground_floor.level
+    assert stored_level == 0
+
+
+# --- MagicPlanRoom ---
+
+
+def test_room_table_name() -> None:
+    """The room model maps to the ``magic_plan_room`` table."""
+    expected_table = "magic_plan_room"
+    assert MagicPlanRoomModel.__tablename__ == expected_table
+
+
+def test_room_fk_column_name() -> None:
+    """The room table has a ``magic_plan_floor_id`` column."""
+    room_columns = MagicPlanRoomModel.__table__.columns
+    assert "magic_plan_floor_id" in room_columns
+
+
+def test_room_has_measurement_columns() -> None:
+    """The room table has width, length and area columns."""
+    room_columns = MagicPlanRoomModel.__table__.columns
+    for column_name in ("width_m", "length_m", "area_m2"):
+        assert column_name in room_columns
+
+
+def test_room_instantiation() -> None:
+    """Constructor keyword values are readable back from the room model."""
+    field_values = {
+        "magic_plan_floor_id": 1,
+        "name": "Kitchen",
+        "width_m": 2.67,
+        "length_m": 2.98,
+        "area_m2": 7.95,
+    }
+    room = MagicPlanRoomModel(**field_values)
+    assert room.name == "Kitchen"
+    assert room.width_m == 2.67
+
+
+# --- MagicPlanWindow ---
+
+
+def test_window_table_name() -> None:
+    """The window model maps to the ``magic_plan_window`` table."""
+    expected_table = "magic_plan_window"
+    assert MagicPlanWindowModel.__tablename__ == expected_table
+
+
+def test_window_fk_column_name() -> None:
+    """The window table has a ``magic_plan_room_id`` column."""
+    window_columns = MagicPlanWindowModel.__table__.columns
+    assert "magic_plan_room_id" in window_columns
+
+
+def test_window_has_measurement_columns() -> None:
+    """The window table has width, height, area and opening-type columns."""
+    window_columns = MagicPlanWindowModel.__table__.columns
+    for column_name in ("width_m", "height_m", "area_m2", "opening_type"):
+        assert column_name in window_columns
+
+
+def test_window_instantiation() -> None:
+    """A window keeps the ``opening_type`` it was constructed with."""
+    field_values = {
+        "magic_plan_room_id": 1,
+        "width_m": 1.4,
+        "height_m": 1.2,
+        "area_m2": 1.68,
+        "opening_type": "casement",
+    }
+    window = MagicPlanWindowModel(**field_values)
+    assert window.opening_type == "casement"
+
+
+# --- MagicPlanDoor ---
+
+
+def test_door_table_name() -> None:
+    """The door model maps to the ``magic_plan_door`` table."""
+    expected_table = "magic_plan_door"
+    assert MagicPlanDoorModel.__tablename__ == expected_table
+
+
+def test_door_fk_column_name() -> None:
+    """The door table has a ``magic_plan_room_id`` column."""
+    door_columns = MagicPlanDoorModel.__table__.columns
+    assert "magic_plan_room_id" in door_columns
+
+
+def test_door_has_width_mm_and_type() -> None:
+    """The door table has ``width_mm`` and ``type`` columns."""
+    door_columns = MagicPlanDoorModel.__table__.columns
+    for column_name in ("width_mm", "type"):
+        assert column_name in door_columns
+
+
+def test_door_instantiation() -> None:
+    """Constructor keyword values are readable back from the door model."""
+    field_values = {"magic_plan_room_id": 1, "width_mm": 0.79, "type": "hinged"}
+    door = MagicPlanDoorModel(**field_values)
+    assert door.width_mm == 0.79
+    assert door.type == "hinged"
diff --git a/backend/app/local/router.py b/backend/app/local/router.py
index 0977be04..ea04dc49 100644
--- a/backend/app/local/router.py
+++ b/backend/app/local/router.py
@@ -2,8 +2,8 @@ from fastapi import APIRouter, HTTPException, status
from jose import jwt, jwe
import json
import datetime
-from app.config import get_settings
-from app.dependencies import get_derived_encryption_key
+from backend.app.config import get_settings
+from backend.app.dependencies import get_derived_encryption_key
router = APIRouter(
prefix="/local",
@@ -27,7 +27,12 @@ def create_dummy_token(secret: str) -> str:
"dbId": "known_id",
}
- token = jwe.encrypt(json.dumps(claims), get_derived_encryption_key(secret), algorithm="dir", encryption="A256GCM")
+ token = jwe.encrypt(
+ json.dumps(claims),
+ get_derived_encryption_key(secret),
+ algorithm="dir",
+ encryption="A256GCM",
+ )
return token
@@ -40,6 +45,8 @@ async def dummy_token():
async def dummy_token():
settings = get_settings()
if settings.ENVIRONMENT != "local":
- raise HTTPException(status_code=status.HTTP_403_FORBIDDEN,
- detail="Dummy token can only be generated in local environment")
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Dummy token can only be generated in local environment",
+ )
return {"dummy_token": create_dummy_token(settings.SECRET_KEY)}
diff --git a/backend/app/main.py b/backend/app/main.py
index c9733c18..55dfef7d 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -30,10 +30,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
logger.error(f"Validation Errors: {exc.errors()}")
return JSONResponse(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
- content=jsonable_encoder({
- "detail": exc.errors(),
- "body": exc.body
- }),
+ content=jsonable_encoder({"detail": exc.errors(), "body": exc.body}),
)
@@ -63,7 +60,8 @@ app.include_router(tasks_router.router, prefix="/v1")
app.include_router(bulk_uploads_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
- from app.local import router as local_router
+ from backend.app.local import router as local_router
+
app.include_router(local_router.router)
handler = Mangum(app)
@@ -98,10 +96,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
logger.error(f"Validation Errors: {exc.errors()}")
return JSONResponse(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
- content=jsonable_encoder({
- "detail": exc.errors(),
- "body": exc.body
- }),
+ content=jsonable_encoder({"detail": exc.errors(), "body": exc.body}),
)
@@ -130,7 +125,8 @@ app.include_router(whlg_router.router, prefix="/v1")
app.include_router(bulk_uploads_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
- from app.local import router as local_router
+ from backend.app.local import router as local_router
+
app.include_router(local_router.router)
handler = Mangum(app)
diff --git a/backend/documents_parser/db_writer.py b/backend/documents_parser/db_writer.py
new file mode 100644
index 00000000..2039aabe
--- /dev/null
+++ b/backend/documents_parser/db_writer.py
@@ -0,0 +1,76 @@
+from typing import Optional
+
+from sqlmodel import Session
+
+from backend.app.db.models.epc_property import (
+ EpcBuildingPartModel,
+ EpcEnergyElementModel,
+ EpcFlatDetailsModel,
+ EpcFloorDimensionModel,
+ EpcMainHeatingDetailModel,
+ EpcPropertyEnergyPerformanceModel,
+ EpcPropertyModel,
+ EpcWindowModel,
+)
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+
+
+def save_epc_property_data(
+    session: Session,
+    data: EpcPropertyData,
+    uploaded_file_id: Optional[int] = None,
+    property_id: Optional[int] = None,
+    portfolio_id: Optional[int] = None,
+) -> EpcPropertyModel:
+    """Add an EPC property row and all of its child rows to ``session``.
+
+    The session is flushed after the property row and after each building
+    part, and the resulting ids are used as parent keys for the children.
+    No commit is issued here. Returns the property row.
+    """
+    property_row = EpcPropertyModel.from_epc_property_data(
+        data, property_id=property_id, portfolio_id=portfolio_id
+    )
+    property_row.uploaded_file_id = uploaded_file_id
+    session.add(property_row)
+    session.flush()
+    assert property_row.id is not None
+    epc_property_id: int = property_row.id
+
+    session.add(
+        EpcPropertyEnergyPerformanceModel.from_epc_property_data(
+            data, epc_property_id=epc_property_id
+        )
+    )
+
+    for heating_detail in data.sap_heating.main_heating_details:
+        session.add(
+            EpcMainHeatingDetailModel.from_domain(heating_detail, epc_property_id)
+        )
+
+    for building_part in data.sap_building_parts:
+        part_row = EpcBuildingPartModel.from_domain(building_part, epc_property_id)
+        session.add(part_row)
+        # The floor dimensions below need the building part's id.
+        session.flush()
+        assert part_row.id is not None
+        for dimension in building_part.sap_floor_dimensions:
+            session.add(EpcFloorDimensionModel.from_domain(dimension, part_row.id))
+
+    for sap_window in data.sap_windows:
+        session.add(EpcWindowModel.from_domain(sap_window, epc_property_id))
+
+    # Element collections: one row per entry, tagged with its element type.
+    for elements, element_type in (
+        (data.roofs, "roof"),
+        (data.walls, "wall"),
+        (data.floors, "floor"),
+        (data.main_heating, "main_heating"),
+    ):
+        for element in elements:
+            session.add(
+                EpcEnergyElementModel.from_domain(
+                    element, element_type, epc_property_id
+                )
+            )
+
+    # Single optional elements: stored only when present.
+    optional_elements = (
+        (data.window, "window"),
+        (data.lighting, "lighting"),
+        (data.hot_water, "hot_water"),
+        (data.secondary_heating, "secondary_heating"),
+        (data.main_heating_controls, "main_heating_controls"),
+    )
+    for element, element_type in optional_elements:
+        if element is None:
+            continue
+        session.add(
+            EpcEnergyElementModel.from_domain(element, element_type, epc_property_id)
+        )
+
+    flat_details = data.sap_flat_details
+    if flat_details is not None:
+        session.add(EpcFlatDetailsModel.from_domain(flat_details, epc_property_id))
+
+    return property_row
diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py
new file mode 100644
index 00000000..e78d98de
--- /dev/null
+++ b/backend/documents_parser/elmhurst_extractor.py
@@ -0,0 +1,451 @@
+import re
+from datetime import date, datetime
+from typing import List, Optional
+
+from datatypes.epc.surveys.elmhurst_site_notes import (
+ BathsAndShowers,
+ BuildingPartDimensions,
+ ElmhurstSiteNotes,
+ FloorDetails,
+ FloorDimension,
+ Lighting,
+ MainHeating,
+ Meters,
+ PropertyDetails,
+ Renewables,
+ RoofDetails,
+ Shower,
+ SurveyorInfo,
+ VentilationAndCooling,
+ WallDetails,
+ WaterHeating,
+ Window,
+)
+
+
+class ElmhurstSiteNotesExtractor:
+ def __init__(self, pages: List[str]) -> None:
+     """Join ``pages`` into one text and cache its stripped, non-blank lines."""
+     self._text = "\n".join(pages)
+     stripped_lines = (raw_line.strip() for raw_line in self._text.splitlines())
+     self._lines = [line for line in stripped_lines if line]
+
+ # --- generic helpers ---
+
+ def _next_val(self, label: str) -> Optional[str]:
+     """Return the value recorded for ``label`` anywhere in the document.
+
+     Applies the ``_local_val`` lookup to every cached line: the text after
+     ``label:`` on the same line, otherwise the first non-empty line among
+     the next three, or ``None`` when a line ending in ``:`` or starting
+     with ``©`` comes first or the label is absent.
+     """
+     return self._local_val(self._lines, label)
+
+ def _str_val(self, label: str) -> str:
+     """Return the value for ``label`` with whitespace collapsed, or ``""``."""
+     raw = self._next_val(label)
+     if not raw:
+         return ""
+     return " ".join(raw.split())
+
+ def _opt_str(self, label: str) -> Optional[str]:
+     """Return the value for ``label`` with whitespace collapsed, or ``None``."""
+     raw = self._next_val(label)
+     if not raw:
+         return None
+     return " ".join(raw.split())
+
+ def _bool_val(self, label: str) -> bool:
+     """Return ``True`` only when the value for ``label`` is "yes" in any case."""
+     raw = self._next_val(label)
+     if raw is None:
+         return False
+     return raw.lower() == "yes"
+
+ def _int_val(self, label: str) -> int:
+     """Return the leading integer of the value for ``label``.
+
+     Falls back to ``0`` when the label is absent or its first
+     whitespace-separated token is not an integer.
+     """
+     raw = self._next_val(label)
+     if not raw:
+         return 0
+     try:
+         return int(raw.split()[0])
+     except (ValueError, IndexError):
+         return 0
+
+ def _date_val(self, label: str) -> date:
+     """Parse the value for ``label`` as a ``DD/MM/YYYY`` date.
+
+     Raises:
+         ValueError: when the label has no value, or the value does not
+             match the ``%d/%m/%Y`` format.
+     """
+     raw = self._next_val(label)
+     if raw:
+         return datetime.strptime(raw.strip(), "%d/%m/%Y").date()
+     raise ValueError(f"Missing date for label: {label}")
+
+ def _between(self, start: str, end: str) -> str:
+     """Return the text between the first ``start`` and the next ``end``.
+
+     Returns ``""`` when ``start`` is not found, or when ``end`` does not
+     occur after it.
+     """
+     start_at = self._text.find(start)
+     if start_at == -1:
+         return ""
+     body_from = start_at + len(start)
+     body_to = self._text.find(end, body_from)
+     if body_to == -1:
+         return ""
+     return self._text[body_from:body_to]
+
+ def _section_lines(self, start: str, end: str) -> List[str]:
+     """Return the stripped, non-blank lines found between ``start`` and ``end``."""
+     section_text = self._between(start, end)
+     stripped_lines = (raw_line.strip() for raw_line in section_text.splitlines())
+     return [line for line in stripped_lines if line]
+
+ def _local_val(self, lines: List[str], label: str) -> Optional[str]:
+     """Return the value recorded for ``label`` within ``lines``.
+
+     A line of the form ``label: value`` yields the text after the colon.
+     A line that is exactly the label (with or without the colon) yields
+     the first non-empty line among the next three. ``None`` is returned
+     when a line ending in ``:`` or starting with ``©`` comes first, or
+     when the label never appears.
+     """
+     bare_label = label.rstrip(":")
+     colon_label = f"{bare_label}:"
+     for position, line in enumerate(lines):
+         if line.startswith(colon_label) and len(line) > len(colon_label):
+             return line[len(colon_label):].strip() or None
+         if line not in (colon_label, bare_label):
+             continue
+         # Label stands alone: look ahead at most three lines for its value.
+         for candidate in lines[position + 1 : position + 4]:
+             if candidate.endswith(":") or candidate.startswith("©"):
+                 return None
+             if candidate:
+                 return candidate
+         return None
+     return None
+
+ def _local_str(self, lines: List[str], label: str) -> str:
+     """Return ``label``'s value within ``lines``, whitespace collapsed, or ``""``."""
+     raw = self._local_val(lines, label)
+     if not raw:
+         return ""
+     return " ".join(raw.split())
+
+ def _local_bool(self, lines: List[str], label: str) -> bool:
+     """Return ``True`` only when ``label``'s value within ``lines`` is "yes"."""
+     raw = self._local_val(lines, label)
+     if raw is None:
+         return False
+     return raw.lower() == "yes"
+
+ # --- section extractors ---
+
+ def _extract_surveyor_info(self) -> SurveyorInfo:
+     """Build ``SurveyorInfo`` from the surveyor labels in the document."""
+     required_labels = {
+         "surveyor_code": "Surveyor",
+         "name": "Name",
+         "title": "Title",
+         "tel_number": "Tel Number",
+         "survey_reference": "Survey Reference",
+     }
+     required_values = {
+         field: self._str_val(label) for field, label in required_labels.items()
+     }
+     return SurveyorInfo(
+         my_reference=self._opt_str("My Reference"),
+         **required_values,
+     )
+
+ def _extract_property_details(self) -> PropertyDetails:
+     """Build ``PropertyDetails`` from the property labels in the document.
+
+     Raises:
+         ValueError: when the inspection or process date is missing or is
+             not in ``DD/MM/YYYY`` form.
+     """
+     # This label is split over two lines in the text, so it is matched
+     # against the raw text rather than looked up by label.
+     epc_match = re.search(
+         r"Check for the existence of\nan EPC:\n(Yes|No)", self._text
+     )
+     epc_exists = epc_match is not None and epc_match.group(1).lower() == "yes"
+
+     required_labels = {
+         "rdsap_version": "RdSAP version",
+         "reference_number": "Reference Number",
+         "regs_region": "Regs Region",
+         "epc_language": "EPC Language",
+         "postcode": "Postcode",
+         "region": "Region",
+         "street": "Street",
+         "town": "Town",
+         "tenure": "Property Tenure",
+         "transaction_type": "Transaction Type",
+     }
+     optional_labels = {
+         "uprn": "UPRN",
+         "house_name": "House Name",
+         "house_number": "House No",
+         "locality": "Locality",
+         "county": "County",
+     }
+     required_values = {
+         field: self._str_val(label) for field, label in required_labels.items()
+     }
+     inspection_date = self._date_val("Inspection Date")
+     process_date = self._date_val("Process date")
+     optional_values = {
+         field: self._opt_str(label) for field, label in optional_labels.items()
+     }
+
+     return PropertyDetails(
+         lodgement_required=self._bool_val("Lodgement Required"),
+         inspection_date=inspection_date,
+         process_date=process_date,
+         epc_exists=epc_exists,
+         **required_values,
+         **optional_values,
+     )
+
+ def _extract_attachment(self) -> str:
+     """Return the second line after ``1.0 Property type:``, or ``""``.
+
+     Whitespace in the returned line is collapsed to single spaces.
+     """
+     match = re.search(r"1\.0 Property type:\n[^\n]+\n([^\n]+)", self._text)
+     if match is None:
+         return ""
+     return " ".join(match.group(1).split())
+
+ def _extract_dimensions(self) -> BuildingPartDimensions:
+     """Build ``BuildingPartDimensions`` from section 4.0 of the document.
+
+     Each floor is a ``<name> Floor:`` line followed by four numeric lines,
+     read in order as area, room height, heat-loss perimeter and
+     party-wall length.
+     """
+     dimension_type = self._str_val("Dimension type")
+     section_text = self._between("4.0 Dimensions:", "5.0 Conservatory:")
+     floor_pattern = r"([A-Za-z ]+Floor):\n([\d.]+)\n([\d.]+)\n([\d.]+)\n([\d.]+)"
+
+     floors = []
+     for floor_match in re.finditer(floor_pattern, section_text):
+         name, area, height, perimeter, party_wall = floor_match.groups()
+         floors.append(
+             FloorDimension(
+                 name=name.strip(),
+                 area_m2=float(area),
+                 room_height_m=float(height),
+                 heat_loss_perimeter_m=float(perimeter),
+                 party_wall_length_m=float(party_wall),
+             )
+         )
+     return BuildingPartDimensions(dimension_type=dimension_type, floors=floors)
+
+ def _extract_walls(self) -> WallDetails:
+     """Build ``WallDetails`` from the lines between sections 7.0 and 8.0.
+
+     ``thickness_mm`` is the leading integer of the "Wall Thickness" value.
+     It is ``None`` when the label is absent or that token is not an
+     integer, so an unparsable thickness no longer aborts the extraction;
+     this mirrors the tolerant parsing in ``_int_val``.
+     """
+     lines = self._section_lines("7.0 Walls:", "8.0 Roofs:")
+     thickness_raw = self._local_val(lines, "Wall Thickness")
+     thickness_mm: Optional[int] = None
+     if thickness_raw:
+         try:
+             thickness_mm = int(thickness_raw.split()[0])
+         except (ValueError, IndexError):
+             thickness_mm = None
+     return WallDetails(
+         wall_type=self._local_str(lines, "Type"),
+         insulation=self._local_str(lines, "Insulation"),
+         thickness_unknown=self._local_bool(lines, "Wall Thickness Unknown"),
+         u_value_known=self._local_bool(lines, "U-value Known"),
+         party_wall_type=self._local_str(lines, "Party Wall Type"),
+         thickness_mm=thickness_mm,
+     )
+
+ def _extract_roof(self) -> RoofDetails:
+     """Build ``RoofDetails`` from the lines between sections 8.0 and 8.1.
+
+     ``insulation_thickness_mm`` is the leading integer of the "Insulation
+     Thickness" value. It is ``None`` when the label is absent or that
+     token is not an integer, so an unparsable thickness no longer aborts
+     the extraction; this mirrors the tolerant parsing in ``_int_val``.
+     """
+     lines = self._section_lines("8.0 Roofs:", "8.1 Rooms in Roof:")
+     thickness_raw = self._local_val(lines, "Insulation Thickness")
+     thickness_mm: Optional[int] = None
+     if thickness_raw:
+         try:
+             thickness_mm = int(thickness_raw.split()[0])
+         except (ValueError, IndexError):
+             thickness_mm = None
+     return RoofDetails(
+         roof_type=self._local_str(lines, "Type"),
+         insulation=self._local_str(lines, "Insulation"),
+         u_value_known=self._local_bool(lines, "U-value Known"),
+         insulation_thickness_mm=thickness_mm,
+     )
+
+ def _extract_floor(self) -> FloorDetails:
+     """Build ``FloorDetails`` from the lines between sections 9.0 and 10.0.
+
+     ``default_u_value`` is the "Default U-value" value parsed as a float.
+     It is ``None`` when the label is absent or the value is not a number,
+     so an unparsable value no longer aborts the extraction.
+     """
+     lines = self._section_lines("9.0 Floors:", "10.0 Doors:")
+     u_val_raw = self._local_val(lines, "Default U-value")
+     default_u: Optional[float] = None
+     if u_val_raw:
+         try:
+             default_u = float(u_val_raw)
+         except ValueError:
+             default_u = None
+     return FloorDetails(
+         location=self._local_str(lines, "Location"),
+         floor_type=self._local_str(lines, "Type"),
+         insulation=self._local_str(lines, "Insulation"),
+         u_value_known=self._local_bool(lines, "U-value Known"),
+         default_u_value=default_u,
+     )
+
+ def _extract_windows(self) -> List[Window]:
+     """Parse the window table into a list of ``Window`` objects.
+
+     The table is the text between the "Permanent Shutters" header and the
+     next "Draught Proofing". It is read as one token per non-blank line.
+     Each row is: width, height, area, one or more glazing-type tokens
+     (the last may be a single-word frame type), frame factor, an optional
+     glazing gap, then eight fixed columns (building part, location,
+     orientation, data source, U-value, g-value, draught proofed,
+     permanent shutters).
+
+     Returns ``[]`` when the table is not found. Parsing stops, keeping the
+     rows already read, when the final row is truncated; previously the
+     fixed columns were indexed without a bounds check and a truncated
+     table raised ``IndexError``.
+
+     Raises:
+         ValueError: when a U-value or g-value token is not a number.
+     """
+     table_match = re.search(
+         r"Permanent\s+Shutters\n(.*?)Draught Proofing",
+         self._text,
+         re.DOTALL,
+     )
+     if not table_match:
+         return []
+     tokens = [t.strip() for t in table_match.group(1).splitlines() if t.strip()]
+     windows: List[Window] = []
+     i = 0
+     while i + 12 < len(tokens):
+         # A row starts at three consecutive numeric tokens; otherwise
+         # slide forward one token and try again.
+         try:
+             width_m = float(tokens[i])
+             height_m = float(tokens[i + 1])
+             area_m2 = float(tokens[i + 2])
+         except (ValueError, IndexError):
+             i += 1
+             continue
+         i += 3
+         # Collect glazing type tokens until frame_factor (0 < v ≤ 1.0)
+         glazing_parts: List[str] = []
+         while i < len(tokens):
+             try:
+                 v = float(tokens[i])
+                 if 0.0 < v <= 1.0:
+                     break
+                 glazing_parts.append(tokens[i])
+             except ValueError:
+                 glazing_parts.append(tokens[i])
+             i += 1
+         # If last glazing token is a single word (no spaces, not numeric)
+         # it's the frame_type
+         frame_type: Optional[str] = None
+         if (
+             glazing_parts
+             and " " not in glazing_parts[-1]
+             and not glazing_parts[-1].replace(".", "").isdigit()
+         ):
+             frame_type = glazing_parts.pop()
+         glazing_type = " ".join(glazing_parts).strip()
+         if i >= len(tokens):
+             break
+         frame_factor = float(tokens[i])
+         i += 1
+         # Consume glazing_gap if present ("mm" token, possibly multi-token
+         # e.g. "16 mm or more")
+         glazing_gap: Optional[str] = None
+         if i < len(tokens) and "mm" in tokens[i]:
+             gap_parts = [tokens[i]]
+             i += 1
+             while i < len(tokens) and tokens[i].lower() in {"or", "more"}:
+                 gap_parts.append(tokens[i])
+                 i += 1
+             glazing_gap = " ".join(gap_parts)
+         # Eight fixed columns must remain; a shorter tail is a truncated row.
+         if len(tokens) - i < 8:
+             break
+         (
+             building_part,
+             location,
+             orientation,
+             data_source,
+             u_value_raw,
+             g_value_raw,
+             draught_raw,
+             permanent_shutters,
+         ) = tokens[i : i + 8]
+         i += 8
+         u_value = float(u_value_raw)
+         g_value = float(g_value_raw)
+         draught_proofed = draught_raw.lower() == "yes"
+         windows.append(
+             Window(
+                 width_m=width_m,
+                 height_m=height_m,
+                 area_m2=area_m2,
+                 glazing_type=glazing_type,
+                 frame_factor=frame_factor,
+                 building_part=building_part,
+                 location=location,
+                 orientation=orientation,
+                 data_source=data_source,
+                 u_value=u_value,
+                 g_value=g_value,
+                 draught_proofed=draught_proofed,
+                 permanent_shutters=permanent_shutters,
+                 frame_type=frame_type,
+                 glazing_gap=glazing_gap,
+             )
+         )
+     return windows
+
+    def _extract_ventilation(self) -> VentilationAndCooling:
+        """Assemble the ventilation and cooling record from labelled values.
+
+        Counts are read with ``_int_val``, yes/no flags with ``_bool_val`` and
+        free-text values with ``_str_val``.
+        """
+        # Model field -> report label for every integer count, in lookup order.
+        count_labels = {
+            "open_chimneys_count": "No. of open chimneys",
+            "open_flues_count": "No. of open flues",
+            "open_chimneys_closed_fire_count": (
+                "No. of open chimneys/open flues attached to closed fire"
+            ),
+            "solid_fuel_boiler_flues_count": (
+                "No. of flues attached to solid fuel boiler"
+            ),
+            "other_heater_flues_count": (
+                "No. of open flues attached to other heater"
+            ),
+            "blocked_chimneys_count": "No. of blocked chimneys",
+            "extract_fans_count": "No. of intermittent extract fans",
+            "passive_vents_count": "No. of passive vents",
+            "flueless_gas_fires_count": "No. of flueless gas fires",
+        }
+        counts = {
+            field: self._int_val(label) for field, label in count_labels.items()
+        }
+        return VentilationAndCooling(
+            **counts,
+            fixed_space_cooling=self._bool_val("Fixed Space Cooling"),
+            draught_lobby=self._str_val("Draught Lobby"),
+            mechanical_ventilation=self._bool_val("Mechanical Ventilation"),
+            pressure_test_method=self._str_val("Test Method"),
+        )
+
+    def _extract_lighting(self) -> Lighting:
+        """Build the lighting record.
+
+        ``low_energy_count`` is set to 0 when the report says the LED/CFL
+        counts are known; otherwise it is read from "Total number of Low
+        Energy".
+        """
+        counts_known = self._bool_val("Number of LED and CFL Known")
+        bulbs = self._int_val("Total number of bulbs")
+        leds = self._int_val("Number of LED lights")
+        cfls = self._int_val("Number of CFL lights")
+        incandescents = self._int_val("Total number of incandescents")
+        if counts_known:
+            low_energy = 0
+        else:
+            low_energy = self._int_val("Total number of Low Energy")
+        return Lighting(
+            total_bulbs=bulbs,
+            led_cfl_count_known=counts_known,
+            led_count=leds,
+            cfl_count=cfls,
+            incandescent_count=incandescents,
+            low_energy_count=low_energy,
+        )
+
+    def _extract_main_heating(self) -> MainHeating:
+        """Build the first main-heating record.
+
+        Only the lines between the "14.0 Main Heating1" and "14.1 Main Heating2"
+        headings are searched, so labels repeated under the second heating
+        system are not picked up.
+        """
+        section = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2")
+
+        # The percentage is written like "100 %"; keep the leading number only.
+        share_raw = self._local_val(section, "Percentage of Heat")
+        if share_raw:
+            heat_share = int(share_raw.split()[0])
+        else:
+            heat_share = 0
+
+        return MainHeating(
+            heat_emitter=self._local_str(section, "Heat Emitter"),
+            fuel_type=self._local_str(section, "Fuel Type"),
+            flue_type=self._local_str(section, "Flue Type"),
+            fan_assisted_flue=self._local_bool(section, "Fan Assisted Flue"),
+            design_flow_temperature=self._local_str(
+                section, "Design flow temperature"
+            ),
+            heating_controls_ees=self._local_str(
+                section, "Main Heating Controls EES"
+            ),
+            heating_controls_sap=self._local_str(
+                section, "Main Heating Controls Sap"
+            ),
+            percentage_of_heat=heat_share,
+            pcdf_boiler_reference=self._local_val(section, "PCDF boiler Reference"),
+            heat_pump_age=self._local_val(section, "Heat pump age"),
+        )
+
+    def _extract_meters(self) -> Meters:
+        """Build the meters record from its four labelled values."""
+        meter_type = self._str_val("Electricity meter type")
+        has_main_gas = self._bool_val("Main gas")
+        smart_electricity = self._bool_val("Electricity Smart Meter Present")
+        smart_gas = self._bool_val("Gas Smart Meter Present")
+        return Meters(
+            electricity_meter_type=meter_type,
+            main_gas=has_main_gas,
+            electricity_smart_meter=smart_electricity,
+            gas_smart_meter=smart_gas,
+        )
+
+    def _extract_water_heating(self) -> WaterHeating:
+        """Build the water-heating record from its labelled values."""
+        code = self._str_val("Water Heating Code")
+        sap_code = self._int_val("Water Heating SapCode")
+        fuel = self._str_val("Water Heating Fuel Type")
+        has_cylinder = self._bool_val("Hot Water Cylinder Present")
+        return WaterHeating(
+            water_heating_code=code,
+            water_heating_sap_code=sap_code,
+            water_heating_fuel_type=fuel,
+            hot_water_cylinder_present=has_cylinder,
+        )
+
+    def _extract_baths_and_showers(self) -> BathsAndShowers:
+        """Build the baths-and-showers record.
+
+        Shower rows are read as consecutive (number, outlet type, connected)
+        line triples that follow the first line equal to "Connected". Reading
+        stops at the first triple whose number line is not all digits, or when
+        fewer than three lines remain. With no "Connected" line the shower
+        list is empty.
+        """
+        bath_total = self._int_val("Total Number of Baths")
+        bath_connected = self._int_val("Number of Baths Connected")
+
+        shower_rows: List[Shower] = []
+        if "Connected" in self._lines:
+            pos = self._lines.index("Connected") + 1
+            while pos + 2 < len(self._lines) and self._lines[pos].isdigit():
+                number, outlet, connected = self._lines[pos:pos + 3]
+                shower_rows.append(
+                    Shower(
+                        shower_number=int(number),
+                        outlet_type=outlet,
+                        connected=connected,
+                    )
+                )
+                pos += 3
+
+        return BathsAndShowers(
+            number_of_baths=bath_total,
+            number_of_baths_connected=bath_connected,
+            showers=shower_rows,
+        )
+
+    def _rating_val(self, label: str) -> int:
+        """Return the trailing integer of the value found for *label*.
+
+        Rating values look like "C 69"; the last whitespace-separated word is
+        converted to ``int``. Returns 0 when the value is missing, blank, or
+        its last word is not an integer.
+        """
+        raw = self._next_val(label)
+        if not raw:
+            return 0
+        try:
+            return int(raw.split()[-1])
+        except (ValueError, IndexError):
+            return 0
+
+    def _extract_renewables(self) -> Renewables:
+        """Build the renewables record.
+
+        The flue-gas heat recovery flag is read from the "Present" label inside
+        the section between "18.0 Flue Gas Heat Recovery System" and
+        "19.0 Photovoltaic Panel". Hydro generation defaults to 0.0 when no
+        value is found.
+        """
+        fghrs_present = self._local_bool(
+            self._section_lines(
+                "18.0 Flue Gas Heat Recovery System", "19.0 Photovoltaic Panel"
+            ),
+            "Present",
+        )
+
+        terrain_type = self._str_val("Terrain Type")
+
+        hydro_text = self._next_val("Electricity generated [kWh/year]")
+        if hydro_text:
+            hydro_kwh = float(hydro_text)
+        else:
+            hydro_kwh = 0.0
+
+        return Renewables(
+            solar_water_heating=self._bool_val("Solar Water Heating"),
+            wwhrs_present=self._bool_val("Is WWHRS present in the property?"),
+            flue_gas_heat_recovery_present=fghrs_present,
+            photovoltaic_panel=self._str_val("Photovoltaic Panel"),
+            export_capable_meter=self._bool_val("Export capable meter"),
+            wind_turbine_present=self._bool_val("Wind turbine present?"),
+            wind_turbines_terrain_type=terrain_type,
+            hydro_electricity_generated_kwh=hydro_kwh,
+        )
+
+    def extract(self) -> ElmhurstSiteNotes:
+        """Run every section extractor and return the combined site notes.
+
+        CO2 emissions are taken from the first word of the "Emissions (t/year)"
+        value (e.g. "1.683 tonnes") and default to 0.0 when it is missing.
+        """
+        emissions_text = self._next_val("Emissions (t/year)")
+        if emissions_text:
+            co2_tonnes = float(emissions_text.split()[0])
+        else:
+            co2_tonnes = 0.0
+
+        return ElmhurstSiteNotes(
+            # Summary page
+            surveyor_info=self._extract_surveyor_info(),
+            property_details=self._extract_property_details(),
+            current_sap_rating=self._rating_val("Current SAP rating"),
+            potential_sap_rating=self._rating_val("Potential SAP rating"),
+            current_ei_rating=self._rating_val("Current EI rating"),
+            potential_ei_rating=self._rating_val("Potential EI rating"),
+            co2_emissions_current_t=co2_tonnes,
+            # Property description
+            property_type=self._str_val("1.0 Property type"),
+            attachment=self._extract_attachment(),
+            number_of_storeys=self._int_val("Storeys"),
+            habitable_rooms=self._int_val("Habitable Rooms"),
+            heated_habitable_rooms=self._int_val("Heated Habitable Rooms"),
+            construction_age_band=self._str_val("Main Property"),
+            dimensions=self._extract_dimensions(),
+            has_conservatory=self._bool_val("Is there a conservatory?"),
+            # Fabric
+            walls=self._extract_walls(),
+            roof=self._extract_roof(),
+            floor=self._extract_floor(),
+            door_count=self._int_val("Total Number of Doors"),
+            insulated_door_count=self._int_val("Number of Insulated Doors"),
+            windows=self._extract_windows(),
+            draught_proofing_percent=self._int_val("Draught Proofing"),
+            # Services
+            ventilation=self._extract_ventilation(),
+            lighting=self._extract_lighting(),
+            main_heating=self._extract_main_heating(),
+            meters=self._extract_meters(),
+            water_heating=self._extract_water_heating(),
+            baths_and_showers=self._extract_baths_and_showers(),
+            renewables=self._extract_renewables(),
+        )
diff --git a/backend/documents_parser/extractor.py b/backend/documents_parser/extractor.py
index 822f7907..47022c55 100644
--- a/backend/documents_parser/extractor.py
+++ b/backend/documents_parser/extractor.py
@@ -66,9 +66,11 @@ class PasHubRdSapSiteNotesExtractor:
val = self._get_in(list_to_process, key)
return val is not None and val.lower() != "not known"
- def _wall_thickness_in(self, list_to_process: List[str]) -> int:
+ def _wall_thickness_in(self, list_to_process: List[str]) -> Optional[int]:
val = self._get_in(list_to_process, "Wall thickness:")
- return int(val.split()[0]) if val else 0
+ if not val or val.split()[0].lower() == "unmeasurable":
+ return None
+ return int(val.split()[0])
def _section(self, start: str, end: str) -> List[str]:
try:
@@ -83,10 +85,17 @@ class PasHubRdSapSiteNotesExtractor:
def extract_inspection_metadata(self) -> InspectionMetadata:
try:
addr_start = self.text_list.index("Property Address:") + 1
- addr_end = self.text_list.index("Property Photo", addr_start)
- property_address = ", ".join(
- t.rstrip(",") for t in self.text_list[addr_start:addr_end]
- )
+ try:
+ addr_end = self.text_list.index("Property Photo", addr_start)
+ address_tokens = self.text_list[addr_start:addr_end]
+ except ValueError:
+ addr_end = self.text_list.index("RdSAP Assessment", addr_start)
+ address_tokens = []
+ for t in self.text_list[addr_start:addr_end]:
+ if not t or t.startswith("Page "):
+ break
+ address_tokens.append(t)
+ property_address = ", ".join(t.rstrip(",") for t in address_tokens)
except ValueError:
property_address = ""
diff --git a/backend/documents_parser/local_runner.py b/backend/documents_parser/local_runner.py
new file mode 100644
index 00000000..89dc7cdb
--- /dev/null
+++ b/backend/documents_parser/local_runner.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Parse a local site-notes PDF and load the result into the database.
+
+Usage:
+ python local_runner.py
+"""
+from typing import List, Optional, Tuple
+
+from backend.app.db.connection import db_session
+from backend.app.db.models.epc_property import (
+ EpcBuildingPartModel,
+ EpcEnergyElementModel,
+ EpcFlatDetailsModel,
+ EpcFloorDimensionModel,
+ EpcMainHeatingDetailModel,
+ EpcPropertyEnergyPerformanceModel,
+ EpcPropertyModel,
+ EpcWindowModel,
+)
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
+from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
+from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+
+
+def _parse_pdf(pdf_path: str) -> EpcPropertyData:
+    """Read the PDF at *pdf_path* and map it to ``EpcPropertyData``.
+
+    A document whose text contains "Elmhurst Energy Systems" goes through the
+    Elmhurst extractor (fed page texts); anything else goes through the PasHub
+    extractor (fed the flat token list).
+    """
+    with open(pdf_path, "rb") as handle:
+        raw: bytes = handle.read()
+
+    page_texts: List[str] = pdf_to_pages(raw)
+    is_elmhurst: bool = "Elmhurst Energy Systems" in "\n".join(page_texts)
+
+    if not is_elmhurst:
+        token_list: List[str] = pdf_to_text_list(raw)
+        return EpcPropertyDataMapper.from_site_notes(
+            PasHubRdSapSiteNotesExtractor(token_list).extract()
+        )
+
+    return EpcPropertyDataMapper.from_elmhurst_site_notes(
+        ElmhurstSiteNotesExtractor(page_texts).extract()
+    )
+
+
+def _insert_energy_elements(
+    session,
+    elements: List[EnergyElement],
+    element_type: str,
+    epc_property_id: int,
+) -> None:
+    """Add one ``EpcEnergyElementModel`` row to *session* per element."""
+    for element in elements:
+        row = EpcEnergyElementModel.from_domain(
+            element, element_type, epc_property_id
+        )
+        session.add(row)
+
+
+def _insert_optional_energy_element(
+    session,
+    el: Optional[EnergyElement],
+    element_type: str,
+    epc_property_id: int,
+) -> None:
+    """Add *el* to *session* as an ``EpcEnergyElementModel`` row; no-op for ``None``."""
+    if el is None:
+        return
+    row = EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
+    session.add(row)
+
+
+def run(pdf_path: str) -> None:
+ """Parse the PDF at *pdf_path* and add the resulting rows to a database session.
+
+ Adds the property row, its energy-performance row, main-heating details,
+ building parts with their floor dimensions, windows, energy elements and
+ (when present) flat details, then prints the new property id and address.
+ """
+ data: EpcPropertyData = _parse_pdf(pdf_path)
+ print("successfully mapped pdf")
+
+ with db_session() as session:
+ epc_prop: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(data)
+ session.add(epc_prop)
+ # Flush before reading epc_prop.id (presumably populated by the database on
+ # flush - confirm against the session implementation).
+ session.flush()
+ # NOTE(review): `assert` is removed under `python -O`; an explicit check
+ # would keep this guard in optimized runs.
+ assert epc_prop.id is not None
+ epc_property_id: int = epc_prop.id
+
+ session.add(
+ EpcPropertyEnergyPerformanceModel.from_epc_property_data(
+ data, epc_property_id=epc_property_id
+ )
+ )
+
+ for detail in data.sap_heating.main_heating_details:
+ session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id))
+
+ for part in data.sap_building_parts:
+ bp: EpcBuildingPartModel = EpcBuildingPartModel.from_domain(
+ part, epc_property_id
+ )
+ session.add(bp)
+ # Each building part is flushed on its own because its floor dimensions
+ # are created with bp.id.
+ session.flush()
+ assert bp.id is not None
+ for dim in part.sap_floor_dimensions:
+ session.add(EpcFloorDimensionModel.from_domain(dim, bp.id))
+
+ for window in data.sap_windows:
+ session.add(EpcWindowModel.from_domain(window, epc_property_id))
+
+ # Element collections that are lists, paired with the element_type stored
+ # for each.
+ list_elements: List[Tuple[List[EnergyElement], str]] = [
+ (data.roofs, "roof"),
+ (data.walls, "wall"),
+ (data.floors, "floor"),
+ (data.main_heating, "main_heating"),
+ ]
+ for elements, etype in list_elements:
+ _insert_energy_elements(session, elements, etype, epc_property_id)
+
+ # Single elements that may be None; None entries are skipped by the helper.
+ optional_elements: List[Tuple[Optional[EnergyElement], str]] = [
+ (data.window, "window"),
+ (data.lighting, "lighting"),
+ (data.hot_water, "hot_water"),
+ (data.secondary_heating, "secondary_heating"),
+ (data.main_heating_controls, "main_heating_controls"),
+ ]
+ for el, etype in optional_elements:
+ _insert_optional_energy_element(session, el, etype, epc_property_id)
+
+ if data.sap_flat_details is not None:
+ session.add(
+ EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id)
+ )
+
+ print(f"epc_property_id={epc_property_id}")
+ print(f"address: {data.address_line_1}, {data.post_town}, {data.postcode}")
+
+
+if __name__ == "__main__":
+    import sys
+
+    # An optional first command-line argument selects the PDF to load, e.g.
+    #   python local_runner.py backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf
+    # With no argument the bundled Elmhurst fixture is used, as before.
+    default_pdf = "backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf"
+    run(sys.argv[1] if len(sys.argv) > 1 else default_pdf)
diff --git a/backend/documents_parser/parser.py b/backend/documents_parser/parser.py
new file mode 100644
index 00000000..cff21e0e
--- /dev/null
+++ b/backend/documents_parser/parser.py
@@ -0,0 +1,28 @@
+from typing import List
+
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
+from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
+
+
+def parse_site_notes_pdf(file_path: str) -> EpcPropertyData:
+    """Parse the site-notes PDF at *file_path* into ``EpcPropertyData``.
+
+    The document is treated as an Elmhurst report when its page text contains
+    "Elmhurst Energy Systems"; otherwise it is parsed as a PasHub report.
+    """
+    with open(file_path, "rb") as pdf_file:
+        raw_pdf = pdf_file.read()
+
+    page_texts = pdf_to_pages(raw_pdf)
+    combined_text = "\n".join(page_texts)
+    if "Elmhurst Energy Systems" not in combined_text:
+        return _parse_pashub(raw_pdf)
+    return _parse_elmhurst(page_texts)
+
+
+def _parse_elmhurst(pages: List[str]) -> EpcPropertyData:
+    """Extract Elmhurst site notes from page texts and map them to ``EpcPropertyData``."""
+    return EpcPropertyDataMapper.from_elmhurst_site_notes(
+        ElmhurstSiteNotesExtractor(pages).extract()
+    )
+
+
+def _parse_pashub(pdf_bytes: bytes) -> EpcPropertyData:
+    """Extract PasHub site notes from raw PDF bytes and map them to ``EpcPropertyData``."""
+    extractor = PasHubRdSapSiteNotesExtractor(pdf_to_text_list(pdf_bytes))
+    return EpcPropertyDataMapper.from_site_notes(extractor.extract())
diff --git a/backend/documents_parser/pdf.py b/backend/documents_parser/pdf.py
index dfa07300..53e209ad 100644
--- a/backend/documents_parser/pdf.py
+++ b/backend/documents_parser/pdf.py
@@ -10,3 +10,8 @@ def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
for line in page.get_text().split("\n"):
tokens.append(line)
return tokens
+
+
+def pdf_to_pages(pdf_bytes: bytes) -> List[str]:
+    """Return the extracted text of each page of the PDF, in page order."""
+    page_texts: List[str] = []
+    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc:
+        for page in doc:
+            page_texts.append(page.get_text())
+    return page_texts
diff --git a/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf b/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf
new file mode 100644
index 00000000..964c2ffb
Binary files /dev/null and b/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf differ
diff --git a/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes_2.pdf b/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes_2.pdf
new file mode 100644
index 00000000..f8565917
Binary files /dev/null and b/backend/documents_parser/tests/fixtures/ElmhurstSiteNotes_2.pdf differ
diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_1.pdf
similarity index 100%
rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes.pdf
rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_1.pdf
diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_2.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_2.pdf
similarity index 100%
rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_2.pdf
rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_2.pdf
diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_3.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_3.pdf
similarity index 100%
rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_3.pdf
rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_3.pdf
diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_4.pdf
similarity index 100%
rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf
rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_4.pdf
diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_5.pdf
similarity index 100%
rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf
rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_5.pdf
diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf
similarity index 100%
rename from backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf
rename to backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf
diff --git a/backend/documents_parser/tests/fixtures/PasHubSiteNotes_7.pdf b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_7.pdf
new file mode 100644
index 00000000..29d083c6
Binary files /dev/null and b/backend/documents_parser/tests/fixtures/PasHubSiteNotes_7.pdf differ
diff --git a/backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json b/backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json
new file mode 100644
index 00000000..3e21bc51
--- /dev/null
+++ b/backend/documents_parser/tests/fixtures/elmhurst_site_notes_1_text.json
@@ -0,0 +1,6 @@
+[
+ "Summary Information\nSurveyor:\nP960-0001\nName:\nRichard Matthew Ratcliff\nTitle: Mr.\nTel Number: 07760 443 469\nSurvey Reference:\n001573\nMy Reference:\nCurrent SAP rating:\nC 69\nPotential SAP rating: C 77\nEmissions (t/year):\n1.683 tonnes\nCurrent EI rating:\nC 76\nPotential EI rating:\nB 81\nFuel Bill:\n\u00a3896\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nP960-0001-001573\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB10 1XX\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n19\nStreet:\nQueens Road\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nB Bungalow\nE End-Terrace\n2.0 Number of\nStoreys:\n1\nHabitable Rooms:\n2\nHeated Habitable Rooms:\n2\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\nLowest Floor:\n44.89\n2.24\n20.10\n6.70\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nU Unable to determine\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
+ "Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n270 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nN Suspended, not timber\nInsulation\nA As built\nDefault U-value\n0.69\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n0\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.30\n1.10\n1.43\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n1.80\n1.00\n1.80\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nNorth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n0.80\n0.56\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\n0.70\n1.30\n0.91\nDouble post or during \n2022\n0.70\nMain\nExternal wall\nSouth\nManufacturer\n1.40\n0.72\nYes\nNone\nDraught Proofing\n100 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n0\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n8\nNumber of LED and CFL Known\nYes\nNumber of LED lights\n4\nNumber of CFL lights\n4\nTotal number of Low Energy\n8\nTotal number of incandescents\n0\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
+ "Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n17742 Potterton, Promax 33 Combi ErP, 88.30%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n0\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nElectric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nSuburban\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
+ "Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (Already installed)\nLow energy lighting (Already installed)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (Not applicable)\nDouble glazed windows (Already installed)\nInsulated doors (Already installed)\nSolar photovoltaic panels (Recommended)\nWind turbine (Not applicable)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n"
+]
\ No newline at end of file
diff --git a/backend/documents_parser/tests/fixtures/elmhurst_site_notes_2_text.json b/backend/documents_parser/tests/fixtures/elmhurst_site_notes_2_text.json
new file mode 100644
index 00000000..7cb3a9fc
--- /dev/null
+++ b/backend/documents_parser/tests/fixtures/elmhurst_site_notes_2_text.json
@@ -0,0 +1,6 @@
+[
+ "Summary Information\nSurveyor:\nBW22-0001\nName:\nIan Marsh\nTitle:\nTel Number: 07709266472\nSurvey Reference:\n001233\nMy Reference:\nCurrent SAP rating:\nD 68\nPotential SAP rating: A 92\nEmissions (t/year):\n2.812 tonnes\nCurrent EI rating:\nD 68\nPotential EI rating:\nC 76\nFuel Bill:\n\u00a31098\nProperty Details:\nRdSAP version:\nRdSAP10\nReference Number:\nBW22-0001-001233\nMy Reference:\nLodgement Required:\nNo\nRegs Region:\nEngland\nEPC Language:\nEnglish\nUPRN:\nPostcode:\nBB11 2NU\nRegion:\nWest Pennines\nHouse Name:\nHouse No:\n39\nStreet:\nConstable Avenue\nLocality:\nTown:\nBURNLEY\nCounty:\nProperty Tenure:\nRented (social)\nTransaction Type:\nGrant scheme\nInspection Date:\n06/03/2026\nProcess date:\n06/03/2026\nCheck for the existence of\nan EPC:\nNo\nDoes an EPC exist at the\npoint of carrying out this\nenergy assessment:\nNo\nReason why another energy\nassessment needs to be\nundertaken:\nRdSAP Inputs\nProperty Description:\n1.0 Property type:\nH House\nS Semi-Detached\n2.0 Number of\nStoreys:\n2\nHabitable Rooms:\n4\nHeated Habitable Rooms:\n4\n3.0 Date Built:\nMain Property\nD 1950-1966\n4.0 Dimensions:\nDimension type:\nInternal\nMain Property\nFloor\nArea\n[m2]\nRoom\nHeight\n[m]\nHeat Loss\nWall Perimeter\n[m]\nParty Wall\nLength\n[m]\n1st Floor:\n35.88\n2.51\n17.46\n6.62\nLowest Floor:\n35.88\n2.67\n17.46\n6.62\nNo\n5.0 Conservatory:\nIs there a conservatory?\nNo\n7.0 Walls:\nMain Property\nType\nCA Cavity\nInsulation\nF Filled Cavity\nWall Thickness Unknown\nNo\nWall Thickness\n300 mm\nU-value Known\nNo\nParty Wall Type\nCU Cavity masonry unfilled\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
+ "Summary Information\n8.0 Roofs:\nMain Property\nType\nPA Pitched (slates/tiles), access to loft\nInsulation\nJ Joists\nInsulation Thickness\n200 mm\nU-value Known\nNo\n8.1 Rooms in Roof:\n9.0 Floors:\nMain Property\nLocation\nG Ground floor\nType\nT Suspended timber\nInsulation\nA As built\nDefault U-value\n0.72\nU-value Known\nNo\n10.0 Doors:\nTotal Number of Doors\n2\nNumber of Insulated Doors\n0\n11.0 Windows:\nW\nH\nArea Glazing Type\nFrame \nType\nFrame \nFactor\nGlazing \nGap\nBuilding \nPart\nLocation\nOrient. Data-Source\nU \nvalue\ng \nvalue\nDraught \nProofed\nPermanent \nShutters\n1.59\n1.36\n2.16\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n1.27\n0.43\n0.55\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n1.54\n1.06\n1.63\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nEast\nManufacturer\n2.70\n0.76\nYes\nNone\n0.61\n1.07\n0.65\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nSouth\nManufacturer\n2.70\n0.76\nYes\nNone\n1.07\n1.05\n1.12\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.07\n1.08\n1.16\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.10\n1.06\n1.17\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\n1.12\n1.06\n1.19\nDouble with unknown \ninstall date\nPVC\n0.70\n16 mm or \nmore\nMain\nExternal wall\nWest\nManufacturer\n2.70\n0.76\nYes\nNone\nDraught Proofing\n90 %\n12.0 Ventilation & Cooling\nNo. of open chimneys\n0\nNo. of open flues\n0\nNo. of open chimneys/open flues attached to closed fire\n0\nNo. of flues attached to solid fuel boiler\n0\nNo. 
of open flues attached to other heater\n0\nNo. of blocked chimneys\n0\nNo. of intermittent extract fans\n2\nNo. of passive vents\n2\nNo. of flueless gas fires\n0\nFixed Space Cooling\nNo\nDraught Lobby\nNot present\n12.1 Mechanical Ventilation\nMechanical Ventilation\nNo\n12.2 Air Pressure Test\nTest Method\nNot available\n13.0 Lighting\nTotal number of bulbs\n10\nNumber of LED and CFL Known\nNo\nTotal number of Low Energy\n5\nTotal number of incandescents\n5\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
+ "Summary Information\n14.0 Main Heating1\nPCDF boiler Reference\n18737 Baxi, ASSURE, 88.40%\nHeat Emitter\nRadiators\nHeat pump age\nUnknown\nFuel Type\nMains gas\nFlue Type\nBalanced\nFan Assisted Flue\nYes\nDesign flow temperature\nUnknown\nPCDF Heating Controls\n0 \nMain Heating Controls EES\nCBE\nMain Heating Controls Sap\nSAP code 2106, Programmer, room thermostat and TRVs\nPCDF Compensator\n0 \nPercentage of Heat\n100 %\n14.1 Main Heating2\nPCDF boiler Reference\n0 \nMain Heating EES Code\nMain Heating SAP Code\n0\nPercentage of Heat\n0 %\n14.1 Community Heating/Heat Network\nHeating Type\nNone\n14.2 Meters\nElectricity meter type\nSingle\nMain gas\nYes\nElectricity Smart Meter Present\nNo\nGas Smart Meter Present\nNo\n15.0 Water Heating\nWater Heating Code\nHWP\nWater Heating SapCode\n901\nWater Heating Fuel Type\nMains gas\n15.1 Hot Water Cylinder\nHot Water Cylinder Present\nNo\n15.2 Community Hot Water\nPCDF boiler Reference\n0\n16.0 Solar water heating\nSolar Water Heating\nNo\n17.0 Waste Water Heat Recovery System\nIs WWHRS present in the property?\nNo / Unknown\n1x.0 Baths and Showers\nTotal Number of Baths\n1\nNumber of Baths Connected\n0\nDescription\nType\nConnected\n1\nNon-electric shower\nNone\n18.0 Flue Gas Heat Recovery System\nPresent\nNo\n19.0 Photovoltaic Panel\nPhotovoltaic Panel\nNone\nExport capable meter\nNo\n20.0 Wind Turbine\nTerrain Type\nRural\nWind turbine present?\nNo\n22.0 Special Features\n21.0 Small-Scale Hydro\nElectricity generated [kWh/year]\n0.00\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n",
+ "Summary Information\nRecommendations\nLoft insulation (Already installed)\nFlat roof insulation (Not applicable)\nRoom-in-roof insulation (Not applicable)\nCavity wall insulation (Already installed)\nSolid wall insulation (Not applicable)\nFloor insulation (suspended floor) (Recommended)\nHot water cylinder insulation (Not applicable)\nDraught proofing (SAP increase too small)\nLow energy lighting (Recommended)\nCylinder thermostat (Not applicable)\nHeating controls for wet central heating system (Already installed)\nUpgrade boiler, same fuel (Already installed)\nChange heating to condensing gas condensing boiler (fuel switch) (Not applicable)\nFlue gas heat recovery in conjunction with new boiler (Not applicable)\nSolar water heating (SAP increase too small)\nHeat recovery system for mixer showers (SAP increase too small)\nDouble glazed windows (Already installed)\nInsulated doors (SAP increase too small)\nSolar photovoltaic panels (Recommended)\nWind turbine (Recommended)\nPV diverter (Not applicable)\nPV battery (Not applicable)\nWater heating controls (Not applicable)\nAlternative Recommendations\nExternal wall insulation with cavity insulation (Not applicable)\nBiomass boiler (alternative) (Not applicable)\nMicro CHP (alternative) (Not applicable)\nRelated Party Disclosure\nAddenda\n\u00a9 Elmhurst Energy Systems Limited Registered Office Unit 16, St Johns Business Park, Lutterworth, Leicestershire LE17 4HB\n"
+]
\ No newline at end of file
diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_1_text.json
similarity index 100%
rename from backend/documents_parser/tests/fixtures/site_notes_example_text.json
rename to backend/documents_parser/tests/fixtures/pashub_site_notes_1_text.json
diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_2_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_2_text.json
similarity index 100%
rename from backend/documents_parser/tests/fixtures/site_notes_example_2_text.json
rename to backend/documents_parser/tests/fixtures/pashub_site_notes_2_text.json
diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_3_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_3_text.json
similarity index 100%
rename from backend/documents_parser/tests/fixtures/site_notes_example_3_text.json
rename to backend/documents_parser/tests/fixtures/pashub_site_notes_3_text.json
diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_4_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_4_text.json
similarity index 100%
rename from backend/documents_parser/tests/fixtures/site_notes_example_4_text.json
rename to backend/documents_parser/tests/fixtures/pashub_site_notes_4_text.json
diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_5_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_5_text.json
similarity index 100%
rename from backend/documents_parser/tests/fixtures/site_notes_example_5_text.json
rename to backend/documents_parser/tests/fixtures/pashub_site_notes_5_text.json
diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_6_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_6_text.json
similarity index 100%
rename from backend/documents_parser/tests/fixtures/site_notes_example_6_text.json
rename to backend/documents_parser/tests/fixtures/pashub_site_notes_6_text.json
diff --git a/backend/documents_parser/tests/fixtures/pashub_site_notes_7_text.json b/backend/documents_parser/tests/fixtures/pashub_site_notes_7_text.json
new file mode 100644
index 00000000..933535cf
--- /dev/null
+++ b/backend/documents_parser/tests/fixtures/pashub_site_notes_7_text.json
@@ -0,0 +1,670 @@
+[
+ "SMART EPC: Record of",
+ "Inspection & Site Notes",
+ "Inspection Surveyor:",
+ "Dave Elliott",
+ "E-Mail Address:",
+ "davejohns36@icloud.com",
+ "Report Reference:",
+ "Not Applicable",
+ "Created On:",
+ "12 September 2025",
+ "Date of Inspection:",
+ "08 September 2025",
+ "Property Address:",
+ "Flat 3,",
+ "29 Watcombe Circus,",
+ "NOTTINGHAM,",
+ "NG5 2DU",
+ "Page 1",
+ "",
+ "Photo of electricity meter:",
+ "Single Smart Meter",
+ "RdSAP Assessment",
+ "General",
+ "Confirm you have checked for the existence of an",
+ "EPC before carrying out another energy assessment.",
+ "Yes",
+ "Does an EPC exist at the point of carrying out this",
+ "energy assessment?",
+ "No",
+ "Inspection Date:",
+ "08/09/2025",
+ "Transaction Type:",
+ "None of the Above",
+ "Tenure:",
+ "Rented Social",
+ "Type of Property:",
+ "Maisonette",
+ "Detachment Type:",
+ "Semi-Detached",
+ "Flat Type:",
+ "Mid-floor",
+ "Flat Location:",
+ "3",
+ "Corridor Type:",
+ "Unheated Corridor",
+ "Unheated corridor wall length:",
+ "6.59 m",
+ "Number of storeys:",
+ "2 Storeys",
+ "Terrain Type:",
+ "Suburban",
+ "Number of Extensions:",
+ "2 Extensions",
+ "Is an electricity smart meter present?",
+ "Yes",
+ "Electric meter type:",
+ "Single",
+ "Is the dwelling export-capable?",
+ "No",
+ "Is mains gas available?",
+ "Yes",
+ "Is there a gas smart meter?",
+ "No",
+ "Is the gas meter accessible?",
+ "Yes",
+ "Page 2",
+ "",
+ "Photo of Gas Meter:",
+ "Gas Meter",
+ "External indicators of Solid Brick construction:",
+ "Brick Pattern",
+ "Select Measurements Location:",
+ "Internal",
+ "Building Construction",
+ "Main Building",
+ "Age Range:",
+ "1900-1929",
+ "Record indicators of property age:",
+ "Property checker",
+ "Walls - Construction Type:",
+ "Solid brick",
+ "Record external indicators of Solid Brick",
+ "Construction:",
+ "consistent with build age",
+ "Walls - Insulation Type:",
+ "As built",
+ "Thermal conductivity of wall insulation:",
+ "Unknown",
+ "Wall U-Value known?",
+ "Not Known",
+ "Wall thickness:",
+ "280 mm",
+ "Page 3",
+ "",
+ "Photo wall thickness:",
+ "Wall Measurements",
+ "Wall Dry-Lined?",
+ "No",
+ "Party wall construction type:",
+ "Solid Masonry, Timber Frame, or System Built",
+ "Floor type:",
+ "Other dwelling below",
+ "Extension 1",
+ "Age Range:",
+ "1900-1929",
+ "Record indicators of property age:",
+ "Property checker",
+ "Walls - Construction Type:",
+ "Solid brick",
+ "Record external indicators of Solid Brick",
+ "Construction:",
+ "headers and stretchers in brick bond",
+ "Walls - Insulation Type:",
+ "As built",
+ "Thermal conductivity of wall insulation:",
+ "Unknown",
+ "Wall U-Value known?",
+ "Not Known",
+ "Wall thickness:",
+ "280 mm",
+ "Wall Dry-Lined?",
+ "Yes",
+ "Party wall construction type:",
+ "Solid Masonry, Timber Frame, or System Built",
+ "Floor type:",
+ "Other dwelling below",
+ "Extension 2",
+ "Age Range:",
+ "1900-1929",
+ "Record indicators of property age:",
+ "Property checker",
+ "Walls - Construction Type:",
+ "Solid brick",
+ "Record external indicators of Solid Brick",
+ "Construction:",
+ "headers and stretchers in brick bond",
+ "Walls - Insulation Type:",
+ "As built",
+ "Thermal conductivity of wall insulation:",
+ "Unknown",
+ "Page 4",
+ "",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Wall U-Value known?",
+ "Not Known",
+ "Wall thickness:",
+ "280 mm",
+ "Wall Dry-Lined?",
+ "Yes",
+ "Party wall construction type:",
+ "Solid Masonry, Timber Frame, or System Built",
+ "Floor type:",
+ "Other dwelling below",
+ "Building Measurements",
+ "Area (m2)",
+ "Height (m)",
+ "Heat Loss Perimeter (m)",
+ "PWL (m)",
+ "Main Building",
+ "Floor 1",
+ "39.5",
+ "3.58",
+ "11.02",
+ "15.21",
+ "Floor 0",
+ "23.06",
+ "2.87",
+ "11.72",
+ "10.8",
+ "Extension 1",
+ "Floor 1",
+ "3.43",
+ "3.58",
+ "4.97",
+ "1",
+ "Floor 0",
+ "3.43",
+ "2.87",
+ "4.97",
+ "1",
+ "Extension 2",
+ "Floor 0",
+ "1.81",
+ "3.58",
+ "4.96",
+ "1",
+ "Roof Space",
+ "Main Building",
+ "Roofs - Construction Type:",
+ "Pitched roof (Slates or tiles), Access to loft",
+ "Roofs - Insulation At:",
+ "Joists",
+ "Roof U-Value:",
+ "Not Known",
+ "Roofs - Insulation Thickness:",
+ "225 mm",
+ "Page 5",
+ "",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Loft insulation:",
+ "Loft",
+ "Page 6",
+ "",
+ "Loft insulation:",
+ "Loft",
+ "Indicators of Solid Brick Wall Construction in roof space:",
+ "solid wall construction visible to gables",
+ "Record indicators of Solid Brick Wall Construction in",
+ "roof space:",
+ "solid wall construction visible to gables",
+ "Extension 1",
+ "Roofs - Construction Type:",
+ "Flat",
+ "Roofs - Insulation At:",
+ "Unknown",
+ "Record indicators of Solid Brick Wall Construction in",
+ "roof space:",
+ "solid wall construction visible at eaves",
+ "Extension 2",
+ "Roofs - Construction Type:",
+ "Flat",
+ "Roofs - Insulation At:",
+ "Unknown",
+ "Record indicators of Solid Brick Wall Construction in",
+ "roof space:",
+ "Couldn\u2019t enter",
+ "Page 7",
+ "",
+ "Alternative Wall",
+ "Main Building",
+ "Alternative Wall 1",
+ "Construction type:",
+ "Solid brick",
+ "Record external indicators of Solid Brick",
+ "Construction:",
+ "consistent with building age, no visible cavity trays",
+ "Insulation Type:",
+ "As Built",
+ "Sheltered wall?",
+ "Yes",
+ "Thermal conductivity of wall insulation:",
+ "Unknown",
+ "Wall thickness:",
+ "280 mm",
+ "Wall Dry-Lined?",
+ "Yes",
+ "Windows",
+ "Window 1",
+ "Window location:",
+ "Main Building",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.2 m",
+ "Window width:",
+ "0.8 m",
+ "Orientation:",
+ "South West",
+ "Window 2",
+ "Window location:",
+ "Extension 1",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.65 m",
+ "Window width:",
+ "0.52 m",
+ "Orientation:",
+ "East",
+ "Page 8",
+ "",
+ "Window 3",
+ "Window location:",
+ "Extension 1",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.95 m",
+ "Window width:",
+ "0.86 m",
+ "Orientation:",
+ "East",
+ "Window 4",
+ "Window location:",
+ "Extension 1",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.76 m",
+ "Window width:",
+ "0.65 m",
+ "Orientation:",
+ "North",
+ "Window 5",
+ "Window location:",
+ "Extension 1",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.68 m",
+ "Window width:",
+ "0.68 m",
+ "Orientation:",
+ "East",
+ "Page 9",
+ "",
+ "Window 6",
+ "Window location:",
+ "Extension 1",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.84 m",
+ "Window width:",
+ "1.18 m",
+ "Orientation:",
+ "North East",
+ "Window 7",
+ "Window location:",
+ "Extension 1",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.76 m",
+ "Window width:",
+ "0.65 m",
+ "Orientation:",
+ "North",
+ "Window 8",
+ "Window location:",
+ "Extension 2",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.82 m",
+ "Window width:",
+ "0.84 m",
+ "Orientation:",
+ "South East",
+ "Page 10",
+ "",
+ "Window 9",
+ "Window location:",
+ "Extension 2",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.65 m",
+ "Window width:",
+ "0.5 m",
+ "Orientation:",
+ "South",
+ "Window 10",
+ "Window location:",
+ "Extension 2",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.71 m",
+ "Window width:",
+ "0.47 m",
+ "Orientation:",
+ "East",
+ "Window 11",
+ "Window location:",
+ "Extension 2",
+ "Window wall type:",
+ "External wall",
+ "Glazing Type:",
+ "Double glazing, Unknown install date",
+ "Window type:",
+ "Window",
+ "Window frame type:",
+ "Wooden or PVC",
+ "What size is the glazing gap?",
+ "16 mm or more",
+ "Is the window draught proofed?",
+ "Yes",
+ "Are there permanent shutters present?",
+ "No",
+ "Window height:",
+ "1.2 m",
+ "Window width:",
+ "0.8 m",
+ "Orientation:",
+ "South West",
+ "Page 11",
+ "",
+ "Heating & Hot Water",
+ "Main Heating Systems",
+ "Main Heating 1",
+ "How would you like to select the Heating System?",
+ "PCDF Search",
+ "System type:",
+ "Boiler with radiators or underfloor heating",
+ "Product Id",
+ "15030",
+ "Manufacturer",
+ "Baxi",
+ "Model",
+ "Duo-tec Combi",
+ "Orig Manuf",
+ "Baxi Heating",
+ "Fuel",
+ "Mains gas",
+ "S. Efficiency",
+ "0",
+ "Type",
+ "Combi",
+ "Condensing",
+ "Yes",
+ "Year",
+ "2006 - 2008",
+ "Mount",
+ "Wall",
+ "Open Flue",
+ "Room-sealed",
+ "Fan Assist",
+ "Yes",
+ "Status",
+ "Normal status for an actual product",
+ "Central heating pump age:",
+ "Unknown",
+ "Controls:",
+ "Programmer, room thermostat and TRVs",
+ "Does the boiler have a Flue Gas Heat Recover",
+ "System (FGHRS)?",
+ "No",
+ "Is there a weather compensator?",
+ "No",
+ "Emitter:",
+ "Radiators",
+ "Emitter Temperature:",
+ "Unknown",
+ "Secondary Heating System",
+ "Secondary Fuel",
+ "No Secondary Heating",
+ "Water Heating & Cylinder",
+ "Water Heating Type:",
+ "Regular",
+ "Water Heating System:",
+ "From main heating 1",
+ "Cylinder Size:",
+ "No Cylinder",
+ "Ventilation",
+ "Ventilation type:",
+ "Natural",
+ "Has fixed air conditioning?",
+ "No",
+ "Number of open flues:",
+ "0",
+ "Number of closed flues:",
+ "0",
+ "Number of boiler flues:",
+ "1",
+ "Page 12",
+ "",
+ "Number of other flues:",
+ "0",
+ "Number of extract fans:",
+ "2",
+ "Number of passive vents:",
+ "1",
+ "Number of flueless gas fires:",
+ "0",
+ "Pressure test:",
+ "No test",
+ "Is there a draught lobby?",
+ "Yes",
+ "Conservatories",
+ "Is there conservatory?",
+ "No conservatory",
+ "Renewables",
+ "Wind Turbines",
+ "Has wind turbines?",
+ "No",
+ "Solar hot water",
+ "Has solar hot water?",
+ "No",
+ "Photovoltaics",
+ "Has photovoltaic array?",
+ "No",
+ "Number of PV batteries:",
+ "None",
+ "Hydro",
+ "Is the dwelling connected to Hydro?",
+ "No",
+ "Room Count Elements",
+ "Number of habitable rooms?",
+ "3",
+ "Are any of these rooms unheated?",
+ "No",
+ "Number of external doors?",
+ "1",
+ "Number of insulated external doors?",
+ "0",
+ "Number of draughtproofed external doors?",
+ "1",
+ "Number of open chimneys?",
+ "0",
+ "Number of blocked chimneys?",
+ "0",
+ "Number of fixed incandescent bulbs:",
+ "7",
+ "Is the exact number of LED and CFL bulbs known?",
+ "Yes",
+ "Number of fixed LED bulbs:",
+ "7",
+ "Number of fixed CFL bulbs:",
+ "0",
+ "Are there any waste water heat recovery systems?",
+ "None",
+ "Number of baths:",
+ "1",
+ "How many special features are there at the",
+ "property?",
+ "0",
+ "Customer Response",
+ "Customer present?",
+ "Yes",
+ "Page 13",
+ "",
+ "Customer willing to answer satisfaction survey?",
+ "No",
+ "Addendum + Related Party Disclosure",
+ "Addendum",
+ "None",
+ "Related party disclosure",
+ "No related party",
+ "Photographs Required",
+ "Page 14",
+ ""
+]
\ No newline at end of file
diff --git a/backend/documents_parser/tests/test_elmhurst_end_to_end.py b/backend/documents_parser/tests/test_elmhurst_end_to_end.py
new file mode 100644
index 00000000..977ea138
--- /dev/null
+++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py
@@ -0,0 +1,356 @@
+import json
+import os
+from datetime import date
+
+import pytest
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+
+FIXTURE_PATH = os.path.join(  # Elmhurst site-notes fixture 1, in the fixtures/ dir beside this module
+ os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_1_text.json"
+)
+FIXTURE_PATH_2 = os.path.join(  # Elmhurst site-notes fixture 2, same fixtures/ dir
+ os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_2_text.json"
+)
+
+
+@pytest.fixture(scope="module")
+def result() -> EpcPropertyData:
+ with open(FIXTURE_PATH) as f:
+ pages = json.load(f)
+ site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+ return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+
+
+@pytest.fixture(scope="module")
+def result2() -> EpcPropertyData:
+ with open(FIXTURE_PATH_2) as f:
+ pages = json.load(f)
+ site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+ return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+
+
+class TestAddress:  # address line, post town and postcode of the mapped `result` fixture
+ def test_address_line_1(self, result: EpcPropertyData) -> None:
+ assert result.address_line_1 == "19, Queens Road"
+
+ def test_post_town(self, result: EpcPropertyData) -> None:
+ assert result.post_town == "BURNLEY"
+
+ def test_postcode(self, result: EpcPropertyData) -> None:
+ assert result.postcode == "BB10 1XX"
+
+
+class TestInspectionInfo:  # inspection date, tenure, transaction type and report reference
+ def test_inspection_date(self, result: EpcPropertyData) -> None:
+ assert result.inspection_date == date(2026, 3, 6)
+
+ def test_tenure(self, result: EpcPropertyData) -> None:
+ assert result.tenure == "Rented (social)"
+
+ def test_transaction_type(self, result: EpcPropertyData) -> None:
+ assert result.transaction_type == "Grant scheme"
+
+ def test_report_reference(self, result: EpcPropertyData) -> None:
+ assert result.report_reference == "P960-0001-001573"
+
+
+class TestPropertyDescription:  # property type, built form, storeys, conservatory flag, floor area
+ def test_property_type(self, result: EpcPropertyData) -> None:
+ assert result.property_type == "Bungalow"
+
+ def test_built_form(self, result: EpcPropertyData) -> None:
+ assert result.built_form == "End-Terrace"
+
+ def test_dwelling_type(self, result: EpcPropertyData) -> None:
+ assert result.dwelling_type == "End-Terrace bungalow"
+
+ def test_number_of_storeys(self, result: EpcPropertyData) -> None:
+ assert result.number_of_storeys == 1
+
+ def test_has_conservatory(self, result: EpcPropertyData) -> None:
+ assert result.has_conservatory is False
+
+ def test_total_floor_area(self, result: EpcPropertyData) -> None:
+ assert result.total_floor_area_m2 == 44.89
+
+
+class TestCounts:  # integer counts on `result`: rooms, doors and chimneys
+ def test_habitable_rooms_count(self, result: EpcPropertyData) -> None:
+ assert result.habitable_rooms_count == 2
+
+ def test_heated_rooms_count(self, result: EpcPropertyData) -> None:
+ assert result.heated_rooms_count == 2
+
+ def test_door_count(self, result: EpcPropertyData) -> None:
+ assert result.door_count == 0
+
+ def test_insulated_door_count(self, result: EpcPropertyData) -> None:
+ assert result.insulated_door_count == 0
+
+ def test_open_chimneys_count(self, result: EpcPropertyData) -> None:
+ assert result.open_chimneys_count == 0
+
+ def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None:
+ assert result.blocked_chimneys_count == 0
+
+
+class TestLighting:  # fixed-lighting bulb counts split into LED, CFL and incandescent
+ def test_led_count(self, result: EpcPropertyData) -> None:
+ assert result.led_fixed_lighting_bulbs_count == 4
+
+ def test_cfl_count(self, result: EpcPropertyData) -> None:
+ assert result.cfl_fixed_lighting_bulbs_count == 4
+
+ def test_incandescent_count(self, result: EpcPropertyData) -> None:
+ assert result.incandescent_fixed_lighting_bulbs_count == 0
+
+
+class TestFlags:  # boolean feature flags on `result`; each is asserted to be exactly False
+ def test_solar_water_heating(self, result: EpcPropertyData) -> None:
+ assert result.solar_water_heating is False
+
+ def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None:
+ assert result.has_hot_water_cylinder is False
+
+ def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
+ assert result.has_fixed_air_conditioning is False
+
+ def test_hydro(self, result: EpcPropertyData) -> None:
+ assert result.hydro is False
+
+ def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
+ assert result.photovoltaic_array is False
+
+
+class TestBuildingPart:  # the single entry in sap_building_parts: walls, roof, floor, floor dimensions
+ def test_single_building_part(self, result: EpcPropertyData) -> None:
+ assert len(result.sap_building_parts) == 1
+
+ def test_identifier(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].identifier == "main"
+
+ def test_construction_age_band(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].construction_age_band == "1950-1966"
+
+ def test_wall_construction(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].wall_construction == "Cavity"
+
+ def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].wall_insulation_type == "Filled Cavity"
+
+ def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].wall_thickness_measured is True
+
+ def test_wall_thickness_mm(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].wall_thickness_mm == 300
+
+ def test_roof_insulation_location(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].roof_insulation_location == "Joists"
+
+ def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].roof_insulation_thickness == 270
+
+ def test_floor_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].floor_type == "Ground floor"
+
+ def test_floor_construction_type(self, result: EpcPropertyData) -> None:
+ assert (
+ result.sap_building_parts[0].floor_construction_type
+ == "Suspended, not timber"
+ )
+
+ def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].floor_insulation_type_str == "As built"
+
+ def test_floor_u_value_known(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].floor_u_value_known is False
+
+ def test_single_floor_dimension(self, result: EpcPropertyData) -> None:
+ assert len(result.sap_building_parts[0].sap_floor_dimensions) == 1
+
+ def test_floor_dimension_area(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].sap_floor_dimensions[0].total_floor_area_m2 == 44.89
+
+ def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None:
+ assert result.sap_building_parts[0].sap_floor_dimensions[0].room_height_m == 2.24
+
+ def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None:
+ assert (
+ result.sap_building_parts[0].sap_floor_dimensions[0].heat_loss_perimeter_m
+ == 20.10
+ )
+
+ def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None:
+ assert (
+ result.sap_building_parts[0].sap_floor_dimensions[0].party_wall_length_m
+ == 6.70
+ )
+
+
+class TestWindows:  # length of sap_windows, details of window 0, orientation of window 2
+ def test_window_count(self, result: EpcPropertyData) -> None:
+ assert len(result.sap_windows) == 4
+
+ def test_first_window_width(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].window_width == 1.30
+
+ def test_first_window_height(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].window_height == 1.10
+
+ def test_first_window_orientation(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].orientation == "North"
+
+ def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].glazing_type == "Double post or during 2022"
+
+ def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].draught_proofed is True
+
+ def test_third_window_orientation(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[2].orientation == "South"
+
+ def test_frame_factor(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].frame_factor == 0.7
+
+ def test_transmission_u_value(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].window_transmission_details is not None  # fail clearly before attribute access
+ assert result.sap_windows[0].window_transmission_details.u_value == 1.4
+
+ def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].window_transmission_details is not None
+ assert result.sap_windows[0].window_transmission_details.solar_transmittance == 0.72
+
+ def test_transmission_data_source(self, result: EpcPropertyData) -> None:
+ assert result.sap_windows[0].window_transmission_details is not None
+ assert result.sap_windows[0].window_transmission_details.data_source == "Manufacturer"
+
+
+class TestHeating:  # assertions on result.sap_heating: the single main-heating entry, shower outlet, cylinder, air conditioning, water-heating code
+ def test_single_heating_detail(self, result: EpcPropertyData) -> None:
+ assert len(result.sap_heating.main_heating_details) == 1
+
+ def test_fuel_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.main_heating_details[0].main_fuel_type == "Mains gas"
+
+ def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
+ assert (
+ result.sap_heating.main_heating_details[0].heat_emitter_type == "Radiators"
+ )
+
+ def test_emitter_temperature(self, result: EpcPropertyData) -> None:
+ assert (
+ result.sap_heating.main_heating_details[0].emitter_temperature == "Unknown"
+ )
+
+ def test_fan_flue_present(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.main_heating_details[0].fan_flue_present is True
+
+ def test_has_fghrs(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.main_heating_details[0].has_fghrs is False
+
+ def test_main_heating_control(self, result: EpcPropertyData) -> None:
+ assert (
+ result.sap_heating.main_heating_details[0].main_heating_control
+ == "Programmer, room thermostat and TRVs"
+ )
+
+ def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.shower_outlets is not None  # fail with a clear assertion before dereferencing shower_outlets
+ assert (
+ result.sap_heating.shower_outlets.shower_outlet.shower_outlet_type
+ == "Electric shower"
+ )
+
+ def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.cylinder_size is None
+
+ def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.has_fixed_air_conditioning is False
+
+ def test_water_heating_code(self, result: EpcPropertyData) -> None:
+ assert result.sap_heating.water_heating_code == 901  # compared as an int, not a string
+
+
+class TestEnergySource:  # assertions on result.sap_energy_source: gas supply, meters, wind turbines, PV batteries
+ def test_mains_gas(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.mains_gas is True
+
+ def test_meter_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.meter_type == "Single"
+
+ def test_electricity_smart_meter(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.electricity_smart_meter_present is False
+
+ def test_gas_smart_meter(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.gas_smart_meter_present is False
+
+ def test_wind_turbines_count(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.wind_turbines_count == 0
+
+ def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.wind_turbines_terrain_type == "Suburban"
+
+ def test_pv_battery_count(self, result: EpcPropertyData) -> None:
+ assert result.sap_energy_source.pv_battery_count == 0
+
+
+class TestVentilation:  # assertions on result.sap_ventilation; every test first asserts the section is present
+ def test_draught_lobby(self, result: EpcPropertyData) -> None:
+ assert result.sap_ventilation is not None
+ assert result.sap_ventilation.draught_lobby is False
+
+ def test_pressure_test(self, result: EpcPropertyData) -> None:
+ assert result.sap_ventilation is not None
+ assert result.sap_ventilation.pressure_test == "Not available"
+
+ def test_extract_fans_count(self, result: EpcPropertyData) -> None:
+ assert result.sap_ventilation is not None
+ assert result.sap_ventilation.extract_fans_count == 2
+
+ def test_open_flues_count(self, result: EpcPropertyData) -> None:
+ assert result.sap_ventilation is not None
+ assert result.sap_ventilation.open_flues_count == 0
+
+
+class TestDraughtproofingAndWater:  # top-level fields: draught-proofing percentage, waste-water heat recovery, unheated rooms
+ def test_percent_draughtproofed(self, result: EpcPropertyData) -> None:
+ assert result.percent_draughtproofed == 100
+
+ def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None:
+ assert result.waste_water_heat_recovery == "None"  # the string "None", not the None singleton
+
+ def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None:
+ assert result.any_unheated_rooms is False
+
+
+class TestEnergyPerformance:  # current/potential energy and environmental ratings plus current CO2 emissions
+ def test_energy_rating_current(self, result: EpcPropertyData) -> None:
+ assert result.energy_rating_current == 69
+
+ def test_energy_rating_potential(self, result: EpcPropertyData) -> None:
+ assert result.energy_rating_potential == 77
+
+ def test_environmental_impact_current(self, result: EpcPropertyData) -> None:
+ assert result.environmental_impact_current == 76
+
+ def test_environmental_impact_potential(self, result: EpcPropertyData) -> None:
+ assert result.environmental_impact_potential == 81
+
+ def test_co2_emissions_current(self, result: EpcPropertyData) -> None:
+ assert result.co2_emissions_current == 1.683  # exact float equality; unit is not visible here
+
+
+class TestWindowFrameMaterial:  # uses the result2 fixture: frame material and glazing gap of the first window
+ def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None:
+ assert result2.sap_windows[0].frame_material == "PVC"
+
+ def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None:
+ assert result2.sap_windows[0].glazing_gap == "16 mm or more"
+
+
+class TestLowEnergyLighting:  # uses the result2 fixture: count of low-energy fixed lighting bulbs
+ def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None:
+ assert result2.low_energy_fixed_lighting_bulbs_count == 5
diff --git a/backend/documents_parser/tests/test_elmhurst_extractor.py b/backend/documents_parser/tests/test_elmhurst_extractor.py
new file mode 100644
index 00000000..e0dca443
--- /dev/null
+++ b/backend/documents_parser/tests/test_elmhurst_extractor.py
@@ -0,0 +1,515 @@
+import json
+import os
+from datetime import date
+
+import pytest
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.surveys.elmhurst_site_notes import (
+ BathsAndShowers,
+ BuildingPartDimensions,
+ ElmhurstSiteNotes,
+ FloorDetails,
+ FloorDimension,
+ Lighting,
+ MainHeating,
+ Meters,
+ PropertyDetails,
+ Renewables,
+ RoofDetails,
+ Shower,
+ SurveyorInfo,
+ VentilationAndCooling,
+ WallDetails,
+ WaterHeating,
+ Window,
+)
+
+FIXTURE_PATH = os.path.join(  # JSON text fixture 1, resolved relative to this test module
+ os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_1_text.json"
+)
+FIXTURE_PATH_2 = os.path.join(  # JSON text fixture 2, resolved relative to this test module
+ os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_2_text.json"
+)
+
+
+@pytest.fixture(scope="module")
+def result() -> ElmhurstSiteNotes:
+ with open(FIXTURE_PATH) as f:
+ pages = json.load(f)
+ return ElmhurstSiteNotesExtractor(pages).extract()
+
+
+@pytest.fixture(scope="module")
+def result2() -> ElmhurstSiteNotes:
+ with open(FIXTURE_PATH_2) as f:
+ pages = json.load(f)
+ return ElmhurstSiteNotesExtractor(pages).extract()
+
+
+class TestSurveyorInfo:  # assertions on result.surveyor_info for fixture 1
+ def test_surveyor_code(self, result: ElmhurstSiteNotes) -> None:
+ assert result.surveyor_info.surveyor_code == "P960-0001"
+
+ def test_name(self, result: ElmhurstSiteNotes) -> None:
+ assert result.surveyor_info.name == "Richard Matthew Ratcliff"
+
+ def test_title(self, result: ElmhurstSiteNotes) -> None:
+ assert result.surveyor_info.title == "Mr."
+
+ def test_tel_number(self, result: ElmhurstSiteNotes) -> None:
+ assert result.surveyor_info.tel_number == "07760 443 469"
+
+ def test_survey_reference(self, result: ElmhurstSiteNotes) -> None:
+ assert result.surveyor_info.survey_reference == "001573"  # kept as a string, leading zeros included
+
+ def test_my_reference_none(self, result: ElmhurstSiteNotes) -> None:
+ assert result.surveyor_info.my_reference is None
+
+
+class TestPropertyDetails:  # assertions on result.property_details: identifiers, address parts, tenure, dates
+ def test_rdsap_version(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.rdsap_version == "RdSAP10"
+
+ def test_reference_number(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.reference_number == "P960-0001-001573"
+
+ def test_lodgement_required(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.lodgement_required is False
+
+ def test_regs_region(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.regs_region == "England"
+
+ def test_epc_language(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.epc_language == "English"
+
+ def test_uprn_none(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.uprn is None
+
+ def test_postcode(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.postcode == "BB10 1XX"
+
+ def test_region(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.region == "West Pennines"
+
+ def test_house_name_none(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.house_name is None
+
+ def test_house_number(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.house_number == "19"  # compared as a string, not an int
+
+ def test_street(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.street == "Queens Road"
+
+ def test_locality_none(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.locality is None
+
+ def test_town(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.town == "BURNLEY"
+
+ def test_county_none(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.county is None
+
+ def test_tenure(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.tenure == "Rented (social)"
+
+ def test_transaction_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.transaction_type == "Grant scheme"
+
+ def test_inspection_date(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.inspection_date == date(2026, 3, 6)  # compared against a datetime.date value
+
+ def test_process_date(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.process_date == date(2026, 3, 6)
+
+ def test_epc_exists(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_details.epc_exists is False
+
+
+class TestPropertyDescription:  # top-level description fields; coded values keep their letter prefix (e.g. "B Bungalow")
+ def test_property_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.property_type == "B Bungalow"
+
+ def test_attachment(self, result: ElmhurstSiteNotes) -> None:
+ assert result.attachment == "E End-Terrace"
+
+ def test_number_of_storeys(self, result: ElmhurstSiteNotes) -> None:
+ assert result.number_of_storeys == 1
+
+ def test_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
+ assert result.habitable_rooms == 2
+
+ def test_heated_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
+ assert result.heated_habitable_rooms == 2
+
+ def test_construction_age_band(self, result: ElmhurstSiteNotes) -> None:
+ assert result.construction_age_band == "D 1950-1966"
+
+ def test_has_conservatory(self, result: ElmhurstSiteNotes) -> None:
+ assert result.has_conservatory is False
+
+
+class TestDimensions:  # assertions on result.dimensions: dimension type and the single floor entry
+ def test_dimension_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.dimensions.dimension_type == "Internal"
+
+ def test_floor_count(self, result: ElmhurstSiteNotes) -> None:
+ assert len(result.dimensions.floors) == 1
+
+ def test_floor_name(self, result: ElmhurstSiteNotes) -> None:
+ assert result.dimensions.floors[0].name == "Lowest Floor"
+
+ def test_floor_area(self, result: ElmhurstSiteNotes) -> None:
+ assert result.dimensions.floors[0].area_m2 == 44.89  # exact float equality
+
+ def test_floor_room_height(self, result: ElmhurstSiteNotes) -> None:
+ assert result.dimensions.floors[0].room_height_m == 2.24
+
+ def test_floor_heat_loss_perimeter(self, result: ElmhurstSiteNotes) -> None:
+ assert result.dimensions.floors[0].heat_loss_perimeter_m == 20.10
+
+ def test_floor_party_wall_length(self, result: ElmhurstSiteNotes) -> None:
+ assert result.dimensions.floors[0].party_wall_length_m == 6.70
+
+
+class TestWalls:  # assertions on result.walls: type, insulation, thickness, U-value flag, party wall type
+ def test_wall_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.walls.wall_type == "CA Cavity"
+
+ def test_insulation(self, result: ElmhurstSiteNotes) -> None:
+ assert result.walls.insulation == "F Filled Cavity"
+
+ def test_thickness_unknown(self, result: ElmhurstSiteNotes) -> None:
+ assert result.walls.thickness_unknown is False
+
+ def test_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
+ assert result.walls.thickness_mm == 300
+
+ def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
+ assert result.walls.u_value_known is False
+
+ def test_party_wall_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.walls.party_wall_type == "U Unable to determine"
+
+
+class TestRoof:  # assertions on result.roof: type, insulation, insulation thickness, U-value flag
+ def test_roof_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.roof.roof_type == "PA Pitched (slates/tiles), access to loft"
+
+ def test_insulation(self, result: ElmhurstSiteNotes) -> None:
+ assert result.roof.insulation == "J Joists"
+
+ def test_insulation_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
+ assert result.roof.insulation_thickness_mm == 270
+
+ def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
+ assert result.roof.u_value_known is False
+
+
+class TestFloor:  # assertions on result.floor: location, type, insulation, default U-value, U-value flag
+ def test_location(self, result: ElmhurstSiteNotes) -> None:
+ assert result.floor.location == "G Ground floor"
+
+ def test_floor_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.floor.floor_type == "N Suspended, not timber"
+
+ def test_insulation(self, result: ElmhurstSiteNotes) -> None:
+ assert result.floor.insulation == "A As built"
+
+ def test_default_u_value(self, result: ElmhurstSiteNotes) -> None:
+ assert result.floor.default_u_value == 0.69  # exact float equality
+
+ def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
+ assert result.floor.u_value_known is False
+
+
+class TestDoors:  # both door counters are expected to be zero for fixture 1
+ def test_door_count(self, result: ElmhurstSiteNotes) -> None:
+ assert result.door_count == 0
+
+ def test_insulated_door_count(self, result: ElmhurstSiteNotes) -> None:
+ assert result.insulated_door_count == 0
+
+
+class TestWindows:  # assertions on result.windows (4 entries) and the overall draught-proofing percentage
+ def test_window_count(self, result: ElmhurstSiteNotes) -> None:
+ assert len(result.windows) == 4
+
+ def test_draught_proofing_percent(self, result: ElmhurstSiteNotes) -> None:
+ assert result.draught_proofing_percent == 100
+
+ def test_first_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
+ w = result.windows[0]
+ assert w.width_m == 1.30
+ assert w.height_m == 1.10
+ assert w.area_m2 == 1.43  # equals width x height for this window (1.30 * 1.10)
+
+ def test_first_window_glazing(self, result: ElmhurstSiteNotes) -> None:
+ w = result.windows[0]
+ assert w.glazing_type == "Double post or during 2022"
+ assert w.frame_factor == 0.70
+
+ def test_first_window_location(self, result: ElmhurstSiteNotes) -> None:
+ w = result.windows[0]
+ assert w.building_part == "Main"
+ assert w.location == "External wall"
+ assert w.orientation == "North"
+
+ def test_first_window_performance(self, result: ElmhurstSiteNotes) -> None:
+ w = result.windows[0]
+ assert w.data_source == "Manufacturer"
+ assert w.u_value == 1.40
+ assert w.g_value == 0.72
+ assert w.draught_proofed is True
+ assert w.permanent_shutters == "None"  # the string "None", not the None singleton
+
+ def test_third_window_orientation(self, result: ElmhurstSiteNotes) -> None:
+ assert result.windows[2].orientation == "South"
+
+ def test_fourth_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
+ w = result.windows[3]
+ assert w.width_m == 0.70
+ assert w.height_m == 1.30
+ assert w.area_m2 == 0.91  # equals width x height for this window (0.70 * 1.30)
+
+
+class TestVentilation:  # assertions on result.ventilation: opening/flue counters, cooling, draught lobby, pressure test
+ def test_open_chimneys(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.open_chimneys_count == 0
+
+ def test_open_flues(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.open_flues_count == 0
+
+ def test_open_chimneys_closed_fire(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.open_chimneys_closed_fire_count == 0
+
+ def test_solid_fuel_boiler_flues(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.solid_fuel_boiler_flues_count == 0
+
+ def test_other_heater_flues(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.other_heater_flues_count == 0
+
+ def test_blocked_chimneys(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.blocked_chimneys_count == 0
+
+ def test_extract_fans(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.extract_fans_count == 2
+
+ def test_passive_vents(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.passive_vents_count == 0
+
+ def test_flueless_gas_fires(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.flueless_gas_fires_count == 0
+
+ def test_fixed_space_cooling(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.fixed_space_cooling is False
+
+ def test_draught_lobby(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.draught_lobby == "Not present"  # a text value here, not a bool
+
+ def test_mechanical_ventilation(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.mechanical_ventilation is False
+
+ def test_pressure_test_method(self, result: ElmhurstSiteNotes) -> None:
+ assert result.ventilation.pressure_test_method == "Not available"
+
+
+class TestLighting:  # assertions on result.lighting for fixture 1, where LED and CFL counts are known
+ def test_total_bulbs(self, result: ElmhurstSiteNotes) -> None:
+ assert result.lighting.total_bulbs == 8
+
+ def test_led_cfl_count_known(self, result: ElmhurstSiteNotes) -> None:
+ assert result.lighting.led_cfl_count_known is True
+
+ def test_led_count(self, result: ElmhurstSiteNotes) -> None:
+ assert result.lighting.led_count == 4
+
+ def test_cfl_count(self, result: ElmhurstSiteNotes) -> None:
+ assert result.lighting.cfl_count == 4
+
+ def test_incandescent_count(self, result: ElmhurstSiteNotes) -> None:
+ assert result.lighting.incandescent_count == 0
+
+
+class TestMainHeating:  # assertions on result.main_heating: boiler reference, emitter, fuel, flue, controls, heat share
+ def test_pcdf_boiler_reference(self, result: ElmhurstSiteNotes) -> None:
+ assert (
+ result.main_heating.pcdf_boiler_reference
+ == "17742 Potterton, Promax 33 Combi ErP, 88.30%"
+ )
+
+ def test_heat_emitter(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.heat_emitter == "Radiators"
+
+ def test_heat_pump_age(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.heat_pump_age == "Unknown"
+
+ def test_fuel_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.fuel_type == "Mains gas"
+
+ def test_flue_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.flue_type == "Balanced"
+
+ def test_fan_assisted_flue(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.fan_assisted_flue is True
+
+ def test_design_flow_temperature(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.design_flow_temperature == "Unknown"
+
+ def test_heating_controls_ees(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.heating_controls_ees == "CBE"
+
+ def test_heating_controls_sap(self, result: ElmhurstSiteNotes) -> None:
+ assert (
+ result.main_heating.heating_controls_sap
+ == "SAP code 2106, Programmer, room thermostat and TRVs"
+ )
+
+ def test_percentage_of_heat(self, result: ElmhurstSiteNotes) -> None:
+ assert result.main_heating.percentage_of_heat == 100
+
+
+class TestMeters:  # assertions on result.meters: electricity meter type, mains gas, smart-meter flags
+ def test_electricity_meter_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.meters.electricity_meter_type == "Single"
+
+ def test_main_gas(self, result: ElmhurstSiteNotes) -> None:
+ assert result.meters.main_gas is True
+
+ def test_electricity_smart_meter(self, result: ElmhurstSiteNotes) -> None:
+ assert result.meters.electricity_smart_meter is False
+
+ def test_gas_smart_meter(self, result: ElmhurstSiteNotes) -> None:
+ assert result.meters.gas_smart_meter is False
+
+
+class TestWaterHeating:  # assertions on result.water_heating: text code, numeric SAP code, fuel, cylinder presence
+ def test_water_heating_code(self, result: ElmhurstSiteNotes) -> None:
+ assert result.water_heating.water_heating_code == "HWP"
+
+ def test_water_heating_sap_code(self, result: ElmhurstSiteNotes) -> None:
+ assert result.water_heating.water_heating_sap_code == 901  # compared as an int, unlike the text code above
+
+ def test_water_heating_fuel_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.water_heating.water_heating_fuel_type == "Mains gas"
+
+ def test_hot_water_cylinder_present(self, result: ElmhurstSiteNotes) -> None:
+ assert result.water_heating.hot_water_cylinder_present is False
+
+
+class TestBathsAndShowers:  # assertions on result.baths_and_showers: bath counters and the single shower entry
+ def test_number_of_baths(self, result: ElmhurstSiteNotes) -> None:
+ assert result.baths_and_showers.number_of_baths == 0
+
+ def test_number_of_baths_connected(self, result: ElmhurstSiteNotes) -> None:
+ assert result.baths_and_showers.number_of_baths_connected == 0
+
+ def test_shower_count(self, result: ElmhurstSiteNotes) -> None:
+ assert len(result.baths_and_showers.showers) == 1
+
+ def test_shower_number(self, result: ElmhurstSiteNotes) -> None:
+ assert result.baths_and_showers.showers[0].shower_number == 1
+
+ def test_shower_outlet_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.baths_and_showers.showers[0].outlet_type == "Electric shower"
+
+ def test_shower_connected(self, result: ElmhurstSiteNotes) -> None:
+ assert result.baths_and_showers.showers[0].connected == "None"  # the string "None", not the None singleton
+
+
+class TestRenewables:  # assertions on result.renewables: solar, WWHRS, FGHR, PV, export meter, wind, hydro
+ def test_solar_water_heating(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.solar_water_heating is False
+
+ def test_wwhrs_present(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.wwhrs_present is False
+
+ def test_flue_gas_heat_recovery_present(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.flue_gas_heat_recovery_present is False
+
+ def test_photovoltaic_panel(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.photovoltaic_panel == "None"  # the string "None", not the None singleton
+
+ def test_export_capable_meter(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.export_capable_meter is False
+
+ def test_wind_turbine_present(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.wind_turbine_present is False
+
+ def test_wind_turbines_terrain_type(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.wind_turbines_terrain_type == "Suburban"
+
+ def test_hydro_electricity_generated_kwh(self, result: ElmhurstSiteNotes) -> None:
+ assert result.renewables.hydro_electricity_generated_kwh == 0.0
+
+
+class TestEnergyPerformance:  # current/potential SAP and EI ratings plus current CO2 emissions
+ def test_current_sap_rating(self, result: ElmhurstSiteNotes) -> None:
+ assert result.current_sap_rating == 69
+
+ def test_potential_sap_rating(self, result: ElmhurstSiteNotes) -> None:
+ assert result.potential_sap_rating == 77
+
+ def test_current_ei_rating(self, result: ElmhurstSiteNotes) -> None:
+ assert result.current_ei_rating == 76
+
+ def test_potential_ei_rating(self, result: ElmhurstSiteNotes) -> None:
+ assert result.potential_ei_rating == 81
+
+ def test_co2_emissions_current_t(self, result: ElmhurstSiteNotes) -> None:
+ assert result.co2_emissions_current_t == 1.683  # exact float equality
+
+
+class TestWindowsWithFrameDetails:  # uses the result2 fixture, whose windows carry frame type and glazing gap
+ def test_window_count(self, result2: ElmhurstSiteNotes) -> None:
+ assert len(result2.windows) == 8
+
+ def test_draught_proofing_percent(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.draught_proofing_percent == 90
+
+ def test_first_window_glazing_type_excludes_frame_type(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[0].glazing_type == "Double with unknown install date"  # glazing text only; frame type is asserted separately below
+
+ def test_first_window_frame_type(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[0].frame_type == "PVC"
+
+ def test_first_window_frame_factor(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[0].frame_factor == 0.70
+
+ def test_first_window_glazing_gap(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[0].glazing_gap == "16 mm or more"
+
+ def test_first_window_location(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[0].building_part == "Main"
+ assert result2.windows[0].location == "External wall"
+ assert result2.windows[0].orientation == "East"
+
+ def test_first_window_performance(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[0].data_source == "Manufacturer"
+ assert result2.windows[0].u_value == 2.70
+ assert result2.windows[0].g_value == 0.76
+ assert result2.windows[0].draught_proofed is True
+ assert result2.windows[0].permanent_shutters == "None"  # the string "None", not the None singleton
+
+ def test_fourth_window_orientation(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.windows[3].orientation == "South"
+
+
+class TestLightingLedCflUnknown:  # uses the result2 fixture, where the LED/CFL split is not known
+ def test_total_bulbs(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.lighting.total_bulbs == 10
+
+ def test_led_cfl_count_known_false(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.lighting.led_cfl_count_known is False
+
+ def test_low_energy_count(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.lighting.low_energy_count == 5
+
+ def test_incandescent_count(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.lighting.incandescent_count == 5
+
+ def test_led_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.lighting.led_count == 0  # expected to be 0 rather than None when the split is unknown
+
+ def test_cfl_count_zero_when_unknown(self, result2: ElmhurstSiteNotes) -> None:
+ assert result2.lighting.cfl_count == 0  # expected to be 0 rather than None when the split is unknown
diff --git a/backend/documents_parser/tests/test_end_to_end.py b/backend/documents_parser/tests/test_end_to_end.py
index 84e611c6..c413b55f 100644
--- a/backend/documents_parser/tests/test_end_to_end.py
+++ b/backend/documents_parser/tests/test_end_to_end.py
@@ -20,9 +20,9 @@ from datatypes.epc.domain.epc_property_data import (
)
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
-PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
+PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
PDF_PATH_2 = os.path.join(
- os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_2.pdf"
+ os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_2.pdf"
)
@@ -71,7 +71,7 @@ class TestPdfToEpcPropertyData:
),
sap_windows=[
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@@ -84,7 +84,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@@ -97,7 +97,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@@ -110,7 +110,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North",
window_type="Window",
@@ -123,7 +123,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@@ -136,7 +136,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@@ -149,7 +149,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North West",
window_type="Window",
@@ -162,7 +162,7 @@ class TestPdfToEpcPropertyData:
permanent_shutters_present=False,
),
SapWindow(
- pvc_frame="Wooden or PVC",
+ frame_material="Wooden or PVC",
glazing_gap="16 mm or more",
orientation="North East",
window_type="Window",
@@ -302,7 +302,7 @@ class TestPdfToEpcPropertyDataFixture2:
PDF_PATH_3 = os.path.join(
- os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_3.pdf"
+ os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_3.pdf"
)
@@ -339,7 +339,7 @@ class TestPdfToEpcPropertyDataFixture3:
PDF_PATH_4 = os.path.join(
- os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_4.pdf"
+ os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_4.pdf"
)
@@ -369,7 +369,7 @@ class TestPdfToEpcPropertyDataFixture4:
PDF_PATH_5 = os.path.join(
- os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_5.pdf"
+ os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_5.pdf"
)
@@ -401,7 +401,7 @@ class TestPdfToEpcPropertyDataFixture5:
PDF_PATH_6 = os.path.join(
- os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_6.pdf"
+ os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_6.pdf"
)
diff --git a/backend/documents_parser/tests/test_extractor.py b/backend/documents_parser/tests/test_extractor.py
index 66cc4271..be577f1b 100644
--- a/backend/documents_parser/tests/test_extractor.py
+++ b/backend/documents_parser/tests/test_extractor.py
@@ -37,32 +37,37 @@ FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
def load_text_fixture() -> list[str]:
- with open(os.path.join(FIXTURES, "site_notes_example_text.json")) as f:
+ with open(os.path.join(FIXTURES, "pashub_site_notes_1_text.json")) as f:
return json.load(f)
def load_text_fixture_2() -> list[str]:
- with open(os.path.join(FIXTURES, "site_notes_example_2_text.json")) as f:
+ with open(os.path.join(FIXTURES, "pashub_site_notes_2_text.json")) as f:
return json.load(f)
def load_text_fixture_3() -> list[str]:
- with open(os.path.join(FIXTURES, "site_notes_example_3_text.json")) as f:
+ with open(os.path.join(FIXTURES, "pashub_site_notes_3_text.json")) as f:
return json.load(f)
def load_text_fixture_4() -> list[str]:
- with open(os.path.join(FIXTURES, "site_notes_example_4_text.json")) as f:
+ with open(os.path.join(FIXTURES, "pashub_site_notes_4_text.json")) as f:
return json.load(f)
def load_text_fixture_5() -> list[str]:
- with open(os.path.join(FIXTURES, "site_notes_example_5_text.json")) as f:
+ with open(os.path.join(FIXTURES, "pashub_site_notes_5_text.json")) as f:
return json.load(f)
def load_text_fixture_6() -> list[str]:
- with open(os.path.join(FIXTURES, "site_notes_example_6_text.json")) as f:
+ with open(os.path.join(FIXTURES, "pashub_site_notes_6_text.json")) as f:
+ return json.load(f)
+
+
+def load_text_fixture_7() -> list[str]:  # loads the JSON text of PasHub site-notes fixture 7 from FIXTURES
+ with open(os.path.join(FIXTURES, "pashub_site_notes_7_text.json")) as f:  # NOTE(review): platform default encoding, same as the sibling loaders
return json.load(f)
@@ -785,6 +790,38 @@ class TestElectricShowerExtraction:
assert wu.showers[0].outlet_type == "Electric Shower"
+# --- fixture 7: maisonette, 2 extensions, no property photo ---
+
+
+class TestExtractNoPropertyPhoto:  # runs the extractor on fixture 7 and checks address, photo flag, property type, extension count
+ def test_address_extracted_when_no_property_photo(self) -> None:
+ result = PasHubRdSapSiteNotesExtractor(load_text_fixture_7()).extract()
+ assert result.inspection_metadata.property_address == "Flat 3, 29 Watcombe Circus, NOTTINGHAM, NG5 2DU"
+ assert result.inspection_metadata.property_photo is False
+ assert result.general.property_type == "Maisonette"
+ assert result.general.number_of_extensions == 2
+
+
+class TestWallThicknessExtraction:  # calls the private _wall_thickness_in helper directly with hand-built line lists
+ def _extractor(self) -> PasHubRdSapSiteNotesExtractor:  # extractor built from an empty page list; the tests pass lines in explicitly
+ return PasHubRdSapSiteNotesExtractor([])
+
+ def test_numeric_value_returns_int(self) -> None:
+ assert self._extractor()._wall_thickness_in(["Wall thickness:", "310 mm"]) == 310
+
+ def test_unmeasurable_returns_none(self) -> None:
+ assert self._extractor()._wall_thickness_in(["Wall thickness:", "Unmeasurable"]) is None
+
+ def test_unmeasurable_lowercase_returns_none(self) -> None:  # same check in lower case
+ assert self._extractor()._wall_thickness_in(["Wall thickness:", "unmeasurable"]) is None
+
+ def test_unmeasurable_uppercase_returns_none(self) -> None:  # same check in upper case
+ assert self._extractor()._wall_thickness_in(["Wall thickness:", "UNMEASURABLE"]) is None
+
+ def test_missing_field_returns_none(self) -> None:  # no "Wall thickness:" label in the input at all
+ assert self._extractor()._wall_thickness_in([]) is None
+
+
class TestSolidMasonryPartyWall:
@pytest.fixture
def bc(self) -> BuildingConstruction:
diff --git a/backend/documents_parser/tests/test_pdf.py b/backend/documents_parser/tests/test_pdf.py
index 3a6dd2fb..d7492659 100644
--- a/backend/documents_parser/tests/test_pdf.py
+++ b/backend/documents_parser/tests/test_pdf.py
@@ -5,8 +5,8 @@ import pytest
from backend.documents_parser.pdf import pdf_to_text_list
-PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
-FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "site_notes_example_text.json")
+PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "PasHubSiteNotes_1.pdf")
+FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "pashub_site_notes_1_text.json")
@pytest.fixture
diff --git a/backend/ecmk_fetcher/ecmk_service.py b/backend/ecmk_fetcher/ecmk_service.py
new file mode 100644
index 00000000..35b8f552
--- /dev/null
+++ b/backend/ecmk_fetcher/ecmk_service.py
@@ -0,0 +1,257 @@
+import os
+from typing import Dict
+
+from playwright.sync_api import Browser, BrowserContext, Locator, Page, sync_playwright
+
+from backend.app.db.connection import db_session
+from backend.app.db.functions.uploaded_files_functions import (
+ get_uploaded_file_by_listing_type_and_source,
+)
+from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
+from backend.documents_parser.db_writer import save_epc_property_data
+from backend.documents_parser.parser import parse_site_notes_pdf
+from backend.ecmk_fetcher.address_list import (
+ PropertyRow,
+ extract_addresses_from_spreadsheet,
+)
+from backend.ecmk_fetcher.browser import (
+ attach_debug_listeners,
+ download_with_retry,
+ go_to_assessment_details,
+ go_to_assessments,
+ go_to_next_page,
+ login,
+)
+from backend.ecmk_fetcher.excel_writer import write_row
+from backend.ecmk_fetcher.reports import (
+ REPORT_TYPES,
+ FileDownloadButtonType,
+ build_property_id,
+ map_report_type_to_db_file_type,
+)
+from backend.ecmk_fetcher.upload import (
+ upload_excel_to_sharepoint,
+ upload_file_to_s3_and_record,
+ upload_file_to_sharepoint,
+)
+from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
+from utils.logger import setup_logger
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+
+logger = setup_logger()
+
+DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
+
+
+class EcmkService:
+    """Fetch ECMK assessment reports and push them to SharePoint, S3 and the DB."""
+
+    def __init__(
+        self,
+        sharepoint_client: DomnaSharepointClient,
+        s3_bucket: str,
+        property_list_filepath: str,
+        sharepoint_base_path: str,
+        sharepoint_excel_path: str,
+        local_dimensions_path: str,
+    ) -> None:
+        """Store run configuration and load the property map from the spreadsheet."""
+        self._sharepoint_client = sharepoint_client
+        self._s3_bucket = s3_bucket
+        self._sharepoint_base_path = sharepoint_base_path
+        self._sharepoint_excel_path = sharepoint_excel_path
+        self._local_dimensions_path = local_dimensions_path
+        self._property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(
+            property_list_filepath
+        )
+
+    def run(self) -> None:
+        """Download Dimensions.xlsx, then drive a headless browser session over ECMK."""
+        self._sharepoint_client.download_file(
+            sharepoint_path=f"{self._sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
+            local_path=self._local_dimensions_path,
+        )
+
+        with sync_playwright() as p:
+            browser: Browser = p.chromium.launch(headless=True)
+            context: BrowserContext = browser.new_context()
+            page: Page = context.new_page()
+            try:
+                self._run_browser_session(page)
+            finally:
+                context.close()
+                browser.close()
+
+    def _run_browser_session(self, page: Page) -> None:
+        """Log in, page through the assessments table and process matching rows."""
+        username: str = ""  # TODO: get from github secrets
+        password: str = ""
+        rows_selector: str = "#assessmentDatatable tbody tr"
+
+        attach_debug_listeners(page)
+        login(page, username, password)
+        go_to_assessments(page)
+
+        while True:
+            rows: Locator = page.locator(rows_selector)
+            row_count: int = rows.count()
+
+            for i in range(row_count):
+                row: Locator = rows.nth(i)
+                try:
+                    cells: Locator = row.locator("td")
+                    first_name: str = cells.nth(1).inner_text().strip()
+                    last_name: str = cells.nth(2).inner_text().strip()
+                    address: str = cells.nth(5).inner_text().strip()
+                    postcode: str = cells.nth(7).inner_text().strip()
+                    status: str = cells.nth(9).inner_text().strip()
+
+                    if first_name == "Oliver" and last_name == "Stephens":
+                        continue
+                    if status != "Submitted (not Lodged)":
+                        continue
+
+                    # Only properties present in the spreadsheet map are processed.
+                    property_row: PropertyRow | None = self._property_map.get(
+                        build_property_id(address, postcode)
+                    )
+                    if not property_row:
+                        continue
+
+                    logger.info(f"Match found for property {address}")
+
+                    sharepoint_address: str = property_row.address
+                    hubspot_listing_id: str = property_row.listing_id
+
+                    go_to_assessment_details(page, row)
+
+                    for report_type in REPORT_TYPES:
+                        try:
+                            db_file_type: FileTypeEnum = (
+                                map_report_type_to_db_file_type(report_type)
+                            )
+                        except ValueError:
+                            logger.error(
+                                f"Unknown report type {report_type}, skipping file"
+                            )
+                            continue
+
+                        if get_uploaded_file_by_listing_type_and_source(
+                            hubspot_listing_id=int(hubspot_listing_id),
+                            file_type=db_file_type,
+                            file_source=FileSourceEnum.ECMK,
+                        ):
+                            logger.debug("File already uploaded to s3, skipping")
+                            continue
+
+                        file_path: str | None = download_with_retry(page, report_type)
+                        if not file_path:
+                            continue
+
+                        logger.info(
+                            f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
+                        )
+
+                        # Errors propagate; the local download is always removed.
+                        try:
+                            self._process_file(
+                                file_path=file_path,
+                                report_type=report_type,
+                                db_file_type=db_file_type,
+                                sharepoint_address=sharepoint_address,
+                                hubspot_listing_id=hubspot_listing_id,
+                            )
+                        finally:
+                            if os.path.exists(file_path):
+                                os.remove(file_path)
+
+                    # Back to the assessments table; wait until its rows are present again.
+                    page.go_back()
+                    page.wait_for_selector(rows_selector, timeout=15000)
+
+                except Exception as e:
+                    raise Exception(f"Row processing failed: {e}") from e
+
+            if not go_to_next_page(page):
+                break
+
+    def _process_file(
+        self,
+        file_path: str,
+        report_type: int,
+        db_file_type: FileTypeEnum,
+        sharepoint_address: str,
+        hubspot_listing_id: str,
+    ) -> None:
+        """Route a downloaded report to the XML or PDF pipeline by its report type."""
+        if report_type == FileDownloadButtonType.RAW_XML.value:
+            self._process_xml_file(
+                file_path=file_path,
+                db_file_type=db_file_type,
+                hubspot_listing_id=hubspot_listing_id,
+            )
+        else:
+            self._process_pdf_file(
+                file_path=file_path,
+                file_type=db_file_type,
+                sharepoint_address=sharepoint_address,
+                hubspot_listing_id=hubspot_listing_id,
+            )
+
+    def _process_xml_file(
+        self,
+        file_path: str,
+        db_file_type: FileTypeEnum,
+        hubspot_listing_id: str,
+    ) -> None:
+        """Record the XML in the dimensions workbook, then upload workbook and XML."""
+        with open(file_path, "r", encoding="utf-8") as f:
+            xml_string: str = f.read()
+        details = parse_rdsap(xml_string)
+        row_data = flatten_sap_property(details)
+        write_row(self._local_dimensions_path, row_data)
+        upload_excel_to_sharepoint(
+            client=self._sharepoint_client,
+            file_path=self._local_dimensions_path,
+            sharepoint_path=self._sharepoint_excel_path,
+        )
+        upload_file_to_s3_and_record(
+            bucket=self._s3_bucket,
+            file_path=file_path,
+            hubspot_listing_id=hubspot_listing_id,
+            file_type=db_file_type,
+        )
+
+    def _process_pdf_file(
+        self,
+        file_path: str,
+        file_type: FileTypeEnum,
+        sharepoint_address: str,
+        hubspot_listing_id: str,
+    ) -> None:
+        """Upload a PDF to SharePoint and S3; parse RdSAP site notes best-effort."""
+        upload_file_to_sharepoint(
+            client=self._sharepoint_client,
+            file_path=file_path,
+            base_path=self._sharepoint_base_path,
+            subpath=sharepoint_address,
+        )
+        uploaded_file_id: int = upload_file_to_s3_and_record(
+            bucket=self._s3_bucket,
+            file_path=file_path,
+            hubspot_listing_id=hubspot_listing_id,
+            file_type=file_type,
+        )
+        if file_type != FileTypeEnum.ECMK_RD_SAP_SITE_NOTE:
+            return
+        try:
+            epc_data = parse_site_notes_pdf(file_path)
+            with db_session() as session:
+                save_epc_property_data(
+                    session=session, data=epc_data, uploaded_file_id=uploaded_file_id
+                )
+        except Exception:  # best-effort: the uploads above already succeeded
+            logger.warning(
+                f"EPC extraction failed for {os.path.basename(file_path)} — file record retained",
+                exc_info=True,
+            )
diff --git a/backend/ecmk_fetcher/handler/handler.py b/backend/ecmk_fetcher/handler/handler.py
index b777cc9f..b49518bf 100644
--- a/backend/ecmk_fetcher/handler/handler.py
+++ b/backend/ecmk_fetcher/handler/handler.py
@@ -1,14 +1,32 @@
+import os
from typing import Any, Mapping
-from backend.ecmk_fetcher.processor import run_job
+from backend.ecmk_fetcher.ecmk_service import EcmkService
from utils.logger import setup_logger
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
+_PROPERTY_LIST_FILE: str = (
+ "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
+)
+_BASE_DIR: str = os.path.dirname(os.path.dirname(__file__))
+
def handler(event: Mapping[str, Any], context: Any) -> None:
logger.info("Entered handler")
- run_job()
+ service = EcmkService(
+ sharepoint_client=DomnaSharepointClient(
+ sharepoint_location=DomnaSites.PRIVATE_PAY
+ ),
+ s3_bucket="retrofit-energy-assessments-dev",
+ property_list_filepath=os.path.join(_BASE_DIR, _PROPERTY_LIST_FILE),
+ sharepoint_base_path="/Projects/Southern Housing/SH-SURV-26-001/Assessments",
+ sharepoint_excel_path="/Projects/Southern Housing/SH-SURV-26-001/Modelling",
+ local_dimensions_path=os.path.join(_BASE_DIR, "Dimensions.xlsx"),
+ )
+ service.run()
if __name__ == "__main__":
diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py
deleted file mode 100644
index 4f8c24ea..00000000
--- a/backend/ecmk_fetcher/processor.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import os
-from typing import Dict
-from playwright.sync_api import (
- sync_playwright,
- Locator,
- Page,
- Browser,
- BrowserContext,
-)
-
-from backend.app.db.functions.uploaded_files_functions import (
- get_uploaded_file_by_listing_type_and_source,
-)
-from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
-from backend.ecmk_fetcher.address_list import (
- PropertyRow,
- extract_addresses_from_spreadsheet,
-)
-from backend.ecmk_fetcher.browser import (
- attach_debug_listeners,
- download_with_retry,
- go_to_assessment_details,
- go_to_assessments,
- go_to_next_page,
- login,
-)
-from backend.ecmk_fetcher.reports import (
- REPORT_TYPES,
- FileDownloadButtonType,
- build_property_id,
- map_report_type_to_db_file_type,
-)
-from backend.ecmk_fetcher.excel_writer import write_row
-from backend.ecmk_fetcher.upload import (
- upload_excel_to_sharepoint,
- upload_file_to_s3_and_update_db,
- upload_file_to_sharepoint,
-)
-from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
-from utils.logger import setup_logger
-from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
-from utils.sharepoint.domna_sites import DomnaSites
-
-logger = setup_logger()
-
-
-def run_job() -> None:
-
- username: str = "" # TODO: get from github secrets
- password: str = ""
-
- property_list_file: str = (
- "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
- )
-
- BASE_DIR: str = os.path.dirname(__file__)
- filepath: str = os.path.join(BASE_DIR, property_list_file)
-
- property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(filepath)
-
- sharepoint_client: DomnaSharepointClient = DomnaSharepointClient(
- sharepoint_location=DomnaSites.PRIVATE_PAY
- )
-
- sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
- sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
-
- DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
- local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
-
- sharepoint_client.download_file(
- sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
- local_path=local_dimensions_path,
- )
-
- s3_bucket: str = "retrofit-energy-assessments-dev"
-
- with sync_playwright() as p:
- browser: Browser = p.chromium.launch(headless=True)
- context: BrowserContext = browser.new_context()
- page: Page = context.new_page()
-
- attach_debug_listeners(page)
-
- try:
- login(page, username, password)
- go_to_assessments(page)
-
- while True:
- rows: Locator = page.locator("#assessmentDatatable tbody tr")
- row_count: int = rows.count()
-
- for i in range(row_count):
- row: Locator = rows.nth(i)
-
- try:
- cells: Locator = row.locator("td")
-
- first_name: str = cells.nth(1).inner_text().strip()
- last_name: str = cells.nth(2).inner_text().strip()
- address: str = cells.nth(5).inner_text().strip()
- postcode: str = cells.nth(7).inner_text().strip()
- status: str = cells.nth(9).inner_text().strip()
-
- if first_name == "Oliver" and last_name == "Stephens":
- continue
-
- if status != "Submitted (not Lodged)":
- continue
-
- property_id: str = build_property_id(address, postcode)
-
- property_row: PropertyRow | None = property_map.get(property_id)
-
- if not property_row:
- continue
-
- logger.info(f"Match found for property {address}")
-
- sharepoint_address: str = property_row.address
-
- go_to_assessment_details(page, row)
-
- for report_type in REPORT_TYPES:
- hubspot_listing_id: str = property_row.listing_id
- try:
- db_file_type: FileTypeEnum = (
- map_report_type_to_db_file_type(report_type)
- )
-
- except ValueError:
- logger.error(
- f"Unknown report type {report_type}, skipping file"
- )
- continue
-
- if get_uploaded_file_by_listing_type_and_source(
- hubspot_listing_id=int(hubspot_listing_id),
- file_type=db_file_type,
- file_source=FileSourceEnum.ECMK,
- ):
- logger.debug("File already uploaded to s3, skipping")
- continue
-
- file_path: str | None = download_with_retry(
- page, report_type
- )
-
- if not file_path:
- continue
-
- logger.info(
- f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
- )
-
- try:
- if report_type == FileDownloadButtonType.RAW_XML.value:
- with open(file_path, "r", encoding="utf-8") as f:
- xml_string = f.read()
- details = parse_rdsap(xml_string)
- row_data = flatten_sap_property(details)
- write_row(local_dimensions_path, row_data)
- upload_excel_to_sharepoint(
- client=sharepoint_client,
- file_path=local_dimensions_path,
- sharepoint_path=sharepoint_excel_path,
- )
- logger.info(
- f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
- )
- else:
- upload_file_to_sharepoint(
- client=sharepoint_client,
- file_path=file_path,
- base_path=sharepoint_base_path,
- subpath=sharepoint_address,
- )
- logger.info(
- f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
- )
-
- # Upload to s3 and update db
- upload_file_to_s3_and_update_db(
- bucket=s3_bucket,
- file_path=file_path,
- hubspot_listing_id=hubspot_listing_id,
- file_type=db_file_type,
- )
-
- except Exception:
- raise
- finally:
- if os.path.exists(file_path):
- os.remove(file_path)
-
- page.go_back()
- page.wait_for_selector(
- "#assessmentDatatable tbody tr", timeout=15000
- )
-
- except Exception as e:
- raise Exception(f"Row processing failed: {str(e)}") from e
-
- if not go_to_next_page(page):
- break
-
- finally:
- context.close()
- browser.close()
diff --git a/backend/ecmk_fetcher/tests/test_ecmk_service.py b/backend/ecmk_fetcher/tests/test_ecmk_service.py
new file mode 100644
index 00000000..703bc4c5
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_ecmk_service.py
@@ -0,0 +1,594 @@
+from typing import Dict
+from unittest.mock import MagicMock, call, patch
+
+from backend.app.db.models.uploaded_file import FileTypeEnum
+from backend.ecmk_fetcher.address_list import PropertyRow
+from backend.ecmk_fetcher.ecmk_service import EcmkService
+from backend.ecmk_fetcher.reports import FileDownloadButtonType
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+
+
+FAKE_PROPERTY_MAP: Dict[str, PropertyRow] = {
+ "10 FAKE ST SW1A 1AA": PropertyRow(
+ row_index=2, address="10 Fake St SW1A 1AA", listing_id="hs-001"
+ )
+}
+
+
+def make_service(
+ sharepoint_client: DomnaSharepointClient | None = None,
+ s3_bucket: str = "test-bucket",
+ property_list_filepath: str = "/fake/properties.xlsx",
+ sharepoint_base_path: str = "/base",
+ sharepoint_excel_path: str = "/excel",
+ local_dimensions_path: str = "/fake/Dimensions.xlsx",
+) -> EcmkService:
+ return EcmkService(
+ sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient),
+ s3_bucket=s3_bucket,
+ property_list_filepath=property_list_filepath,
+ sharepoint_base_path=sharepoint_base_path,
+ sharepoint_excel_path=sharepoint_excel_path,
+ local_dimensions_path=local_dimensions_path,
+ )
+
+
+# ---------------------------------------------------------------------------
+# __init__: loads property map from spreadsheet filepath
+# ---------------------------------------------------------------------------
+
+
+def test_init_loads_property_map_from_filepath() -> None:
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ) as mock_extract:
+ _ = make_service(property_list_filepath="/some/props.xlsx")
+
+ mock_extract.assert_called_once_with("/some/props.xlsx")
+
+
+# ---------------------------------------------------------------------------
+# run(): downloads Dimensions.xlsx before Playwright browser launches
+# ---------------------------------------------------------------------------
+
+
+def _make_playwright_mocks() -> tuple[MagicMock, MagicMock, MagicMock, MagicMock]:
+ mock_page = MagicMock()
+ mock_context = MagicMock()
+ mock_context.new_page.return_value = mock_page
+ mock_browser = MagicMock()
+ mock_browser.new_context.return_value = mock_context
+ mock_playwright = MagicMock()
+ mock_playwright.chromium.launch.return_value = mock_browser
+ return mock_page, mock_context, mock_browser, mock_playwright
+
+
+def test_run_downloads_dimensions_before_browser_launch() -> None:
+ call_order: list[str] = []
+
+ mock_client = MagicMock(spec=DomnaSharepointClient)
+
+ def _on_download(**_: object) -> None:
+ call_order.append("download")
+
+ mock_client.download_file.side_effect = _on_download
+
+ _, _, mock_browser, mock_playwright = _make_playwright_mocks()
+
+ def _on_launch(**_: object) -> MagicMock:
+ call_order.append("browser")
+ return mock_browser
+
+ mock_playwright.chromium.launch.side_effect = _on_launch
+
+ with (
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ),
+ patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as mock_sync_pw,
+ ):
+ mock_sync_pw.return_value.__enter__.return_value = mock_playwright
+ service = make_service(
+ sharepoint_client=mock_client,
+ sharepoint_excel_path="/excel",
+ local_dimensions_path="/fake/Dimensions.xlsx",
+ )
+ with patch.object(service, "_run_browser_session"):
+ service.run()
+
+ assert call_order == ["download", "browser"]
+
+
+def test_run_downloads_dimensions_with_correct_paths() -> None:
+ mock_client = MagicMock(spec=DomnaSharepointClient)
+ _, _, _, mock_playwright = _make_playwright_mocks()
+
+ with (
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ),
+ patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as mock_sync_pw,
+ ):
+ mock_sync_pw.return_value.__enter__.return_value = mock_playwright
+ service = make_service(
+ sharepoint_client=mock_client,
+ sharepoint_excel_path="/excel",
+ local_dimensions_path="/fake/Dimensions.xlsx",
+ )
+ with patch.object(service, "_run_browser_session"):
+ service.run()
+
+ mock_client.download_file.assert_called_once_with(
+ sharepoint_path="/excel/Dimensions.xlsx",
+ local_path="/fake/Dimensions.xlsx",
+ )
+
+
+# ---------------------------------------------------------------------------
+# run(): passes the Playwright Page into _run_browser_session
+# ---------------------------------------------------------------------------
+
+
+def test_run_passes_page_to_run_browser_session() -> None:
+ mock_page, _, _, mock_playwright = _make_playwright_mocks()
+
+ with (
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ),
+ patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as mock_sync_pw,
+ ):
+ mock_sync_pw.return_value.__enter__.return_value = mock_playwright
+ service = make_service()
+ with patch.object(service, "_run_browser_session") as mock_session:
+ service.run()
+
+ mock_session.assert_called_once_with(mock_page)
+
+
+# ---------------------------------------------------------------------------
+# _process_file: dispatches based on report_type
+# ---------------------------------------------------------------------------
+
+
+def test_process_file_dispatches_to_xml_for_raw_xml() -> None:
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ with (
+ patch.object(service, "_process_xml_file") as mock_xml,
+ patch.object(service, "_process_pdf_file") as mock_pdf,
+ ):
+ service._process_file(
+ file_path="/tmp/file.xml",
+ report_type=FileDownloadButtonType.RAW_XML.value,
+ db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_xml.assert_called_once()
+ mock_pdf.assert_not_called()
+
+
+def test_process_file_dispatches_to_pdf_for_non_xml() -> None:
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ with (
+ patch.object(service, "_process_xml_file") as mock_xml,
+ patch.object(service, "_process_pdf_file") as mock_pdf,
+ ):
+ service._process_file(
+ file_path="/tmp/file.pdf",
+ report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
+ db_file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_pdf.assert_called_once()
+ mock_xml.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# _process_xml_file: parse → flatten → write row → upload excel → S3
+# ---------------------------------------------------------------------------
+
+
+def test_process_xml_file_full_chain() -> None:
+ fake_details = MagicMock()
+ fake_row_data = MagicMock()
+
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service(
+ s3_bucket="my-bucket",
+ sharepoint_excel_path="/excel",
+ local_dimensions_path="/dims/Dimensions.xlsx",
+ )
+
+ with (
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.parse_rdsap", return_value=fake_details
+ ) as mock_parse,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.flatten_sap_property",
+ return_value=fake_row_data,
+ ) as mock_flatten,
+ patch("backend.ecmk_fetcher.ecmk_service.write_row") as mock_write,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_excel_to_sharepoint"
+ ) as mock_upload_excel,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record"
+ ) as mock_s3,
+ patch(
+ "builtins.open",
+ MagicMock(return_value=MagicMock(
+ __enter__=lambda s: MagicMock(read=lambda: ""),
+ __exit__=MagicMock(return_value=False),
+ )),
+ ),
+ ):
+ service._process_xml_file(
+ file_path="/tmp/report.xml",
+ db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_parse.assert_called_once()
+ mock_flatten.assert_called_once_with(fake_details)
+ mock_write.assert_called_once_with("/dims/Dimensions.xlsx", fake_row_data)
+ mock_upload_excel.assert_called_once_with(
+ client=service._sharepoint_client,
+ file_path="/dims/Dimensions.xlsx",
+ sharepoint_path="/excel",
+ )
+ mock_s3.assert_called_once_with(
+ bucket="my-bucket",
+ file_path="/tmp/report.xml",
+ hubspot_listing_id="hs-001",
+ file_type=FileTypeEnum.ECMK_SURVEY_XML,
+ )
+
+
+# ---------------------------------------------------------------------------
+# _process_pdf_file: sharepoint upload → S3 upload
+# ---------------------------------------------------------------------------
+
+
+def test_process_pdf_file_uploads_to_sharepoint_then_s3() -> None:
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service(
+ s3_bucket="my-bucket",
+ sharepoint_base_path="/base",
+ )
+
+ with (
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"
+ ) as mock_sp,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
+ return_value=42,
+ ) as mock_s3,
+ patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
+ patch("backend.ecmk_fetcher.ecmk_service.db_session"),
+ ):
+ service._process_pdf_file(
+ file_path="/tmp/report.pdf",
+ file_type=FileTypeEnum.ECMK_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_sp.assert_called_once_with(
+ client=service._sharepoint_client,
+ file_path="/tmp/report.pdf",
+ base_path="/base",
+ subpath="10 Fake St",
+ )
+ mock_s3.assert_called_once_with(
+ bucket="my-bucket",
+ file_path="/tmp/report.pdf",
+ hubspot_listing_id="hs-001",
+ file_type=FileTypeEnum.ECMK_SITE_NOTE,
+ )
+
+
+# ---------------------------------------------------------------------------
+# _process_pdf_file: EPC extraction conditional on file_type
+# ---------------------------------------------------------------------------
+
+
+def test_process_pdf_file_runs_epc_extraction_for_rd_sap_site_note() -> None:
+ fake_epc_data = MagicMock()
+ fake_session = MagicMock()
+
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ with (
+ patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
+ return_value=99,
+ ),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
+ return_value=fake_epc_data,
+ ) as mock_parse,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.save_epc_property_data"
+ ) as mock_save,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.db_session"
+ ) as mock_db_session,
+ ):
+ mock_db_session.return_value.__enter__.return_value = fake_session
+ service._process_pdf_file(
+ file_path="/tmp/sitenote.pdf",
+ file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_parse.assert_called_once_with("/tmp/sitenote.pdf")
+ mock_save.assert_called_once_with(
+ session=fake_session,
+ data=fake_epc_data,
+ uploaded_file_id=99,
+ )
+
+
+def test_process_pdf_file_skips_epc_extraction_for_ecmk_site_note() -> None:
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ with (
+ patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
+ return_value=42,
+ ),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"
+ ) as mock_parse,
+ patch("backend.ecmk_fetcher.ecmk_service.db_session") as mock_db_session,
+ ):
+ service._process_pdf_file(
+ file_path="/tmp/sitenote.pdf",
+ file_type=FileTypeEnum.ECMK_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_parse.assert_not_called()
+ mock_db_session.assert_not_called()
+
+
+def test_process_pdf_file_epc_uses_separate_db_session_from_s3_upload() -> None:
+ """EPC db_session opens only after upload_file_to_s3_and_record returns."""
+ call_order: list[str] = []
+
+ def _on_s3(**_: object) -> int:
+ call_order.append("s3")
+ return 77
+
+ def _on_db_session() -> MagicMock:
+ call_order.append("db_session")
+ ctx = MagicMock()
+ ctx.__enter__ = MagicMock(return_value=MagicMock())
+ ctx.__exit__ = MagicMock(return_value=False)
+ return ctx
+
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ with (
+ patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
+ side_effect=_on_s3,
+ ),
+ patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
+ patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data"),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.db_session",
+ side_effect=_on_db_session,
+ ),
+ ):
+ service._process_pdf_file(
+ file_path="/tmp/sitenote.pdf",
+ file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ assert call_order == ["s3", "db_session"]
+
+
+# ---------------------------------------------------------------------------
+# _process_pdf_file: EPC failures swallowed with warning
+# ---------------------------------------------------------------------------
+
+
+def _pdf_file_patches_for_failure() -> tuple: # type: ignore[type-arg]
+ return (
+ patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
+ return_value=1,
+ ),
+ )
+
+
+def test_process_pdf_file_parse_failure_logged_as_warning_not_raised() -> None:
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ sp_patch, s3_patch = _pdf_file_patches_for_failure()
+ with (
+ sp_patch,
+ s3_patch,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
+ side_effect=ValueError("bad pdf"),
+ ),
+ patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data") as mock_save,
+ patch("backend.ecmk_fetcher.ecmk_service.db_session"),
+ patch("backend.ecmk_fetcher.ecmk_service.logger") as mock_logger,
+ ):
+ service._process_pdf_file(
+ file_path="/tmp/sitenote.pdf",
+ file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_logger.warning.assert_called_once()
+ mock_save.assert_not_called()
+
+
+def test_process_pdf_file_save_failure_logged_as_warning_not_raised() -> None:
+ fake_session = MagicMock()
+
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=FAKE_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ sp_patch, s3_patch = _pdf_file_patches_for_failure()
+ with (
+ sp_patch,
+ s3_patch,
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
+ return_value=MagicMock(),
+ ),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.save_epc_property_data",
+ side_effect=RuntimeError("db exploded"),
+ ),
+ patch("backend.ecmk_fetcher.ecmk_service.db_session") as mock_db_session,
+ patch("backend.ecmk_fetcher.ecmk_service.logger") as mock_logger,
+ ):
+ mock_db_session.return_value.__enter__.return_value = fake_session
+ service._process_pdf_file(
+ file_path="/tmp/sitenote.pdf",
+ file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+ sharepoint_address="10 Fake St",
+ hubspot_listing_id="hs-001",
+ )
+
+ mock_logger.warning.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# _run_browser_session: delegates file processing to _process_file
+# ---------------------------------------------------------------------------
+
+
+def _make_page_mock_with_one_matching_row() -> MagicMock:
+ cells_nth: dict[int, MagicMock] = {n: MagicMock() for n in (1, 2, 5, 7, 9)}
+ cells_nth[1].inner_text.return_value = "John"
+ cells_nth[2].inner_text.return_value = "Doe"
+ cells_nth[5].inner_text.return_value = "10 FAKE ST"
+ cells_nth[7].inner_text.return_value = "SW1A 1AA"
+ cells_nth[9].inner_text.return_value = "Submitted (not Lodged)"
+
+ cells_mock = MagicMock()
+ cells_mock.nth.side_effect = lambda n: cells_nth[n]
+
+ row_mock = MagicMock()
+ row_mock.locator.return_value = cells_mock
+
+ rows_mock = MagicMock()
+ rows_mock.count.return_value = 1
+ rows_mock.nth.return_value = row_mock
+
+ page = MagicMock()
+ page.locator.return_value = rows_mock
+ return page
+
+
+# address "10 FAKE ST" + postcode "SW1A 1AA" → build_property_id → "10SW1A1AA"
+_BROWSER_SESSION_PROPERTY_MAP: Dict[str, PropertyRow] = {
+ "10SW1A1AA": PropertyRow(
+ row_index=2, address="10 Fake St SW1A 1AA", listing_id="12345"
+ )
+}
+
+
+def test_run_browser_session_calls_process_file_for_downloaded_file() -> None:
+ mock_page = _make_page_mock_with_one_matching_row()
+
+ with patch(
+ "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
+ return_value=_BROWSER_SESSION_PROPERTY_MAP,
+ ):
+ service = make_service()
+
+ with (
+ patch("backend.ecmk_fetcher.ecmk_service.attach_debug_listeners"),
+ patch("backend.ecmk_fetcher.ecmk_service.login"),
+ patch("backend.ecmk_fetcher.ecmk_service.go_to_assessments"),
+ patch("backend.ecmk_fetcher.ecmk_service.go_to_assessment_details"),
+ patch("backend.ecmk_fetcher.ecmk_service.go_to_next_page", return_value=False),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.get_uploaded_file_by_listing_type_and_source",
+ return_value=None,
+ ),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.download_with_retry",
+ return_value="/tmp/fake.pdf",
+ ),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.map_report_type_to_db_file_type",
+ return_value=FileTypeEnum.ECMK_SITE_NOTE,
+ ),
+ patch(
+ "backend.ecmk_fetcher.ecmk_service.REPORT_TYPES",
+ [FileDownloadButtonType.SITENOTE_REPORT.value],
+ ),
+ patch.object(service, "_process_file") as mock_process_file,
+ patch("os.path.exists", return_value=False),
+ ):
+ service._run_browser_session(mock_page)
+
+ mock_process_file.assert_called_once_with(
+ file_path="/tmp/fake.pdf",
+ report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
+ db_file_type=FileTypeEnum.ECMK_SITE_NOTE,
+ sharepoint_address="10 Fake St SW1A 1AA",
+ hubspot_listing_id="12345",
+ )
diff --git a/backend/ecmk_fetcher/tests/test_handler.py b/backend/ecmk_fetcher/tests/test_handler.py
new file mode 100644
index 00000000..9de97253
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_handler.py
@@ -0,0 +1,59 @@
+from unittest.mock import MagicMock, patch
+
+from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+
+
+def test_handler_constructs_ecmk_service_and_calls_run() -> None:
+    """The handler builds ``EcmkService`` once and calls its ``run`` once."""
+    service_instance = MagicMock()
+    service_factory = MagicMock(return_value=service_instance)
+    sharepoint_stub = MagicMock(spec=DomnaSharepointClient)
+
+    with (
+        patch("backend.ecmk_fetcher.handler.handler.EcmkService", service_factory),
+        patch(
+            "backend.ecmk_fetcher.handler.handler.DomnaSharepointClient",
+            return_value=sharepoint_stub,
+        ),
+    ):
+        # Imported while the patches are active, then invoked with an empty event.
+        from backend.ecmk_fetcher.handler.handler import handler
+
+        handler({}, None)
+
+    service_factory.assert_called_once()
+    service_instance.run.assert_called_once()
+
+
+def test_handler_passes_correct_config_to_ecmk_service() -> None:
+    """The handler passes the expected bucket and path settings to ``EcmkService``."""
+    service_factory = MagicMock(return_value=MagicMock())
+    sharepoint_stub = MagicMock(spec=DomnaSharepointClient)
+
+    with (
+        patch("backend.ecmk_fetcher.handler.handler.EcmkService", service_factory),
+        patch(
+            "backend.ecmk_fetcher.handler.handler.DomnaSharepointClient",
+            return_value=sharepoint_stub,
+        ),
+    ):
+        from backend.ecmk_fetcher.handler.handler import handler
+
+        handler({}, None)
+
+    # Keyword arguments of the (single) EcmkService construction.
+    passed = service_factory.call_args.kwargs
+
+    assert passed["s3_bucket"] == "retrofit-energy-assessments-dev"
+
+    expected_base_path = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
+    assert passed["sharepoint_base_path"] == expected_base_path
+
+    expected_excel_path = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
+    assert passed["sharepoint_excel_path"] == expected_excel_path
+
+    # Local files are checked by suffix only.
+    assert passed["property_list_filepath"].endswith(
+        "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
+    )
+    assert passed["local_dimensions_path"].endswith("Dimensions.xlsx")
diff --git a/backend/ecmk_fetcher/tests/test_upload.py b/backend/ecmk_fetcher/tests/test_upload.py
new file mode 100644
index 00000000..79823e8e
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_upload.py
@@ -0,0 +1,108 @@
+from typing import Generator
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+
+from backend.app.db.models.uploaded_file import FileTypeEnum
+from backend.ecmk_fetcher.upload import upload_file_to_s3_and_record
+
+
+@pytest.fixture
+def mock_uploaded_file() -> MagicMock:
+    """Stand-in for an ``UploadedFile`` row whose ``id`` is 42."""
+    return MagicMock(id=42)
+
+
+@pytest.fixture
+def mock_session() -> MagicMock:
+    """Mock object that ``patched_deps`` yields from the patched ``db_session()``."""
+    return MagicMock()
+
+
+@pytest.fixture
+def patched_deps(
+    mock_uploaded_file: MagicMock, mock_session: MagicMock
+) -> Generator[dict[str, MagicMock], None, None]:
+    """Patch the S3 upload, ``db_session`` and ``UploadedFile`` used by ``upload``.
+
+    Yields a dict with the patch mocks (``"s3"``, ``"db_ctx"``, ``"model"``)
+    and the two fixture mocks (``"session"``, ``"uploaded_file"``). The patches
+    stay active for the duration of the test.
+    """
+    with (
+        patch(
+            "backend.ecmk_fetcher.upload.upload_file_to_s3"
+        ) as mock_s3,
+        patch(
+            "backend.ecmk_fetcher.upload.db_session"
+        ) as mock_db_ctx,
+        patch(
+            "backend.ecmk_fetcher.upload.UploadedFile",
+            return_value=mock_uploaded_file,
+        ) as mock_model,
+    ):
+        # ``with db_session() as session`` yields mock_session; a falsy
+        # __exit__ result means exceptions inside the block are not swallowed.
+        mock_db_ctx.return_value.__enter__.return_value = mock_session
+        mock_db_ctx.return_value.__exit__.return_value = False
+        yield {
+            "s3": mock_s3,
+            "db_ctx": mock_db_ctx,
+            "model": mock_model,
+            "session": mock_session,
+            "uploaded_file": mock_uploaded_file,
+        }
+
+
+def test_returns_uploaded_file_id_as_int(
+    patched_deps: dict[str, MagicMock],
+) -> None:
+    """The function returns the record's ``id`` (42 on the mock) as a real ``int``."""
+    result = upload_file_to_s3_and_record(
+        bucket="test-bucket",
+        file_path="/tmp/report.pdf",
+        hubspot_listing_id="hs-001",
+        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+    )
+
+    assert isinstance(result, int)
+    assert result == 42
+
+
+def test_uploads_to_s3_with_key_derived_from_listing_id_and_filename(
+    patched_deps: dict[str, MagicMock],
+) -> None:
+    """The S3 key is built from the listing id and the file's base name."""
+    source_path = "/some/path/site_note.pdf"
+    target_bucket = "my-bucket"
+
+    upload_file_to_s3_and_record(
+        bucket=target_bucket,
+        file_path=source_path,
+        hubspot_listing_id="hs-999",
+        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+    )
+
+    s3_upload = patched_deps["s3"]
+    s3_upload.assert_called_once_with(
+        source_path,
+        target_bucket,
+        "documents/hubspot_listing_id/hs-999/site_note.pdf",
+    )
+
+
+def test_adds_uploaded_file_record_to_session(
+    patched_deps: dict[str, MagicMock],
+) -> None:
+    """The new record is added to the session and the session is flushed once."""
+    upload_file_to_s3_and_record(
+        bucket="test-bucket",
+        file_path="/tmp/report.pdf",
+        hubspot_listing_id="hs-001",
+        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+    )
+
+    session = patched_deps["session"]
+    record = patched_deps["uploaded_file"]
+
+    session.add.assert_called_once_with(record)
+    session.flush.assert_called_once()
+
+
+def test_site_note_type_does_not_trigger_pdf_parsing(
+    patched_deps: dict[str, MagicMock],
+) -> None:
+    """Uploading a site-note file completes without any PDF-parsing mock in place."""
+    # If parsing branch still existed, this would blow up without a
+    # parse_site_notes_pdf mock — test passes only when branch is absent.
+    result = upload_file_to_s3_and_record(
+        bucket="test-bucket",
+        file_path="/tmp/site_note.pdf",
+        hubspot_listing_id="hs-002",
+        file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
+    )
+
+    assert result == 42
diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py
index cc2c908d..fc05363c 100644
--- a/backend/ecmk_fetcher/upload.py
+++ b/backend/ecmk_fetcher/upload.py
@@ -1,5 +1,6 @@
from datetime import datetime, timezone
import os
+from typing import cast
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
@@ -7,9 +8,12 @@ from backend.app.db.models.uploaded_file import (
FileTypeEnum,
UploadedFile,
)
+from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
+logger = setup_logger()
+
def upload_file_to_sharepoint(
client: DomnaSharepointClient,
@@ -41,9 +45,9 @@ def upload_excel_to_sharepoint(
# TODO: this should be moved to somewhere common and called by pashub fetcher
-def upload_file_to_s3_and_update_db(
+def upload_file_to_s3_and_record(
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
-) -> None:
+) -> int:
filename: str = os.path.basename(file_path)
key: str = f"documents/hubspot_listing_id/{hubspot_listing_id}/{filename}"
@@ -61,4 +65,7 @@ def upload_file_to_s3_and_update_db(
with db_session() as session:
# TODO: we should do multiple files at once to reduce db trips
session.add(uploaded_file)
- session.commit()
+ session.flush()
+ uploaded_file_id: int = int(cast(int, uploaded_file.id))
+
+ return uploaded_file_id
diff --git a/backend/engine/engine.py b/backend/engine/engine.py
index f7a374e0..8b4ee821 100644
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@@ -656,6 +656,15 @@ async def model_engine(body: PlanTriggerRequest):
# address_metadata=addr Switched off to remove injecting landlord inputs
)
+ # Warning! The EPC API is broken and we are getting missing data for local authority and
+ # constituency. We're going to add some verbose handling here but there may be problems
+ if prepared_epc.local_authority is None:
+ # Fill
+ prepared_epc.local_authority = ""
+
+ if prepared_epc.constituency is None:
+ prepared_epc.constituency = ""
+
input_properties.append(
Property(
id=property_id,
diff --git a/backend/etl/__init__.py b/backend/etl/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/etl/etl_opendatacommunities/README.md b/backend/etl/etl_opendatacommunities/README.md
new file mode 100644
index 00000000..728ac468
--- /dev/null
+++ b/backend/etl/etl_opendatacommunities/README.md
@@ -0,0 +1,14 @@
+The website https://epc.opendatacommunities.org/ closed down on 30th May 2026.
+
+So we downloaded the data and moved everything to S3 ( s3://retrofit-data-dev/histroical_epc/0_master_backup/ )
+
+This script assumes the following:
+
+1) You have downloaded the master copy, uncompressed it, and pointed `SRC_ROOT` in `main.py` at the extracted folder so the script can read the CSVs
+
+
+The script does the following:
+
+1) reads the certificates CSV of every local-authority folder and separates the rows by postcode
+2) compresses each postcode's CSV (gzip) and saves it to the location below
+3) location s3://retrofit-data-dev/historical_epc/<POSTCODE>/data.csv.gz
\ No newline at end of file
diff --git a/backend/etl/etl_opendatacommunities/__init__.py b/backend/etl/etl_opendatacommunities/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/etl/etl_opendatacommunities/main.py b/backend/etl/etl_opendatacommunities/main.py
new file mode 100644
index 00000000..2bd41005
--- /dev/null
+++ b/backend/etl/etl_opendatacommunities/main.py
@@ -0,0 +1,133 @@
+from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
+from io import BytesIO
+from pathlib import Path
+from typing import Any
+
+import boto3
+import pandas as pd
+from botocore.config import Config
+from tqdm import tqdm
+
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+SRC_ROOT = Path("/workspaces/home/epc_data")
+TMP_ROOT = Path("/tmp/epc_postcodes")
+S3_BUCKET = "retrofit-data-dev"
+S3_PREFIX = "historical_epc"
+
+# This script assumes you have downloaded the zip, unzipped it, and are running the script locally
+
+
+def sanitise(pc: pd.Series) -> pd.Series:
+    """Return the postcodes upper-cased with every space character removed.
+
+    Values are first cast to pandas' ``string`` dtype; only literal spaces are
+    stripped (the replacement is a plain-text one, not a regex).
+    """
+    as_text = pc.astype("string")
+    upper_cased = as_text.str.upper()
+    return upper_cased.str.replace(" ", "", regex=False)
+
+
+def shard_la(la_dir: Path) -> None:
+    """Split one local-authority ``certificates.csv`` into per-postcode CSV shards.
+
+    Rows whose cleaned postcode is missing or empty are dropped (with a
+    warning). Every remaining row is appended to ``TMP_ROOT/<POSTCODE>.csv``;
+    the header is written only when that shard file does not exist yet.
+    """
+    frame = pd.read_csv(la_dir / "certificates.csv", low_memory=False)
+    frame["POSTCODE_CLEAN"] = sanitise(frame["POSTCODE"])
+
+    total_rows = len(frame)
+    usable = frame.dropna(subset=["POSTCODE_CLEAN"])
+    usable = usable[usable["POSTCODE_CLEAN"] != ""]
+    n_dropped = total_rows - len(usable)
+    if n_dropped > 0:
+        logger.warning(f"{la_dir.name}: dropped {n_dropped} rows with empty postcode")
+
+    for postcode, rows in usable.groupby("POSTCODE_CLEAN", sort=False):
+        shard_path = TMP_ROOT / f"{postcode}.csv"
+        # Append mode: the same postcode can show up in more than one LA folder.
+        write_header = not shard_path.exists()
+        payload = rows.drop(columns=["POSTCODE_CLEAN"])
+        payload.to_csv(shard_path, mode="a", header=write_header, index=False)
+
+
+def list_existing_keys(s3: Any) -> set[str]:
+    """Return every object key currently stored under ``S3_PREFIX/`` in ``S3_BUCKET``.
+
+    The listing is paginated through the client's ``list_objects_v2``
+    paginator; pages without a ``Contents`` entry contribute nothing.
+    """
+    page_iter = s3.get_paginator("list_objects_v2").paginate(
+        Bucket=S3_BUCKET, Prefix=f"{S3_PREFIX}/"
+    )
+
+    keys: set[str] = set()
+    for page in tqdm(page_iter, desc="list s3"):
+        keys.update(entry["Key"] for entry in page.get("Contents", []))
+
+    logger.info(f"Found {len(keys)} existing objects under {S3_PREFIX}/")
+    return keys
+
+
+def upload_postcode(path: Path, s3: Any) -> None:
+    """Deduplicate one postcode shard, gzip it and upload it to S3.
+
+    The object is stored as ``S3_PREFIX/<shard stem>/data.csv.gz``.
+
+    Raises:
+        ValueError: if, after dropping fully identical rows, an ``LMK_KEY``
+            still occurs more than once (the same key with differing data).
+    """
+    frame = pd.read_csv(path, low_memory=False).drop_duplicates()
+
+    key_counts = frame["LMK_KEY"].value_counts()
+    conflicting = key_counts[key_counts > 1]
+    if len(conflicting) > 0:
+        raise ValueError(
+            f"Postcode {path.stem}: LMK_KEY appears with conflicting cert data: "
+            f"{conflicting.index.tolist()[:5]}"
+        )
+
+    # Compress in memory; nothing is written back to the local shard.
+    gz_buffer = BytesIO()
+    frame.to_csv(gz_buffer, index=False, compression="gzip")
+
+    object_key = f"{S3_PREFIX}/{path.stem}/data.csv.gz"
+    s3.put_object(
+        Bucket=S3_BUCKET,
+        Key=object_key,
+        Body=gz_buffer.getvalue(),
+        ContentType="text/csv",
+        ContentEncoding="gzip",
+    )
+
+
+def main() -> None:
+    """Shard all local-authority EPC exports by postcode and upload the shards to S3.
+
+    Steps:
+      1. Split every ``domestic-*`` folder under ``SRC_ROOT`` into per-postcode
+         CSV files in ``TMP_ROOT``.
+      2. List what already exists under ``S3_PREFIX/`` and skip those shards.
+      3. Upload the remaining shards through a thread pool, keeping at most
+         ``workers * 2`` uploads submitted at any time.
+
+    The first failing upload is logged and re-raised. Uploads that have not
+    started yet are cancelled first, so the run stops promptly instead of
+    draining the queue, and the progress bar is always closed.
+    """
+    TMP_ROOT.mkdir(parents=True, exist_ok=True)
+    la_dirs = sorted(
+        p for p in SRC_ROOT.iterdir() if p.is_dir() and p.name.startswith("domestic-")
+    )
+    logger.info(f"Sharding {len(la_dirs)} LA folders -> {TMP_ROOT}")
+
+    for la in tqdm(la_dirs, desc="shard"):
+        shard_la(la)
+
+    s3 = boto3.client(
+        "s3",
+        config=Config(
+            max_pool_connections=512, retries={"max_attempts": 5, "mode": "standard"}
+        ),
+    )
+    pc_files = sorted(TMP_ROOT.glob("*.csv"))
+    logger.info(f"Found {len(pc_files)} local shards")
+
+    existing = list_existing_keys(s3)
+    todo = [p for p in pc_files if f"{S3_PREFIX}/{p.stem}/data.csv.gz" not in existing]
+    skipped = len(pc_files) - len(todo)
+    logger.info(
+        f"Uploading {len(todo)} shards (skipping {skipped} already in S3) -> "
+        f"s3://{S3_BUCKET}/{S3_PREFIX}/"
+    )
+
+    workers = 256
+    todo_iter = iter(todo)
+    inflight: dict[Any, Path] = {}
+
+    # Context managers guarantee the bar is closed and the pool is shut down
+    # even when an upload raises.
+    with tqdm(total=len(todo), desc="upload") as pbar:
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+
+            def submit_next() -> bool:
+                """Submit the next pending shard; return False when none are left."""
+                nxt = next(todo_iter, None)
+                if nxt is None:
+                    return False
+                inflight[pool.submit(upload_postcode, nxt, s3)] = nxt
+                return True
+
+            # Prime the pool with a bounded window instead of queueing everything.
+            for _ in range(workers * 2):
+                if not submit_next():
+                    break
+
+            while inflight:
+                done, _ = wait(inflight.keys(), return_when=FIRST_COMPLETED)
+                for fut in done:
+                    pc = inflight.pop(fut)
+                    try:
+                        fut.result()
+                    except Exception as e:
+                        logger.error(f"{pc.name}: {e}")
+                        # Drop queued work; uploads already running finish
+                        # while the executor shuts down.
+                        for pending in inflight:
+                            pending.cancel()
+                        raise
+                    pbar.update(1)
+                    submit_next()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/backend/export/tests/test_export.py b/backend/export/tests/test_export.py
index f13ef374..42177749 100644
--- a/backend/export/tests/test_export.py
+++ b/backend/export/tests/test_export.py
@@ -282,11 +282,6 @@ def test_default_export_integration(db_session):
df["sap_points"].sum()
)
- assert df.shape == (
- 10,
- 101,
- ), "Expected dataframe shape to be (10, 101), got {}".format(df.shape)
-
def test_solar_with_battery_example(db_session):
test_portfolio_id = 1
@@ -337,7 +332,7 @@ def test_solar_with_battery_example(db_session):
"creation_status": "PropertyCreationStatus.READY",
"uprn": 100090438731,
"landlord_property_id": "BARR052",
- "building_reference_number": 3460742868.0,
+ "building_reference_number": 3460742868,
"status": "PortfolioStatus.ASSESSMENT",
"address": "52, Barrack Street",
"postcode": "CO1 2LR",
@@ -566,6 +561,8 @@ def test_solar_with_battery_example(db_session):
creation_status=PropertyCreationStatus[row.creation_status.split(".")[-1]],
status=PortfolioStatus[row.status.split(".")[-1]],
uprn=row.uprn,
+ address=row.address,
+ postcode=row.postcode,
property_type=row.property_type,
current_sap_points=row.current_sap_points,
current_epc_rating=Epc[row.current_epc_rating.split(".")[-1]],
diff --git a/backend/magic_plan/__init__.py b/backend/magic_plan/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/magic_plan/address_matcher.py b/backend/magic_plan/address_matcher.py
new file mode 100644
index 00000000..3477c535
--- /dev/null
+++ b/backend/magic_plan/address_matcher.py
@@ -0,0 +1,46 @@
+import re
+from typing import Optional
+
+from datatypes.magicplan.api.response import PlanSummary
+
+_UK_POSTCODE_RE = re.compile(r"[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}", re.IGNORECASE)
+
+
+def _extract_postcode(address: str) -> str | None:
+    """Return the first UK-style postcode found in *address*, or ``None``.
+
+    The result is normalised: upper-cased with all whitespace removed
+    (for example ``"br2 8bz"`` becomes ``"BR28BZ"``).
+    """
+    match = _UK_POSTCODE_RE.search(address)
+    if match is None:
+        return None
+    # The pattern accepts any whitespace (\s*) between the two halves, so strip
+    # all of it — not just literal spaces — or a tab/newline would survive and
+    # break the later equality check.
+    return "".join(match.group().split()).upper()
+
+
+def _normalize_postcode(postcode: str) -> str:
+    """Return *postcode* upper-cased with all whitespace removed.
+
+    Every whitespace character is stripped (tabs, newlines, repeated or
+    surrounding spaces), not only the literal space, so values coming from an
+    external API compare equal regardless of how they were spaced.
+    """
+    return "".join(postcode.split()).upper()
+
+
+def find_matching_plan(plans: list[PlanSummary], address: str) -> Optional[PlanSummary]:
+    """Return the first plan whose postcode and street line both match *address*.
+
+    A plan matches when:
+      * its postal code equals the postcode extracted from *address*
+        (compared after normalisation), and
+      * its street line (``street_number`` + ``street``, lower-cased) occurs in
+        the lower-cased address as a whole token sequence.
+
+    Returns ``None`` when *address* contains no postcode or nothing matches.
+    Plans without an address, a postal code or a street line are skipped.
+    """
+    postcode = _extract_postcode(address)
+    if postcode is None:
+        return None
+
+    address_lower = address.lower()
+
+    for plan in plans:
+        if plan.address is None:
+            continue
+
+        plan_postcode = plan.address.postal_code
+        if plan_postcode is None:
+            continue
+
+        if _normalize_postcode(plan_postcode) != postcode:
+            continue
+
+        street_parts = [
+            p for p in [plan.address.street_number, plan.address.street] if p
+        ]
+        plan_street = " ".join(street_parts).lower()
+        if not plan_street:
+            continue
+
+        # A bare substring test would let "2 laburnum way" match
+        # "12 laburnum way ..." (wrong house number on the same postcode).
+        # Require the street line to start and end on a word boundary.
+        street_pattern = rf"(?<!\w){re.escape(plan_street)}(?!\w)"
+        if re.search(street_pattern, address_lower):
+            return plan
+
+    return None
diff --git a/backend/magic_plan/handler.py b/backend/magic_plan/handler.py
new file mode 100644
index 00000000..a592cc6a
--- /dev/null
+++ b/backend/magic_plan/handler.py
@@ -0,0 +1,36 @@
+from typing import Any
+
+from backend.app.config import get_settings
+from backend.magic_plan.magic_plan_client import MagicPlanClient
+from backend.magic_plan.magic_plan_service import MagicPlanService
+from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerRequest
+from datatypes.magicplan.domain.models import Plan
+from backend.utils.subtasks import task_handler
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+@task_handler()
+def handler(body: dict[str, Any], context: Any) -> str:
+    """Fetch and save the MagicPlan plan for the requested address.
+
+    ``body`` is validated as a ``MagicPlanTriggerRequest``; the client is
+    configured from the application settings. Returns the saved plan's ``uid``.
+    """
+    settings = get_settings()
+    request = MagicPlanTriggerRequest.model_validate(body)
+
+    service = MagicPlanService(
+        MagicPlanClient(
+            customer_id=settings.MAGICPLAN_CUSTOMER_ID,
+            api_key=settings.MAGICPLAN_API_KEY,
+        )
+    )
+    saved_plan: Plan = service.run(request.address, request.uprn)
+
+    logger.info("Saved MagicPlan plan uid=%s", saved_plan.uid)
+    return saved_plan.uid
+
+
+# Local smoke run: feeds the handler a queue-style event with a single record
+# whose body is the JSON trigger payload (presumably unwrapped by
+# @task_handler before reaching the function body — confirm in
+# backend.utils.subtasks).
+if __name__ == "__main__":
+    event = {
+        "Records": [
+            {
+                "body": '{"address": "2 Laburnum Way Bromley BR2 8BZ"}',
+                "messageId": "local-test",
+            }
+        ]
+    }
+    handler(event, None)
diff --git a/backend/magic_plan/magic_plan_client.py b/backend/magic_plan/magic_plan_client.py
new file mode 100644
index 00000000..60f70fb1
--- /dev/null
+++ b/backend/magic_plan/magic_plan_client.py
@@ -0,0 +1,24 @@
+import requests
+
+from datatypes.magicplan.api.response import MagicPlanPlan, PlansListResponse
+
+_BASE_URL = "https://cloud.magicplan.app/api/v2"
+
+
+class MagicPlanClient:
+    """Minimal HTTP client for the MagicPlan v2 REST API.
+
+    The customer id is sent as the ``customer`` header on every request and
+    the API key as the ``key`` query parameter.
+
+    Args:
+        customer_id: value of the ``customer`` request header.
+        api_key: value of the ``key`` query parameter.
+        timeout: seconds to wait for each request before giving up. Without a
+            timeout ``requests`` can block indefinitely on an unresponsive
+            server.
+    """
+
+    def __init__(self, customer_id: str, api_key: str, timeout: float = 30.0) -> None:
+        self._api_key = api_key
+        self._timeout = timeout
+        self._session = requests.Session()
+        self._session.headers.update({"customer": customer_id})
+
+    def _get_data(self, path: str) -> object:
+        """GET ``_BASE_URL + path`` and return the ``data`` member of the JSON body.
+
+        Raises ``requests.HTTPError`` on a non-2xx response.
+        """
+        r = self._session.get(
+            f"{_BASE_URL}{path}",
+            params={"key": self._api_key},
+            timeout=self._timeout,
+        )
+        r.raise_for_status()
+        return r.json()["data"]
+
+    def get_plans(self) -> PlansListResponse:
+        """Return the plan listing from ``/plans``."""
+        return PlansListResponse.model_validate(self._get_data("/plans"))
+
+    def get_plan(self, plan_id: str) -> MagicPlanPlan:
+        """Return the full plan identified by *plan_id* from ``/plans/<plan_id>``."""
+        return MagicPlanPlan.model_validate(self._get_data(f"/plans/{plan_id}"))
diff --git a/backend/magic_plan/magic_plan_service.py b/backend/magic_plan/magic_plan_service.py
new file mode 100644
index 00000000..91b3cd13
--- /dev/null
+++ b/backend/magic_plan/magic_plan_service.py
@@ -0,0 +1,42 @@
+from typing import Optional
+
+from datatypes.magicplan.api.response import (
+ MagicPlanPlan,
+ PlanSummary,
+ PlansListResponse,
+)
+from datatypes.magicplan.domain.mapper import map_plan
+from datatypes.magicplan.domain.models import Plan
+
+from backend.app.db.connection import db_session
+from backend.app.db.functions.magic_plan_functions import save_plan
+from backend.magic_plan.address_matcher import find_matching_plan
+from backend.magic_plan.magic_plan_client import MagicPlanClient
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class MagicPlanService:
+    """Finds the MagicPlan plan for an address, maps it and saves it to the database."""
+
+    def __init__(self, client: MagicPlanClient) -> None:
+        self._client = client
+
+    def run(self, address: str, uprn: Optional[str] = None) -> Plan:
+        """Fetch, map and persist the plan matching *address*.
+
+        ``uprn`` is only logged; matching is done on the address.
+
+        Raises:
+            ValueError: if no plan in the listing matches *address*.
+        """
+        if uprn is not None:
+            logger.info("MagicPlanService.run uprn=%s", uprn)
+
+        listing: PlansListResponse = self._client.get_plans()
+        # TODO: use address2UPRN instead? or create AddressMatch domain class
+        summary: Optional[PlanSummary] = find_matching_plan(listing.plans, address)
+        if summary is None:
+            raise ValueError(f"No MagicPlan found for address: {address!r}")
+
+        raw_plan: MagicPlanPlan = self._client.get_plan(summary.id)
+        domain_plan: Plan = map_plan(raw_plan)
+
+        with db_session() as session:
+            save_plan(session, domain_plan)
+
+        return domain_plan
diff --git a/backend/magic_plan/magic_plan_trigger_request.py b/backend/magic_plan/magic_plan_trigger_request.py
new file mode 100644
index 00000000..bb0151e4
--- /dev/null
+++ b/backend/magic_plan/magic_plan_trigger_request.py
@@ -0,0 +1,10 @@
+from typing import Optional
+
+from pydantic import BaseModel, ConfigDict
+
+
+class MagicPlanTriggerRequest(BaseModel):
+    """Payload that triggers a MagicPlan fetch; unknown fields are ignored."""
+
+    model_config = ConfigDict(extra="ignore")
+
+    # Free-text address used to look up the plan (required).
+    address: str
+    # Optional UPRN accompanying the address.
+    uprn: Optional[str] = None
diff --git a/backend/magic_plan/magicplan_api_plan_response_example.json b/backend/magic_plan/magicplan_api_plan_response_example.json
new file mode 100644
index 00000000..d76b3540
--- /dev/null
+++ b/backend/magic_plan/magicplan_api_plan_response_example.json
@@ -0,0 +1,136742 @@
+{
+ "message": "OK",
+ "data": {
+ "plan": {
+ "id": "a7285ed1-878d-47eb-8aa6-85ef9e187516",
+ "project_id": "9f8f3208-0f04-466f-9c4c-e776532183c8",
+ "name": "2, Br2 8bz",
+ "address": {
+ "street": "2 Laburnum Way",
+ "street_number": null,
+ "postal_code": "BR2 8BZ",
+ "city": "Bromley",
+ "country": "GB",
+ "longitude": 0.0616749,
+ "latitude": 51.3835182
+ },
+ "creation_date": "2026-04-28T08:32:58+00:00",
+ "update_date": "2026-04-29T14:58:54+00:00",
+ "thumbnail_url": "https:\/\/s3.amazonaws.com\/prod.plans.sensopia.com\/a7285ed1-878d-47eb-8aa6-85ef9e187516\/plan.thumb",
+ "public_url": "https:\/\/cloud.magicplan.app\/plan\/a7285ed1-878d-47eb-8aa6-85ef9e187516",
+ "cloud_url": "https:\/\/cloud.magicplan.app\/projects\/a7285ed1-878d-47eb-8aa6-85ef9e187516",
+ "3d_url": "https:\/\/3d.magicplan.app\/#embed\/?key=YzBkMTQyZDRlY2E5MmEzMWQ4NWE1NWJmMGE4OTQ5ZjMwOTNlZjcwNjhkN2U4ODg5ZDZiMDI1OTRkNWU5ZTY0N%2B9n3Xg%2FF422BetMnabb%2FwQI3XiEQbNltioOXI05WueYapFlJvuxgPLnzxjLI1eFcsii6s7vRgs71gHD1LPsSBcNGjF424hTkMCt9hxbCryf",
+ "workgroup_id": "677d01685458a",
+ "team_id": null,
+ "created_by": {
+ "id": "49c5fd0d-5031-4a7d-aa59-3cc1b64d18aa",
+ "firstname": null,
+ "lastname": null,
+ "email": "sebastian@osmosis-acd.com"
+ }
+ },
+ "plan_detail": {
+ "magicplan_format_xml": "\n2026-04-24<\/value>2.134<\/value>100<\/value>0<\/value>0<\/value><\/values>Ground Floor<\/name>2.450007<\/value>Total m2 =1.196 yd\u00b2 <\/value><\/values><\/symbolInstance>m<\/value>m<\/value>m2<\/value>outdoors<\/value>m3<\/value><\/values><\/symbolInstance>annotations<\/value>3<\/value>left<\/value>M2 - 44.19\nHeight - 2.43\nHLP - 20.56\nPWL - 6.12<\/value>top<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>annotations<\/value>7.22m<\/value><\/values><\/symbolInstance>annotations<\/value>6.12m<\/value><\/values><\/symbolInstance>m<\/value>0<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.026217<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>0.905517<\/value>1.20394<\/value><\/values><\/symbolInstance>0.496099<\/value>0.241025<\/value>plumbing<\/value>0.682423<\/value>0.241025<\/value>0.454712<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.014143<\/value><\/values><\/symbolInstance>m<\/value>0<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>1.963297<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>m<\/value>m<\/value>m2<\/value>hvac<\/value>m3<\/value>0.1500<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.014739<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>0.985417<\/value>1.099043<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>0.911601<\/value>1.123649<\/value><\/values><\/symbolInstance>m<\/value>0<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.057803<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>m<\/value>m<\/value>m2<\/value>hvac<\/value>m3<\/value>0.1500<\/value><\/value
s><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>1.942764<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>0.968262<\/value>1.063659<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.014143<\/value><\/values><\/symbolInstance>m<\/value>0<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.133701<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>2.014739<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>1.942764<\/value><\/values><\/symbolInstance>m<\/value>1<\/value>m<\/value>m2<\/value>doors<\/value>m3<\/value>0.867147<\/value>1.202909<\/value><\/values><\/symbolInstance>677d01685458a<\/value><\/values><\/symbolInstance>m<\/value>m<\/value>m2<\/value>hvac<\/value>m3<\/value>0.1500<\/value><\/values><\/symbolInstance>m<\/value>m<\/value>