mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #694 from Hestia-Homes/feature/address_to_uprn
Feature/address to uprn
This commit is contained in:
commit
535e5befb5
32 changed files with 1398 additions and 433 deletions
39
.devcontainer/asset_list/Dockerfile
Normal file
39
.devcontainer/asset_list/Dockerfile
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Dev-container image for the asset-list tooling: Python 3.11 on Debian
# bullseye, libpostal built from source, and a passwordless-sudo user.
FROM python:3.11.10-bullseye

ARG USER=vscode
# Build-time only (ARG, not ENV) so it does not leak into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# 1) Toolchain + utilities for building libpostal
RUN apt-get update && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libtool \
        pkg-config \
        sudo \
        vim \
    && rm -rf /var/lib/apt/lists/*

# 2) Build and install libpostal from source
# NOTE: this clones the tip of the default branch, so rebuilds are not
# reproducible; pin a tag/commit here if that becomes a problem.
# The clone is removed in the same layer so it does not bloat the image.
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
    && cd /tmp/libpostal \
    && ./bootstrap.sh \
    && ./configure --datadir=/usr/local/share/libpostal \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm -rf /tmp/libpostal

# 3) Create the user and grant sudo privileges
RUN useradd -m -s /usr/bin/bash ${USER} \
    && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
    && chmod 0440 /etc/sudoers.d/${USER}

# 4) Python deps - needed if you want to run the asset list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# COPY (not ADD): this is a plain local file, no archive extraction or URL fetch.
COPY asset_list/requirements.txt requirements.txt
# Installed once; the previous second identical `pip install` was a no-op layer.
RUN pip install -r requirements.txt

# 5) Workdir
WORKDIR /workspaces/model

# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes.
# PYTHONPATH is not defined earlier in this file, so appending ${PYTHONPATH}
# only produced a trailing ":" (an empty path entry); set the value directly.
ENV PYTHONPATH=/workspaces/model
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "Basic Python",
|
||||
"name": "SAL ENV",
|
||||
"dockerComposeFile": "docker-compose.yml",
|
||||
"service": "model",
|
||||
"service": "model-sal",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/post-install.sh",
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
model:
|
||||
model-sal:
|
||||
user: "${UID}:${GID}"
|
||||
build:
|
||||
context: ..
|
||||
|
|
@ -11,4 +11,4 @@ if os.path.exists(env_path):
|
|||
print("✔ Loaded .env into Jupyter kernel")
|
||||
else:
|
||||
print("⚠ No .env file found to load")
|
||||
EOF
|
||||
EOF
|
||||
23
.devcontainer/asset_list/requirements.txt
Normal file
23
.devcontainer/asset_list/requirements.txt
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
psycopg2-binary==2.9.10
|
||||
python-jose==3.3.0
|
||||
cryptography==43.0.3
|
||||
mangum==0.19.0
|
||||
# AWS
|
||||
boto3==1.35.44
|
||||
# Data
|
||||
openpyxl==3.1.2
|
||||
# Basic
|
||||
pytz
|
||||
uvicorn[standard]
|
||||
# Testing
|
||||
pytest==9.0.2
|
||||
pytest-cov==7.0.0
|
||||
ipykernel>=6.25,<7
|
||||
pydantic-settings<2
|
||||
pyyaml>=6.0.1
|
||||
pydantic>=1.10.7,<2
|
||||
sqlmodel
|
||||
# Formatting
|
||||
black==26.1.0
|
||||
|
|
@ -34,7 +34,7 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
|
|||
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
ADD backend/engine/requirements.txt requirements1.txt
|
||||
ADD backend/app/requirements/requirements.txt requirements2.txt
|
||||
ADD .devcontainer/requirements.txt requirements3.txt
|
||||
ADD .devcontainer/backend/requirements.txt requirements3.txt
|
||||
RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
40
.devcontainer/backend/devcontainer.json
Normal file
40
.devcontainer/backend/devcontainer.json
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"name": "Backend Model Env",
|
||||
"dockerComposeFile": "docker-compose.yml",
|
||||
"service": "model-backend",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-python.python",
|
||||
"ms-toolsai.jupyter",
|
||||
"mechatroner.rainbow-csv",
|
||||
"ms-toolsai.datawrangler",
|
||||
"lindacong.vscode-book-reader",
|
||||
"4ops.terraform",
|
||||
"fabiospampinato.vscode-todo-plus",
|
||||
"jgclark.vscode-todo-highlight",
|
||||
"corentinartaud.pdfpreview",
|
||||
"ms-python.vscode-python-envs",
|
||||
"ms-python.black-formatter",
|
||||
"waderyan.gitblame"
|
||||
],
|
||||
"settings": {
|
||||
"files.defaultWorkspace": "/workspaces/model",
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"python.formatting.provider": "none"
|
||||
}
|
||||
}
|
||||
},
|
||||
"containerEnv": {
|
||||
"PYTHONFLAGS": "-Xfrozen_modules=off"
|
||||
}
|
||||
}
|
||||
18
.devcontainer/backend/docker-compose.yml
Normal file
18
.devcontainer/backend/docker-compose.yml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
version: '3.8'

services:
  model-backend:
    # Run as the invoking host user so files written to the bind mount keep
    # host ownership. UID/GID are usually shell variables that are NOT exported
    # to Compose, so fall back to 1000:1000 instead of interpolating to ":".
    user: "${UID:-1000}:${GID:-1000}"
    build:
      # Build context is the repository root (two levels above this file).
      context: ../..
      dockerfile: .devcontainer/backend/Dockerfile
    # Keep the container alive so the editor can attach to it.
    command: sleep infinity
    volumes:
      - ../../:/workspaces/model
    networks:
      - model-net

networks:
  model-net:
    driver: bridge
|
||||
|
||||
14
.devcontainer/backend/post-install.sh
Normal file
14
.devcontainer/backend/post-install.sh
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Devcontainer post-start hook: writes an IPython startup script that loads the
# backend .env file into each new kernel.
mkdir -p ~/.ipython/profile_default/startup

# The quoted 'EOF' delimiter stops the shell from expanding anything inside the
# heredoc, so the Python below is written out verbatim.
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
from dotenv import load_dotenv
import os

# Adjust path as needed
env_path = "/workspaces/model/backend/.env"
if os.path.exists(env_path):
    load_dotenv(env_path)
    print("✔ Loaded .env into Jupyter kernel")
else:
    print("⚠ No .env file found to load")
EOF
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# fastapi
|
||||
|
||||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
pydantic-settings==2.6.0
|
||||
6
.github/workflows/unit_tests.yml
vendored
6
.github/workflows/unit_tests.yml
vendored
|
|
@ -2,6 +2,12 @@ name: Run unit tests
|
|||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
push:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
|
||||
jobs:
|
||||
test:
|
||||
|
|
|
|||
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
|
@ -9,9 +9,12 @@
|
|||
"path": "/bin/bash"
|
||||
}
|
||||
},
|
||||
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
|
||||
|
||||
// Hot reload setting that needs to be in user settings
|
||||
// "jupyter.runStartupCommands": [
|
||||
|
|
|
|||
|
|
@ -34,7 +34,8 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
|||
logger = setup_logger()
|
||||
|
||||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# Read the key from the environment only. Never commit a fallback key: the one
# previously hard-coded here is in git history and must be revoked/rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
|
|
@ -1159,13 +1160,17 @@ class AssetList:
|
|||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
col = self.EPC_API_DATA_NAMES["roof-description"]
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
|
||||
lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
|
||||
lambda x: RoofAttributes(description=x[col]).process()[
|
||||
"insulation_thickness"] if not pd.isnull(
|
||||
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
|
||||
x[col]) else None,
|
||||
axis=1
|
||||
)
|
||||
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "")
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# Read the key from the environment only. Never commit a fallback key: the one
# previously hard-coded here is in git history and must be revoked/rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
|
|
|
|||
0
asset_list/__init__.py
Normal file
0
asset_list/__init__.py
Normal file
|
|
@ -12,9 +12,8 @@ from asset_list.utils import get_data
|
|||
|
||||
from dotenv import load_dotenv
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
# Read the token from the environment only. The credential previously
# hard-coded here as a default is in git history and must be rotated.
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
|
||||
|
|
@ -58,6 +57,10 @@ def app():
|
|||
EPC recommendations
|
||||
Property UPRN
|
||||
"""
|
||||
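# FIXME(merge): an unresolved conflict marker ("<<<<<<< HEAD") was committed on this line; keep one side of the conflict and delete the other.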
|
||||
data_folder = ("/workspaces/model/asset_list")
|
||||
data_filename = "assets.xlsx"
|
||||
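# FIXME(merge): an unresolved conflict separator ("=======") was committed on this line; the assignments below override the ones above.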
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
|
||||
data_filename = "Domna SHF Wave 3 (3).xlsx"
|
||||
|
|
@ -96,22 +99,23 @@ def app():
|
|||
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
"Project/data_validation")
|
||||
data_filename = "to_standardise_uprns.xlsx"
|
||||
# FIXME(merge): an unresolved conflict end marker (">>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c") was committed on this line.
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = "Address 1"
|
||||
address1_method = None
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
|
||||
address1_column = None
|
||||
address1_method = 'house_number_extraction'
|
||||
fulladdress_column = 'Address'
|
||||
address_cols_to_concat = None
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Type"
|
||||
landlord_built_form = "Attachment"
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Org Ref"
|
||||
landlord_property_id = "LLUPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -127,40 +131,6 @@ def app():
|
|||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Lambeth:
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
|
||||
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Postcode'
|
||||
# address1_column = "Address"
|
||||
# address1_method = None
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["Address"]
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = None
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "row_id"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_id_colnames = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = None
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
manual_uprn_map = {}
|
||||
|
||||
|
|
@ -439,6 +409,10 @@ def app():
|
|||
)
|
||||
|
||||
asset_list.merge_data(epc_df)
|
||||
# asset_list.standardised_asset_list = asset_list.standardised_asset_list[
|
||||
# asset_list.standardised_asset_list["domna_full_address"]
|
||||
# != "120 Airdrie Crescent, Burnley, Lancashire"
|
||||
# ]
|
||||
asset_list.extract_attributes()
|
||||
asset_list.identify_worktypes()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
postal
|
||||
pandas
|
||||
usaddress
|
||||
pydantic-settings==2.6.0
|
||||
epc-api-python==1.0.2
|
||||
thefuzz
|
||||
boto3
|
||||
|
|
@ -10,6 +9,5 @@ openai>=1.3.5
|
|||
tiktoken
|
||||
msgpack
|
||||
beautifulsoup4
|
||||
pydantic>=1.10.7
|
||||
typing-extensions>=4.5.0
|
||||
requests>=2.28.2
|
||||
requests>=2.28.2
|
||||
20
backend/address2UPRN/README.md
Normal file
20
backend/address2UPRN/README.md
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
We have a list of addresses as input.
|
||||
|
||||
The addresses arrive in batches that share the same postcode, and we want to convert each address into its UPRN.
|
||||
|
||||
If this lambda/function can do that, we'll be speeding ahead.
|
||||
|
||||
|
||||
Energy Performance Information: https://epc.opendatacommunities.org/
|
||||
|
||||
guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
|
||||
|
||||
Example of earlier code by Khalim, with some tests he wrote for it: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
|
||||
|
||||
|
||||
Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
|
||||
|
||||
|
||||
|
||||
Khalim has made a Python package to help scrape data: https://github.com/KhalimCK/epc-api-python
|
||||
|
||||
0
backend/address2UPRN/__init__.py
Normal file
0
backend/address2UPRN/__init__.py
Normal file
520
backend/address2UPRN/main.py
Normal file
520
backend/address2UPRN/main.py
Normal file
|
|
@ -0,0 +1,520 @@
|
|||
from epc_api.client import EpcClient
|
||||
import os
|
||||
from urllib.parse import urlencode
|
||||
import pandas as pd
|
||||
from difflib import SequenceMatcher
|
||||
from tqdm import tqdm
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
import re
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Set
|
||||
|
||||
|
||||
def levenshtein(a: str, b: str) -> float:
    """
    Score how alike two addresses are, as a float in [0, 1] rounded to 4 places.

    Despite the name this is not an edit distance. Both inputs are passed
    through ``normalise_address`` and then:

    1. House/flat numbers act as a veto: if ``a`` has numbers and ``b`` has
       none, or the two share no number at all, the score is 0.0.
    2. An order-sensitive guard returns 0.0 when ``a`` has exactly two numbers
       and no flat keyword, ``b`` mentions "flat" with at least two numbers,
       and the first two numbers of ``b`` are not the same sequence.
    3. Otherwise the score is 0.65 * token Jaccard overlap
       + 0.35 * ``SequenceMatcher`` character ratio.

    Args:
        a: The user-supplied address.
        b: The candidate (EPC) address.
    """
    number_pattern = re.compile(r"\d+[a-z]?")

    left = normalise_address(a)
    right = normalise_address(b)

    # Ordered number sequences, plus their set form for the overlap veto.
    left_seq = number_pattern.findall(left)
    right_seq = number_pattern.findall(right)
    left_nums = set(left_seq)
    right_nums = set(right_seq)

    if left_nums:
        # Numbers on the user side must be matched by at least one on the other.
        if not right_nums or left_nums.isdisjoint(right_nums):
            return 0.0

    # NOTE(review): these are substring tests on the normalised text, so a word
    # that merely contains "flat" or "unit" also counts as a flat keyword.
    user_mentions_flat = False
    for keyword in ("flat", "apt", "apartment", "unit"):
        if keyword in left:
            user_mentions_flat = True
            break
    epc_mentions_flat = "flat" in right

    if (
        epc_mentions_flat
        and not user_mentions_flat
        and len(left_seq) == 2
        and len(right_seq) >= 2
        and left_seq != right_seq[:2]
    ):
        return 0.0

    # Order-independent word overlap (Jaccard index).
    left_tokens = set(left.split())
    right_tokens = set(right.split())
    if left_tokens and right_tokens:
        shared = left_tokens & right_tokens
        combined = left_tokens | right_tokens
        jaccard = len(shared) / len(combined)
    else:
        jaccard = 0.0

    # Character-level similarity as the softer signal.
    ratio = SequenceMatcher(None, left, right).ratio()

    return round(0.65 * jaccard + 0.35 * ratio, 4)
|
||||
|
||||
|
||||
def normalise_address(s: str) -> str:
    """
    Canonical UK-focused address normalisation.

    Steps, in order:
    - Returns "" for falsy input and for NaN (missing DataFrame cells);
      any other non-string value is converted with ``str()``.
    - Lowercases.
    - Splits a single trailing letter off a number ("28a" -> "28 a").
    - Replaces punctuation with spaces (keeps "/" for flats such as "3/137").
    - Collapses whitespace.
    - Applies synonym compression per token ("rd" -> "road", "apt" -> "flat")
      and drops the noise token "no".

    Args:
        s: Raw address text. Expected to be a string, but callers apply this
           to DataFrame columns, so missing/numeric cells are tolerated.

    Returns:
        The normalised address as a single space-separated string.
    """

    if not s:
        return ""

    if not isinstance(s, str):
        # NaN is the only float not equal to itself; treat it as "no address"
        # instead of crashing on .lower() below.
        if isinstance(s, float) and s != s:
            return ""
        s = str(s)

    # Dotted variants ("rd.", "st.", "no.") are not listed: punctuation is
    # stripped before tokenising, so such tokens can never reach the lookup.
    ADDRESS_SYNONYMS = {
        # street types
        "rd": "road",
        "st": "street",
        "ave": "avenue",
        "ln": "lane",
        "cres": "crescent",
        "ct": "court",
        "dr": "drive",
        # flats / units
        "apt": "flat",
        "apartment": "flat",
        "unit": "flat",
        "ste": "suite",
        # numbering noise (mapped to "" so the token is dropped)
        "no": "",
    }

    # 1. lowercase
    s = s.lower()

    # 1.5 split digit-letter suffixes
    s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)

    # 2. remove punctuation except /
    s = re.sub(r"[^\w\s/]", " ", s)

    # 3. normalise whitespace
    s = re.sub(r"\s+", " ", s).strip()

    # 4. tokenise + synonym normalisation; empty replacements are skipped
    tokens = [
        replacement
        for replacement in (ADDRESS_SYNONYMS.get(tok, tok) for tok in s.split())
        if replacement
    ]

    return " ".join(tokens)
|
||||
|
||||
|
||||
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    column: str = "address",
) -> pd.Series:
    """
    Score every address in ``df[column]`` against ``user_address``.

    Args:
        df: Frame holding the candidate addresses.
        user_address: The address to compare each candidate with.
        column: Name of the column containing candidate addresses.

    Returns:
        A Series of ``levenshtein`` scores, one per row of ``df``.

    Raises:
        ValueError: If ``column`` is not present in ``df``.
    """
    if column in df.columns:
        candidates = df[column]
        return candidates.apply(lambda candidate: levenshtein(user_address, candidate))

    raise ValueError(f"Missing column: {column}")
|
||||
|
||||
|
||||
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
    """
    Fetch EPC domestic search results for a postcode as a DataFrame.

    If the number of rows returned equals ``size`` the result may have been
    truncated, so the search is retried recursively with double the size until
    ``max_attempts`` is reached; after that the (possibly truncated) rows are
    returned with a printed warning.

    Args:
        postcode: Postcode passed to the search as the ``postcode`` parameter.
        size: Page size sent as the ``size`` query parameter; a falsy value
            omits the parameter.
        attempt: 1-based attempt counter (used by the recursion).
        max_attempts: Maximum number of attempts before giving up on growing
            ``size``.

    Returns:
        DataFrame built from the response's ``rows`` with ``column-names`` as
        the column labels.
    """
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Join URL segments with "/" explicitly: os.path.join is a filesystem
    # helper and would insert a backslash on Windows.
    url = f"{client.domestic.host.rstrip('/')}/search"

    if size:
        url += "?" + urlencode({"size": size})

    search_resp = client.domestic.call(
        url=url,
        method="get",
        params={"postcode": postcode},
    )

    results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])

    row_count = len(results_df)

    # If we hit the size limit, there *may* be more results
    if row_count == size:
        print(
            f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. "
            f"Attempt {attempt}/{max_attempts}."
        )

        if attempt < max_attempts:
            print(f"🔁 Retrying with size={size * 2}")
            return get_epc_data_with_postcode(
                postcode=postcode,
                size=size * 2,
                attempt=attempt + 1,
                max_attempts=max_attempts,
            )
        else:
            print(
                "🚨 Max attempts reached. Results may be truncated. "
                "(Please do a manual review by the tech team.)"
            )

    return results_df
|
||||
|
||||
|
||||
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check that ``df[column]`` holds exactly one distinct non-null UPRN and that
    it equals ``uprn``.

    Values are compared as stripped strings; ``uprn`` is compared via
    ``str(uprn)``. Returns False when the column is missing, when there are no
    non-null values, or when more than one distinct value is present.
    """
    if column not in df.columns:
        return False

    # Distinct non-null values, normalised to stripped strings.
    distinct = df[column].dropna().astype(str).str.strip().unique()

    # Zero values or several competing values: cannot be a single agreed UPRN.
    if len(distinct) != 1:
        return False

    return distinct[0] == str(uprn)
|
||||
|
||||
|
||||
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Rank EPC rows by how closely their address matches ``user_address``.

    Works on a copy of ``df`` and adds two columns:
    - ``lexiscore``: ``levenshtein`` score of the normalised user address
      against the row's address.
    - ``lexirank``: dense rank of ``lexiscore``, 1 being the best.

    The UPRN column is converted to strings with a trailing ".0" removed.
    Rows are returned best match first. This function never selects a UPRN.

    Raises:
        ValueError: If ``address_column`` or ``uprn_column`` is missing.
    """
    for required in (address_column, uprn_column):
        if required not in df.columns:
            raise ValueError(f"Missing column: {required}")

    ranked = df.copy()
    query = normalise_address(user_address)

    ranked["lexiscore"] = ranked[address_column].apply(
        lambda candidate: levenshtein(query, candidate)
    )

    # UPRNs read as floats would otherwise print as e.g. "123.0".
    uprn_text = ranked[uprn_column].astype(str)
    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    # Dense ranking so tied scores share a rank.
    dense_rank = ranked["lexiscore"].rank(method="dense", ascending=False)
    ranked["lexirank"] = dense_rank.astype(int)

    return ranked.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )
|
||||
|
||||
|
||||
def get_uprn(user_inputed_address: str, postcode: str):
    """
    Resolve a user-entered address to a UPRN via the EPC search for its postcode.

    Args:
        user_inputed_address: Free-text address (or just the house/flat part).
        postcode: Postcode used to fetch the EPC candidates.

    Returns:
        The UPRN as a string when a single best match is found, otherwise
        ``None``. ``None`` is returned when:
        - the postcode has no EPC rows,
        - the best similarity score is not positive,
        - the top-ranked rows disagree on the UPRN (ambiguous), or
        - the best match has no UPRN recorded.
    """
    df = get_epc_data_with_postcode(postcode=postcode)

    if df.empty:
        return None

    scored_df = get_uprn_candidates(
        df,
        user_address=user_inputed_address,
    )

    # Rows are sorted best-first, so row 0 carries the best score
    best_score = scored_df.iloc[0]["lexiscore"]

    if best_score <= 0:
        return None

    # All rank-1 rows (possible draw)
    top_rank_df = scored_df[scored_df["lexirank"] == 1]

    # If rank-1 rows do not agree on a single UPRN → ambiguous
    if not df_has_single_uprn(top_rank_df, uprn=top_rank_df.iloc[0]["uprn"]):
        return None

    address = top_rank_df["address"].values[0]
    lexiscore = float(top_rank_df["lexiscore"].values[0])

    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")

    # Safe to return the agreed UPRN
    found_uprn = top_rank_df.iloc[0]["uprn"]

    # get_uprn_candidates stringifies the column, which turns a missing value
    # into "nan"/"None"; treat those placeholders like an empty UPRN.
    if pd.isna(found_uprn) or str(found_uprn).strip().lower() in ("", "nan", "none", "<na>"):
        return None

    return found_uprn
|
||||
|
||||
|
||||
def resolve_uprns_for_postcode_group(
    group_df: pd.DataFrame,
    epc_df: pd.DataFrame,
    address_col: str = "Address 1",
) -> pd.DataFrame:
    """
    Match every address in a same-postcode group against one EPC result set.

    Args:
        group_df: Rows sharing the same postcode.
        epc_df: EPC search results for that postcode.
        address_col: Column of ``group_df`` holding the address to match.

    Returns:
        ``group_df`` (index reset) with these columns appended:
        ``found_uprn``, ``best_match_uprn``, ``best_match_address``,
        ``best_match_lexiscore`` and ``status``, where status is one of
        "no_epc_candidates", "zero_score", "ambiguous" or "matched".
    """

    def outcome(status, found=None, uprn=None, address=None, score=None):
        # One diagnostics record; key order fixes the output column order.
        return {
            "found_uprn": found,
            "best_match_uprn": uprn,
            "best_match_address": address,
            "best_match_lexiscore": score,
            "status": status,
        }

    diagnostics = []

    for _, record in group_df.iterrows():
        query = str(record[address_col]).strip()
        candidates = get_uprn_candidates(epc_df, user_address=query)

        if candidates.empty:
            diagnostics.append(outcome("no_epc_candidates"))
            continue

        top_score = candidates.iloc[0]["lexiscore"]
        if top_score <= 0:
            diagnostics.append(outcome("zero_score", score=top_score))
            continue

        # Every row tied for first place; they must agree on one UPRN.
        leaders = candidates[candidates["lexirank"] == 1]
        leader = leaders.iloc[0]

        if df_has_single_uprn(leaders, leader["uprn"]):
            diagnostics.append(
                outcome(
                    "matched",
                    found=str(leader["uprn"]),
                    uprn=str(leader["uprn"]),
                    address=leader["address"],
                    score=top_score,
                )
            )
        else:
            diagnostics.append(
                outcome(
                    "ambiguous",
                    uprn=leader["uprn"],
                    address=leader["address"],
                    score=top_score,
                )
            )

    return pd.concat(
        [group_df.reset_index(drop=True), pd.DataFrame(diagnostics)],
        axis=1,
    )
|
||||
|
||||
|
||||
def test(a, b):
|
||||
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
|
||||
|
||||
|
||||
def run_all_test():
    """
    Manual smoke checks against the live EPC API (needs network access and a
    valid EPC_AUTH_TOKEN). Expected values are snapshots of live data and may
    drift over time.

    ``get_uprn`` returns ``None`` on every failure path, so "no match" cases
    are compared with ``None``.
    """
    # Basic usage with different postcode styles
    test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
    test(get_epc_data_with_postcode("B938sy").shape[0], 63)
    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)

    test(get_uprn("68", "b93 8sy"), "100070989938")
    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
    test(get_uprn("28 A", "se6 4tf"), "100023278633")
    test(get_uprn("28A", "se6 4tf"), "100023278633")
    test(get_uprn("6 Aitken Close", "E8 4SQ"), None)

    # unique case: flat number and building number can be confused
    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), None)
    test(
        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
    )  # this one returns "flat 1, in 1 semley gate"

    # Oswald Street addresses are expected to have no usable match
    test(get_uprn("48 Oswald Street", "E5 0BT"), None)
    test(get_uprn("42 Oswald Street", "E5 0BT"), None)
    test(get_uprn("46 Oswald Street", "E5 0BT"), None)

    # Result is discarded: this only checks that candidate ranking runs.
    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Ad-hoc audit script: reads a spreadsheet of addresses with known UPRNs,
    # resolves each address with get_uprn, and writes every row that errored,
    # had no EPC data, or disagreed with the expected UPRN to an Excel file.
    INPUT_FILE = "hackney.xlsx"

    ADDRESS_COL = "Address 1"
    POSTCODE_COL = "Postcode"
    UPRN_COL = "UPRN"

    df = pd.read_excel(INPUT_FILE)

    # One dict per problematic row: the original columns plus diagnostics.
    failures = []

    for _, row in tqdm(
        df.iterrows(),
        total=len(df),
        desc="Auditing UPRNs",
    ):
        input_address = str(row[ADDRESS_COL]).strip()
        postcode = str(row[POSTCODE_COL]).strip()

        # int() first so a UPRN read as a float (e.g. 123.0) becomes "123".
        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))

        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                failures.append(
                    {
                        **row.to_dict(),
                        "found_uprn": None,
                        "best_match_uprn": None,
                        "best_match_address": None,
                        "best_match_lexiscore": None,
                        "status": "no_epc_results",
                    }
                )
                continue

            scored_df = get_uprn_candidates(
                epc_df,
                user_address=input_address,
            )

            # Best-ranked candidate, recorded for diagnostics even when
            # get_uprn declines to return it.
            best_row = scored_df.iloc[0]

            best_match_uprn = str(best_row["uprn"])
            best_match_address = best_row["address"]
            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)

            # NOTE: get_uprn fetches the postcode data again internally.
            found_uprn = get_uprn(input_address, postcode)

        except Exception as e:
            # Record the error and carry on with the remaining rows.
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": None,
                    "best_match_uprn": None,
                    "best_match_address": None,
                    "best_match_lexiscore": None,
                    "status": "exception",
                    "error": str(e),
                }
            )
            continue

        # Collapse any falsy result to None so it compares with expected_uprn.
        found_uprn_norm = None if not found_uprn else str(found_uprn)

        if found_uprn_norm != expected_uprn:
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": found_uprn_norm,
                    "best_match_uprn": best_match_uprn,
                    "best_match_address": best_match_address,
                    "best_match_lexiscore": best_match_lexiscore,
                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
                }
            )

    failures_df = pd.DataFrame(failures)

    print("===================================")
    print(f"Total rows : {len(df)}")
    print(f"Failures   : {len(failures_df)}")
    print("===================================")

    failures_df.to_excel(
        "hackney_uprn_failures.xlsx",
        index=False,
    )
|
||||
|
||||
|
||||
# TO do function dispatcher,
|
||||
|
||||
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
|
||||
# fix that
|
||||
# Look again at flat 1
|
||||
# pandas reader the seperate postcode_splitter
|
||||
# dump into s3
|
||||
17
backend/address2UPRN/script.py
Normal file
17
backend/address2UPRN/script.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import pandas as pd
|
||||
|
||||
|
||||
# use Address 1
|
||||
# Failures written by the UPRN audit; join key is "Address 1".
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")

# Reference sheet; join key is "domna_address_1".
khalim_df = pd.read_excel("khalim_standard.xlsx")

# Left join: keep every audit failure and attach the reference row when present.
combined_df = pd.merge(
    junte_df,
    khalim_df,
    how="left",
    left_on="Address 1",
    right_on="domna_address_1",
)

# Keep only the failures that have a non-null epc_os_uprn after the join.
has_epc_uprn = combined_df["epc_os_uprn"].notna()
result = combined_df[has_epc_uprn]
|
||||
|
||||
40
backend/address2UPRN/tests/test_csv.py
Normal file
40
backend/address2UPRN/tests/test_csv.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# tests/test_address_to_uprn_csv.py
|
||||
|
||||
import csv
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
|
||||
|
||||
def load_test_cases():
    """
    Read the CSV fixture and return one ``pytest.param`` per row.

    Each param carries (User Input, Postcode, Manual UPRN Code) and is labelled
    "<User Input> [<Postcode>]" so failures are easy to identify.
    """
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            label = f'{record["User Input"]} [{record["Postcode"]}]'
            cases.append(
                pytest.param(
                    record["User Input"],
                    record["Postcode"],
                    record["Manual UPRN Code"],
                    id=label,
                )
            )
    return cases
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "user_input,postcode,expected_uprn",
    load_test_cases(),
)
def test_uprn_resolution_matches_manual(
    user_input: str,
    postcode: str,
    expected_uprn: str,
):
    """
    The UPRN resolved for each fixture row must equal the manually verified one.

    When ``get_uprn`` returns a falsy value (no match), its string form is
    compared with the fixture cell, so the fixture encodes an expected
    "no match" as that literal text.
    """
    uprn = get_uprn(user_input, postcode)

    if uprn:
        assert uprn == expected_uprn, (
            f"{user_input!r} [{postcode}] resolved to {uprn!r}, expected {expected_uprn!r}"
        )
    else:
        assert str(uprn) == expected_uprn, (
            f"{user_input!r} [{postcode}] found no UPRN, expected {expected_uprn!r}"
        )
|
||||
167
backend/address2UPRN/tests/test_data.csv
Normal file
167
backend/address2UPRN/tests/test_data.csv
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
User Input,Postcode,Manual UPRN Code
|
||||
47 The Fairway,OX16 0RR,100120771697
|
||||
11 REGENT COURT,SL1 3LG,100081041562
|
||||
3/137a Windmill Road,TW8 9NH,100021516998
|
||||
Flat 33,SW18 4BE,100023328943
|
||||
FLAT 1 Brendon Grove,N2 8JE,200013412
|
||||
Flat 15,KT8 2NE,100062123759
|
||||
FLAT 5 Stonehill Road,W4 3AH,100021589829
|
||||
10 Douglas Court,SL7 1UQ,100081278099
|
||||
1 Windmill Road,HP17 8JA,766034606
|
||||
31 Denewood,HP13 7LH,100081095964
|
||||
"10, Greenways Drive",TW4 5DD,10091597009
|
||||
Flat 10,W4 3AH,"100021589834"
|
||||
Flat 11,TW4 5DD,10091597010
|
||||
Flat 11,W4 3AH,100021589835
|
||||
"12, Greenways Drive",TW4 5DD,10091597011
|
||||
"Flat 12, Forbes House",W4 3AH,100021589836
|
||||
FLAT 1 Goodstone Court,HA1 4FL,10070269053
|
||||
Flat 13,TW4 5DD,10091597012
|
||||
Flat 13,W4 3AH,100021589837
|
||||
Flat 14,TW4 5DD,10091597013
|
||||
Flat 14,W4 3AH,100021589838
|
||||
Flat 15,TW4 5DD,10091597014
|
||||
Flat 15,W4 3AH,100021589839
|
||||
Flat 16,TW4 5DD,"10091597015"
|
||||
Flat 16,W4 3AH,100021589840
|
||||
Flat 17,TW4 5DD,10091597016
|
||||
Flat 17,W4 3AH,100021589841
|
||||
Flat 18,TW4 5DD,10091597017
|
||||
Flat 19,W4 3AH,100021589843
|
||||
Flat 20,W4 3AH,100021589844
|
||||
Flat 21,W4 3AH,100021589845
|
||||
Flat 22,W4 3AH,100021589846
|
||||
FLAT 2 Goodstone Court,HA1 4FL,10070269054
|
||||
Flat 23,W4 3AH,100021589847
|
||||
Flat 24,W4 3AH,100021589848
|
||||
"30c, Bosanquet Close",UB8 3PE,100021475316
|
||||
"30e, Bosanquet Close",UB8 3PE,100021475318
|
||||
FLAT 3 Goodstone Court,HA1 4FL,10070269055
|
||||
FLAT 4 Goodstone Court,HA1 4FL,10070269056
|
||||
FLAT 5 Goodstone Court,HA1 4FL,10070269057
|
||||
FLAT 6 Goodstone Court,HA1 4FL,10070269058
|
||||
FLAT 7 Goodstone Court,HA1 4FL,10070269059
|
||||
FLAT 8 Goodstone Court,HA1 4FL,10070269060
|
||||
FLAT 9 Goodstone Court,HA1 4FL,10070269061
|
||||
FLAT 10 Goodstone Court,HA1 4FL,10070269062
|
||||
FLAT 11 Goodstone Court,HA1 4FL,10070269063
|
||||
FLAT 12 Goodstone Court,HA1 4FL,10070269064
|
||||
FLAT 13 Goodstone Court,HA1 4FL,10070269065
|
||||
FLAT 14 Goodstone Court,HA1 4FL,10070269066
|
||||
FLAT 15 Goodstone Court,HA1 4FL,10070269067
|
||||
FLAT 16 Goodstone Court,HA1 4FL,10070269068
|
||||
FLAT 17 Goodstone Court,HA1 4FL,10070269069
|
||||
FLAT 18 Goodstone Court,HA1 4FL,10070269070
|
||||
FLAT 19 Goodstone Court,HA1 4FL,10070269071
|
||||
FLAT 20 Goodstone Court,HA1 4FL,10070269072
|
||||
FLAT 21 Goodstone Court,HA1 4FL,10070269073
|
||||
FLAT 22 Goodstone Court,HA1 4FL,10070269074
|
||||
FLAT 23 Goodstone Court,HA1 4FL,10070269075
|
||||
FLAT 24 Goodstone Court,HA1 4FL,10070269076
|
||||
FLAT 25 Goodstone Court,HA1 4FL,10070269077
|
||||
FLAT 26 Goodstone Court,HA1 4FL,10070269078
|
||||
FLAT 27 Goodstone Court,HA1 4FL,10070269079
|
||||
FLAT 28 Goodstone Court,HA1 4FL,10070269080
|
||||
FLAT 29 Goodstone Court,HA1 4FL,10070269081
|
||||
FLAT 30 Goodstone Court,HA1 4FL,10070269082
|
||||
FLAT 31 Goodstone Court,HA1 4FL,10070269083
|
||||
FLAT 32 Goodstone Court,HA1 4FL,10070269084
|
||||
FLAT 33 Goodstone Court,HA1 4FL,10070269085
|
||||
FLAT 34 Goodstone Court,HA1 4FL,10070269086
|
||||
FLAT 35 Goodstone Court,HA1 4FL,10070269087
|
||||
FLAT 36 Goodstone Court,HA1 4FL,10070269088
|
||||
FLAT 37 Goodstone Court,HA1 4FL,10070269089
|
||||
FLAT 38 Goodstone Court,HA1 4FL,10070269090
|
||||
FLAT 39 Goodstone Court,HA1 4FL,10070269091
|
||||
FLAT 40 Goodstone Court,HA1 4FL,10070269092
|
||||
FLAT 41 Goodstone Court,HA1 4FL,10070269093
|
||||
FLAT 42 Goodstone Court,HA1 4FL,10070269094
|
||||
FLAT 43 Goodstone Court,HA1 4FL,10070269095
|
||||
"13 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778260
|
||||
"14 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778259
|
||||
"15 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778258
|
||||
"16 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778263
|
||||
"17 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778262
|
||||
"18 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778261
|
||||
"19 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778266
|
||||
"20 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778265
|
||||
"21 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778264
|
||||
90a Murray Road,W5 4DA,12135293
|
||||
"Flat 1, 6 Wolverton Gardens",W5 3LJ,"12119972"
|
||||
"1, Monsted House",UB1 1FG,12189944
|
||||
"10, Monsted House",UB1 1FG,12189953
|
||||
"20, Monsted House",UB1 1FG,12189963
|
||||
"2, Monsted House",UB1 1FG,12189945
|
||||
"3, Monsted House",UB1 1FG,12189946
|
||||
"4, Monsted House",UB1 1FG,12189947
|
||||
"5, Monsted House",UB1 1FG,12189948
|
||||
"6, Monsted House",UB1 1FG,12189949
|
||||
"7, Monsted House",UB1 1FG,12189950
|
||||
"8, Monsted House",UB1 1FG,12189951
|
||||
"9, Monsted House",UB1 1FG,12189952
|
||||
"1 Cullis House, 1, Accolade Avenue",UB1 1FH,12189904
|
||||
"2 Cullis House, 1, Accolade Avenue",UB1 1FH,12189905
|
||||
"3 Cullis House, 1, Accolade Avenue",UB1 1FH,12189906
|
||||
"4 Cullis House, 1, Accolade Avenue",UB1 1FH,12189907
|
||||
"5 Cullis House, 1, Accolade Avenue",UB1 1FH,12189908
|
||||
"6 Cullis House, 1, Accolade Avenue",UB1 1FH,12189909
|
||||
1 Genteel House Samara Drive,UB1 1FJ,12189835
|
||||
2 Genteel House Samara Drive,UB1 1FJ,12189836
|
||||
3 Genteel House Samara Drive,UB1 1FJ,12189837
|
||||
4 Genteel House Samara Drive,UB1 1FJ,12189838
|
||||
5 Genteel House Samara Drive,UB1 1FJ,12189839
|
||||
6 Genteel House Samara Drive,UB1 1FJ,12189840
|
||||
7 Genteel House Samara Drive,UB1 1FJ,12189841
|
||||
8 Genteel House Samara Drive,UB1 1FJ,12189842
|
||||
9 Genteel House Samara Drive,UB1 1FJ,12189843
|
||||
10 Genteel House Samara Drive,UB1 1FJ,12189844
|
||||
1 ASH TREE HOUSE,SE5 0TE,10009803979
|
||||
3 ASH TREE HOUSE,SE5 0TE,10009803981
|
||||
5 ASH TREE HOUSE,SE5 0TE,10009803983
|
||||
8 ASH TREE HOUSE,SE5 0TE,10009803986
|
||||
12 ASH TREE HOUSE,SE5 0TE,10009803990
|
||||
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
|
||||
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
|
||||
FLAT 3 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 4 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 5 599 HARROW ROAD,W10 4RA,217113934
|
||||
FLAT 6 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 7 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 8 599 HARROW ROAD,W10 4RA,None
|
||||
"Flat 1, Ohio Building",SE13 7RX,10023226256
|
||||
"Flat 2, Ohio Building",SE13 7RX,10023226257
|
||||
"Apartment 1 Block B, 105, Benwell Road",N7 7BW,10012792307
|
||||
"Apartment 2 Block B, 105, Benwell Road",N7 7BW,10012792308
|
||||
"Apartment 3 Block B, 105, Benwell Road",N7 7BW,10012792309
|
||||
"Apartment 4 Block B, 105, Benwell Road",N7 7BW,10012792310
|
||||
"Apartment 5 Block B, 105, Benwell Road",N7 7BW,10012792311
|
||||
"Apartment 6 Block B, 105, Benwell Road",N7 7BW,10012792312
|
||||
"Apartment 7 Block B, 105, Benwell Road",N7 7BW,10012792313
|
||||
"Apartment 8 Block B, 105, Benwell Road",N7 7BW,10012792314
|
||||
"Apartment 9 Block B, 105, Benwell Road",N7 7BW,10012792315
|
||||
"Apartment 10 Block B, 105, Benwell Road",N7 7BW,10012792316
|
||||
"Apartment 11 Block B, 105, Benwell Road",N7 7BW,10012792317
|
||||
"Apartment 12 Block B, 105, Benwell Road",N7 7BW,10012792318
|
||||
"Apartment 13 Block B, 105, Benwell Road",N7 7BW,10012792319
|
||||
"Apartment 1 Block D, 32, Hornsey Road",N7 7AT,10012792366
|
||||
"Apartment 2 Block D, 32, Hornsey Road",N7 7AT,10012792367
|
||||
"Apartment 3 Block D, 32, Hornsey Road",N7 7AT,10012792368
|
||||
"Apartment 4 Block D, 32, Hornsey Road",N7 7AT,10012792369
|
||||
"Apartment 5 Block D, 32, Hornsey Road",N7 7AT,10012792370
|
||||
"Apartment 6 Block D, 32, Hornsey Road",N7 7AT,"10012792371"
|
||||
"Apartment 7 Block D, 32, Hornsey Road",N7 7AT,10012792372
|
||||
"Apartment 8 Block D, 32, Hornsey Road",N7 7AT,10012792373
|
||||
"Apartment 9 Block D, 32, Hornsey Road",N7 7AT,10012792374
|
||||
"Apartment 10 Block D, 32, Hornsey Road",N7 7AT,10012792375
|
||||
"Apartment 11 Block D, 32, Hornsey Road",N7 7AT,10012792376
|
||||
"Apartment 12 Block D, 32, Hornsey Road",N7 7AT,10012792377
|
||||
"Apartment 13 Block D, 32, Hornsey Road",N7 7AT,10012792378
|
||||
"Apartment 14 Block D, 32, Hornsey Road",N7 7AT,10012792379
|
||||
"Apartment 15 Block D, 32, Hornsey Road",N7 7AT,10012792380
|
||||
"Apartment 16 Block D, 32, Hornsey Road",N7 7AT,"10012792381"
|
||||
"Apartment 17Block D, 32, Hornsey Road",N7 7AT,10012792382
|
||||
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
|
||||
24b Honley Road,SE6 2HZ,None
|
||||
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
||||
2 COLLEGE HOUSE,CM7 1JS,100091449870
|
||||
3 COLLEGE HOUSE,CM7 1JS,100091449871
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
# fastapi
|
||||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
|
|
@ -12,5 +13,4 @@ boto3==1.35.44
|
|||
openpyxl==3.1.2
|
||||
# Basic
|
||||
pytz
|
||||
sqlmodel
|
||||
|
||||
sqlmodel
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
# Pandas and numpy
|
||||
numpy==2.1.2
|
||||
pandas==2.2.3
|
||||
|
|
@ -22,4 +23,4 @@ pyarrow==17.0.0
|
|||
fastparquet==2024.5.0
|
||||
aiohttp==3.10.10
|
||||
# find my epc
|
||||
beautifulsoup4
|
||||
beautifulsoup4
|
||||
BIN
backend/postcode_splitter/hackney.xlsx
Normal file
BIN
backend/postcode_splitter/hackney.xlsx
Normal file
Binary file not shown.
114
backend/postcode_splitter/main.py
Normal file
114
backend/postcode_splitter/main.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
|
||||
def sanitise_postcode(postcode: str) -> str | None:
|
||||
"""
|
||||
Normalise postcode for grouping.
|
||||
|
||||
- Uppercase
|
||||
- Remove all whitespace
|
||||
"""
|
||||
if pd.isna(postcode):
|
||||
return None
|
||||
|
||||
return postcode.upper().replace(" ", "")
|
||||
|
||||
|
||||
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate postcode using postcodes.io.

    Expects a sanitised postcode (e.g. E84SQ).

    Args:
        postcode_clean: postcode to check; empty or None short-circuits to
            False without a network call.

    Returns:
        True if the service reports the postcode as valid.  False otherwise,
        including on any request failure, non-2xx status or unparseable
        response body.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import quote

    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
    if not postcode_clean:
        return False

    # The value originates from spreadsheet cells, so escape it before placing
    # it in the URL path; characters such as "/" or "?" would otherwise change
    # which endpoint is requested.
    url = POSTCODES_IO_VALIDATE_URL.format(
        postcode=quote(str(postcode_clean), safe="")
    )

    try:
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Network issues, rate limits, etc.  ValueError covers a body that is
        # not valid JSON on requests versions where that error is not a
        # RequestException subclass.
        return False

    if not isinstance(payload, dict):
        return False
    return bool(payload.get("result", False))
|
||||
|
||||
|
||||
def main():
    """Resolve UPRNs for the rows of ``hackney.xlsx``, one postcode group at a time.

    Steps:
      1. Read the "Sustainability" sheet and keep its first 500 rows.
      2. Normalise the "Postcode" column into "postcode_clean".
      3. Validate each distinct postcode once via ``is_valid_postcode``.
      4. For every valid postcode, fetch EPC data and call
         ``resolve_uprns_for_postcode_group`` on that postcode's rows.
      5. Concatenate the per-group frames and build two inspection views.

    Groups that yield no EPC data, or that raise, are kept in the output with
    ``found_uprn`` set to None and a ``status`` (plus ``error``) column.

    NOTE(review): the two views built at the end are not written anywhere or
    returned; presumably they are inspected interactively - confirm, or persist
    them.
    """
    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
    # NOTE(review): looks like a development-time row limit - confirm before a full run.
    df = df.head(500)

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # Validate each distinct postcode once rather than once per row, to save
    # API calls.
    unique_postcodes = (
        df["postcode_clean"]
        .dropna()
        .unique()
    )
    postcode_validity = {
        pc: is_valid_postcode(pc)
        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
    }

    # Map validity back onto the dataframe as a strict boolean column.  Rows
    # with a missing postcode are absent from the lookup; a plain dict .map()
    # would leave NaN there and the boolean mask below would raise.
    df["postcode_valid"] = df["postcode_clean"].map(
        lambda pc: bool(postcode_validity.get(pc, False))
    )

    results = []

    for postcode, group_df in tqdm(
        df[df["postcode_valid"]].groupby("postcode_clean"),
        desc="Resolving UPRNs by postcode",
    ):
        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                tmp = group_df.copy()
                tmp["found_uprn"] = None
                tmp["status"] = "no_epc_results"
                results.append(tmp)
                continue

            resolved = resolve_uprns_for_postcode_group(
                group_df=group_df,
                epc_df=epc_df,
            )

            results.append(resolved)

        except Exception as e:
            # Deliberate best-effort: one failing postcode must not abort the
            # whole run, so the failure is recorded on that group's rows.
            tmp = group_df.copy()
            tmp["found_uprn"] = None
            tmp["status"] = "exception"
            tmp["error"] = str(e)
            results.append(tmp)

    # pd.concat raises on an empty list, which happens when no postcode was
    # valid; fall back to an empty frame with the input columns.
    if results:
        final_df = pd.concat(results, ignore_index=True)
    else:
        final_df = df.iloc[0:0].copy()

    view_columns = [
        "best_match_lexiscore", "Address 1",
        "best_match_address", "Postcode",
        "UPRN", "best_match_uprn",
    ]
    # reindex (rather than final_df[[...]]) so columns that no group produced
    # show up as all-NaN instead of raising KeyError.
    a = final_df.reindex(columns=view_columns)
    # Same view restricted to rows with a positive match score.
    b = a[a["best_match_lexiscore"] > 0]
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
11
conftest.py
11
conftest.py
|
|
@ -1,5 +1,11 @@
|
|||
import os
|
||||
from backend.app.config import get_settings
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
# Load .env in conftest.py directory for local development
|
||||
load_dotenv()
|
||||
|
||||
DEFAULT_ENV = {
|
||||
"API_KEY": "test",
|
||||
|
|
@ -8,7 +14,10 @@ DEFAULT_ENV = {
|
|||
"DATA_BUCKET": "test",
|
||||
"PLAN_TRIGGER_BUCKET": "test",
|
||||
"ENGINE_SQS_URL": "test",
|
||||
"EPC_AUTH_TOKEN": "test", # overridden in GitHub Actions
|
||||
"EPC_AUTH_TOKEN": os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
"test",
|
||||
), # overridden in GitHub Actions
|
||||
"GOOGLE_SOLAR_API_KEY": "test",
|
||||
"DB_HOST": "localhost",
|
||||
"DB_USERNAME": "test",
|
||||
|
|
|
|||
|
|
@ -1,111 +1,111 @@
|
|||
import pandas as pd
|
||||
|
||||
epc_c_recommendations = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
"solid floor, ashp 3.0 - corrected.xlsx"
|
||||
)
|
||||
epc_b_recommendations = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
|
||||
"solid floor, ashp 3.0 - corrected.xlsx"
|
||||
)
|
||||
# epc_c_recommendations = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
# "solid floor, ashp 3.0 - corrected.xlsx"
|
||||
# )
|
||||
# epc_b_recommendations = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
|
||||
# "solid floor, ashp 3.0 - corrected.xlsx"
|
||||
# )
|
||||
|
||||
epc_c_movers = epc_b_recommendations[
|
||||
epc_b_recommendations["current_epc_rating"] == "Epc.C"
|
||||
]
|
||||
epc_c_movers["property_type"].value_counts()
|
||||
# epc_c_movers = epc_b_recommendations[
|
||||
# epc_b_recommendations["current_epc_rating"] == "Epc.C"
|
||||
# ]
|
||||
# epc_c_movers["property_type"].value_counts()
|
||||
|
||||
house_epc_c_movers = epc_c_movers[
|
||||
epc_c_movers["property_type"] == "House"
|
||||
]
|
||||
house_epc_c_movers_with_solar = house_epc_c_movers[
|
||||
~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
|
||||
]
|
||||
# house_epc_c_movers = epc_c_movers[
|
||||
# epc_c_movers["property_type"] == "House"
|
||||
# ]
|
||||
# house_epc_c_movers_with_solar = house_epc_c_movers[
|
||||
# ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
|
||||
# ]
|
||||
|
||||
house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
|
||||
~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
|
||||
]
|
||||
# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
|
||||
# ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
|
||||
# ]
|
||||
|
||||
flat_epc_c_movers = epc_c_movers[
|
||||
epc_c_movers["property_type"] == "Flat"
|
||||
]
|
||||
# flat_epc_c_movers = epc_c_movers[
|
||||
# epc_c_movers["property_type"] == "Flat"
|
||||
# ]
|
||||
|
||||
epc_c_recommendations["sap_points"].mean()
|
||||
epc_c_recommendations["sap_points"].mean()
|
||||
# epc_c_recommendations["sap_points"].mean()
|
||||
# epc_c_recommendations["sap_points"].mean()
|
||||
|
||||
measure_cols = [
|
||||
"air_source_heat_pump",
|
||||
"boiler_upgrade",
|
||||
"cavity_wall_insulation",
|
||||
"double_glazing",
|
||||
"external_wall_insulation",
|
||||
"flat_roof_insulation",
|
||||
"high_heat_retention_storage_heaters",
|
||||
"internal_wall_insulation",
|
||||
"loft_insulation",
|
||||
"low_energy_lighting",
|
||||
"mechanical_ventilation",
|
||||
"room_roof_insulation",
|
||||
"roomstat_programmer_trvs",
|
||||
"sealing_open_fireplace",
|
||||
"secondary_glazing",
|
||||
"secondary_heating",
|
||||
"solar_pv",
|
||||
"solar_pv_with_battery",
|
||||
"suspended_floor_insulation",
|
||||
"time_temperature_zone_control",
|
||||
]
|
||||
# measure_cols = [
|
||||
# "air_source_heat_pump",
|
||||
# "boiler_upgrade",
|
||||
# "cavity_wall_insulation",
|
||||
# "double_glazing",
|
||||
# "external_wall_insulation",
|
||||
# "flat_roof_insulation",
|
||||
# "high_heat_retention_storage_heaters",
|
||||
# "internal_wall_insulation",
|
||||
# "loft_insulation",
|
||||
# "low_energy_lighting",
|
||||
# "mechanical_ventilation",
|
||||
# "room_roof_insulation",
|
||||
# "roomstat_programmer_trvs",
|
||||
# "sealing_open_fireplace",
|
||||
# "secondary_glazing",
|
||||
# "secondary_heating",
|
||||
# "solar_pv",
|
||||
# "solar_pv_with_battery",
|
||||
# "suspended_floor_insulation",
|
||||
# "time_temperature_zone_control",
|
||||
# ]
|
||||
|
||||
epc_c_melted = (
|
||||
epc_c_recommendations
|
||||
.melt(
|
||||
id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
|
||||
value_vars=measure_cols,
|
||||
var_name="measure_type",
|
||||
value_name="value",
|
||||
)
|
||||
.dropna(subset=["value"])
|
||||
)
|
||||
epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
|
||||
epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
# epc_c_melted = (
|
||||
# epc_c_recommendations
|
||||
# .melt(
|
||||
# id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
|
||||
# value_vars=measure_cols,
|
||||
# var_name="measure_type",
|
||||
# value_name="value",
|
||||
# )
|
||||
# .dropna(subset=["value"])
|
||||
# )
|
||||
# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
|
||||
# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
|
||||
epc_b_melted = (
|
||||
epc_b_recommendations
|
||||
.melt(
|
||||
id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
|
||||
value_vars=measure_cols,
|
||||
var_name="measure_type",
|
||||
value_name="value",
|
||||
)
|
||||
.dropna(subset=["value"])
|
||||
)
|
||||
# epc_b_melted = (
|
||||
# epc_b_recommendations
|
||||
# .melt(
|
||||
# id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
|
||||
# value_vars=measure_cols,
|
||||
# var_name="measure_type",
|
||||
# value_name="value",
|
||||
# )
|
||||
# .dropna(subset=["value"])
|
||||
# )
|
||||
|
||||
epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
|
||||
epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
|
||||
# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
|
||||
measures_compared = epc_c_measures.merge(
|
||||
epc_b_measures,
|
||||
left_on="measure_type",
|
||||
right_on="measure_type",
|
||||
suffixes=("_epc_c", "_epc_b"),
|
||||
)
|
||||
# measures_compared = epc_c_measures.merge(
|
||||
# epc_b_measures,
|
||||
# left_on="measure_type",
|
||||
# right_on="measure_type",
|
||||
# suffixes=("_epc_c", "_epc_b"),
|
||||
# )
|
||||
|
||||
epc_c_retrofits = epc_c_recommendations[
|
||||
epc_c_recommendations["total_retrofit_cost"] > 0
|
||||
]
|
||||
# epc_c_retrofits = epc_c_recommendations[
|
||||
# epc_c_recommendations["total_retrofit_cost"] > 0
|
||||
# ]
|
||||
|
||||
epc_b_retrofits = epc_b_recommendations[
|
||||
epc_b_recommendations["total_retrofit_cost"] > 0
|
||||
]
|
||||
# epc_b_retrofits = epc_b_recommendations[
|
||||
# epc_b_recommendations["total_retrofit_cost"] > 0
|
||||
# ]
|
||||
|
||||
epc_c_retrofits["sap_points"].mean()
|
||||
epc_b_retrofits["sap_points"].mean()
|
||||
# epc_c_retrofits["sap_points"].mean()
|
||||
# epc_b_retrofits["sap_points"].mean()
|
||||
|
||||
properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
|
||||
# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
|
||||
|
||||
properties_in_both["total_retrofit_cost_epc_c"].mean()
|
||||
properties_in_both["sap_points_epc_c"].mean()
|
||||
properties_in_both["total_retrofit_cost_epc_b"].mean()
|
||||
properties_in_both["sap_points_epc_b"].mean()
|
||||
# properties_in_both["total_retrofit_cost_epc_c"].mean()
|
||||
# properties_in_both["sap_points_epc_c"].mean()
|
||||
# properties_in_both["total_retrofit_cost_epc_b"].mean()
|
||||
# properties_in_both["sap_points_epc_b"].mean()
|
||||
|
||||
# Solar PV savings - we need the amount of solar PV bill savings
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
|
@ -114,16 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
|
|||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from collections import defaultdict
|
||||
|
||||
PORTFOLIO_ID = 435 # Peabody
|
||||
PORTFOLIO_ID = 485 # Peabody
|
||||
SCENARIOS = [
|
||||
908,
|
||||
909,
|
||||
910,
|
||||
970
|
||||
]
|
||||
scenario_names = {
|
||||
908: "EPC C - no solid floor, ashp 3.0",
|
||||
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
|
||||
910: "EPC B - no solid floor, no EWI, ashp 3.0"
|
||||
970: "EPC C - no solid floor, ashp 3.0",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -236,307 +232,266 @@ recommendations_df = pd.DataFrame(recommendations_data)
|
|||
properties_df = pd.DataFrame(properties_data)
|
||||
plans_df = pd.DataFrame(plans_data)
|
||||
|
||||
s_id = 910
|
||||
ps_w_a_plan = plans_df[plans_df["scenario_id"] == s_id].copy()
|
||||
# Take the newest by scenario id
|
||||
ps_w_a_plan = ps_w_a_plan.sort_values("created_at", ascending=False).drop_duplicates(
|
||||
subset=["property_id"]
|
||||
)
|
||||
z = ps_w_a_plan[
|
||||
ps_w_a_plan["cost_of_works"] > 0
|
||||
].copy()
|
||||
z2 = properties_df[properties_df["property_id"].isin(z["property_id"].values)]
|
||||
# '', 'hot_water_cost_current',
|
||||
# 'lighting_cost_current', 'appliances_cost_current',
|
||||
# 'gas_standing_charge', 'electricity_standing_charge'
|
||||
z2["total_bills"] = z2["heating_cost_current"] + z2["hot_water_cost_current"] + z2["lighting_cost_current"] + z2[
|
||||
"appliances_cost_current"
|
||||
] + z2["gas_standing_charge"] + z2["electricity_standing_charge"]
|
||||
with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
|
||||
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
|
||||
properties_df.to_excel(writer, sheet_name="properties", index=False)
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
|
||||
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
|
||||
|
||||
# For a property ID, find a property where the no EWI/IWI approach is more expensive than the EWI approach
|
||||
pids = properties_df["property_id"].unique()
|
||||
for pid in tqdm(pids):
|
||||
|
||||
if pid in [603272, 550550, 574493]:
|
||||
continue
|
||||
|
||||
# get the plans
|
||||
property_plan = plans_df[plans_df["property_id"] == int(pid)]
|
||||
# Take the newest plan by scenario id
|
||||
property_plan = property_plan.sort_values("created_at", ascending=False).drop_duplicates(
|
||||
subset=["scenario_id"]
|
||||
)
|
||||
a = property_plan[property_plan["scenario_id"] == 909].squeeze() # no EWI/IWI
|
||||
b = property_plan[property_plan["scenario_id"] == 908].squeeze() # EWI
|
||||
if (a["cost_of_works"] > b["cost_of_works"]) and (
|
||||
a["post_epc_rating"].value == "C") and (b["cost_of_works"] > 5000):
|
||||
bah
|
||||
# # Check tenures
|
||||
# initial_asset_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
# "- Data Extracts for Domna.xlsx",
|
||||
# sheet_name="Properties"
|
||||
# )
|
||||
# sustainability_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
# "- Data Extracts for Domna.xlsx",
|
||||
# sheet_name="Sustainability"
|
||||
# )
|
||||
|
||||
solar_pv_recommendations = recommendations_df[
|
||||
recommendations_df["measure_type"] == "solar_pv"
|
||||
]
|
||||
# sustainability_sample = sustainability_data[
|
||||
# sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
|
||||
# ]
|
||||
|
||||
solid_wall_recommendation = recommendations_df[
|
||||
recommendations_df["scenario_id"].isin([908]) &
|
||||
recommendations_df["measure_type"].isin(["internal_wall_insulation"]) &
|
||||
recommendations_df["default"]
|
||||
]
|
||||
average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
|
||||
# Add on scenarion names
|
||||
average_savings["scenario_name"] = average_savings["scenario_id"].map(scenario_names)
|
||||
# sustainability_sample = sustainability_sample.merge(
|
||||
# initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
|
||||
# )
|
||||
|
||||
# Check tenures
|
||||
initial_asset_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
"- Data Extracts for Domna.xlsx",
|
||||
sheet_name="Properties"
|
||||
)
|
||||
sustainability_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
"- Data Extracts for Domna.xlsx",
|
||||
sheet_name="Sustainability"
|
||||
)
|
||||
# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
|
||||
|
||||
sustainability_sample = sustainability_data[
|
||||
sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
|
||||
]
|
||||
# initial_asset_data.columns
|
||||
# initial_asset_data["LeaseType"].value_counts()
|
||||
|
||||
sustainability_sample = sustainability_sample.merge(
|
||||
initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
|
||||
)
|
||||
# # sustainability_sample["Tenure Group"].value_counts()
|
||||
# # Tenure Group
|
||||
# # General Needs 57787
|
||||
# # Home Ownership 25471
|
||||
# # Care & Supported Housing 4239
|
||||
# # Rental 2677
|
||||
# # Other 188
|
||||
|
||||
block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
|
||||
# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
|
||||
# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
|
||||
|
||||
initial_asset_data.columns
|
||||
initial_asset_data["LeaseType"].value_counts()
|
||||
# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
|
||||
# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
|
||||
|
||||
# sustainability_sample["Tenure Group"].value_counts()
|
||||
# Tenure Group
|
||||
# General Needs 57787
|
||||
# Home Ownership 25471
|
||||
# Care & Supported Housing 4239
|
||||
# Rental 2677
|
||||
# Other 188
|
||||
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
|
||||
|
||||
df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
|
||||
df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
|
||||
# sample_data = initial_asset_data[
|
||||
# ~initial_asset_data["Ownership Type"].isin(
|
||||
# [
|
||||
# # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
|
||||
# # Freeholder
|
||||
# "FREEHOLDER", # 19517 properties
|
||||
# # HOMEBUY / EQUITY LOAN
|
||||
# "Rent to Homebuy", # 1 property
|
||||
# # Leaseholder
|
||||
# "LEASEHOLD 100%", # 8455 properties
|
||||
# "Owned and Managed - 999 year lease", # 2076 properties
|
||||
# "Managed but not Owned-Private Lease", # 159 properties
|
||||
# "Owned and managed LEASEHOLD", # 26 properties
|
||||
# # Outright Sale - can't find anything matching
|
||||
# # SHARED EQUITY
|
||||
# "Shared Ownership", # 4065 properties
|
||||
# "Shared Ownership Owned Not Managed", # 23 properties
|
||||
# # Extra categories which seem sensible to exclude
|
||||
# "NOT MANAGED AND NOT OWNED"
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
|
||||
tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
|
||||
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
|
||||
# sample_data["Ownership Type"].value_counts()
|
||||
|
||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
|
||||
# sample_data = initial_asset_data[
|
||||
# initial_asset_data["Ownership Type"].isin(
|
||||
# [
|
||||
# "Owned and Managed",
|
||||
# "Owned and Managed - 999 year lease",
|
||||
# "Owned and managed LEASEHOLD",
|
||||
# "LEASEHOLD 100%",
|
||||
# "DATALOAD DEFAULT"
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
|
||||
# dropped["Ownership Type"].value_counts()
|
||||
|
||||
sample_data = initial_asset_data[
|
||||
~initial_asset_data["Ownership Type"].isin(
|
||||
[
|
||||
# Commercial # Everything is resi - based on the Residential Indicator variable - all are true
|
||||
# Freeholder
|
||||
"FREEHOLDER", # 19517 properties
|
||||
# HOMEBUY / EQUITY LOAN
|
||||
"Rent to Homebuy", # 1 property
|
||||
# Leaseholder
|
||||
"LEASEHOLD 100%", # 8455 properties
|
||||
"Owned and Managed - 999 year lease", # 2076 properties
|
||||
"Managed but not Owned-Private Lease", # 159 properties
|
||||
"Owned and managed LEASEHOLD", # 26 properties
|
||||
# Outright Sale - can't find anything matching
|
||||
# SHARED EQUITY
|
||||
"Shared Ownership", # 4065 properties
|
||||
"Shared Ownership Owned Not Managed", # 23 properties
|
||||
# Extra categories which seem sensible to exclude
|
||||
"NOT MANAGED AND NOT OWNED"
|
||||
]
|
||||
)
|
||||
]
|
||||
# for value in [
|
||||
# # Commercial # Everything is resi, so should be fine. No matches
|
||||
# # Freeholder
|
||||
# "FREEHOLDER", # 19517 properties
|
||||
# # HOMEBUY / EQUITY LOAN
|
||||
# "Rent to Homebuy", # 1 property
|
||||
# # Leaseholder
|
||||
# "LEASEHOLD 100%", # 8455 properties
|
||||
# "Owned and Managed - 999 year lease", # 2076 properties
|
||||
# "Managed but not Owned-Private Lease", # 159 properties
|
||||
# "Owned and managed LEASEHOLD", # 26 properties
|
||||
# # Outright Sale - can't find anything matching
|
||||
# # SHARED EQUITY
|
||||
# "Shared Ownership", # 4065 properties
|
||||
# "Shared Ownership Owned Not Managed", # 23 properties
|
||||
# ]:
|
||||
# print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
|
||||
|
||||
sample_data["Ownership Type"].value_counts()
|
||||
# house_types = [
|
||||
# "HOUSE",
|
||||
# "BUNGALOW",
|
||||
# "MAISONETTE",
|
||||
# "DUPLEX",
|
||||
# ]
|
||||
|
||||
sample_data = initial_asset_data[
|
||||
initial_asset_data["Ownership Type"].isin(
|
||||
[
|
||||
"Owned and Managed",
|
||||
"Owned and Managed - 999 year lease",
|
||||
"Owned and managed LEASEHOLD",
|
||||
"LEASEHOLD 100%",
|
||||
"DATALOAD DEFAULT"
|
||||
]
|
||||
)
|
||||
]
|
||||
# Rows of initial_asset_data whose UPRN does not appear in sample_data.
dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
|
||||
# Bare expression: counts the dropped rows per Ownership Type; the result is not stored.
dropped["Ownership Type"].value_counts()
|
||||
# guaranteed_control = [
|
||||
# "Owned and Managed",
|
||||
# "Owned and Managed - 999 year lease",
|
||||
# "Owned and managed LEASEHOLD",
|
||||
# "LEASEHOLD 100%",
|
||||
# "DATALOAD DEFAULT",
|
||||
# ]
|
||||
|
||||
for value in [
|
||||
# Commercial # Everything is resi, so should be fine. No matches
|
||||
# Freeholder
|
||||
"FREEHOLDER", # 19517 properties
|
||||
# HOMEBUY / EQUITY LOAN
|
||||
"Rent to Homebuy", # 1 property
|
||||
# Leaseholder
|
||||
"LEASEHOLD 100%", # 8455 properties
|
||||
"Owned and Managed - 999 year lease", # 2076 properties
|
||||
"Managed but not Owned-Private Lease", # 159 properties
|
||||
"Owned and managed LEASEHOLD", # 26 properties
|
||||
# Outright Sale - can't find anything matching
|
||||
# SHARED EQUITY
|
||||
"Shared Ownership", # 4065 properties
|
||||
"Shared Ownership Owned Not Managed", # 23 properties
|
||||
]:
|
||||
print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
|
||||
# sample_data = initial_asset_data[
|
||||
# (
|
||||
# initial_asset_data["Ownership Type"].isin(guaranteed_control)
|
||||
# )
|
||||
# |
|
||||
# (
|
||||
# (initial_asset_data["Ownership Type"] == "FREEHOLDER")
|
||||
# &
|
||||
# (initial_asset_data["Property Type"].isin(house_types))
|
||||
# )
|
||||
# ]
|
||||
|
||||
house_types = [
|
||||
"HOUSE",
|
||||
"BUNGALOW",
|
||||
"MAISONETTE",
|
||||
"DUPLEX",
|
||||
]
|
||||
# fabric_retrofit_sample = initial_asset_data[
|
||||
# initial_asset_data["Ownership Type"].isin(
|
||||
# [
|
||||
# "Owned and Managed",
|
||||
# "FREEHOLDER",
|
||||
# "DATALOAD DEFAULT",
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
|
||||
guaranteed_control = [
|
||||
"Owned and Managed",
|
||||
"Owned and Managed - 999 year lease",
|
||||
"Owned and managed LEASEHOLD",
|
||||
"LEASEHOLD 100%",
|
||||
"DATALOAD DEFAULT",
|
||||
]
|
||||
# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
|
||||
# Keep a row when its Ownership Type is listed in guaranteed_control, OR when it is a
# "FREEHOLDER" row whose Property Type is listed in house_types.
sample_data = initial_asset_data[
|
||||
(
|
||||
initial_asset_data["Ownership Type"].isin(guaranteed_control)
|
||||
)
|
||||
|
|
|
||||
(
|
||||
(initial_asset_data["Ownership Type"] == "FREEHOLDER")
|
||||
&
|
||||
(initial_asset_data["Property Type"].isin(house_types))
|
||||
)
|
||||
]
|
||||
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
|
||||
# z = initial_asset_data[
|
||||
# ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
|
||||
# ]
|
||||
|
||||
fabric_retrofit_sample = initial_asset_data[
|
||||
initial_asset_data["Ownership Type"].isin(
|
||||
[
|
||||
"Owned and Managed",
|
||||
"FREEHOLDER",
|
||||
"DATALOAD DEFAULT",
|
||||
]
|
||||
)
|
||||
]
|
||||
# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
|
||||
|
||||
initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
# potential_sample = initial_asset_data[
|
||||
# ~pd.isnull(initial_asset_data["BlockCode"])
|
||||
# ]
|
||||
|
||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
|
||||
z = initial_asset_data[
|
||||
~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
|
||||
]
|
||||
# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
# initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="Property Type",
|
||||
# right_on="Property Type",
|
||||
# suffixes=("_on_block_codes", "_overall")
|
||||
# )
|
||||
|
||||
block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
|
||||
# # Comparison of smaller sample vs overall
|
||||
# new_asset_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
# "- Peabody "
|
||||
# "- Data Extracts for Domna v2.xlsx",
|
||||
# sheet_name="Properties"
|
||||
# )
|
||||
|
||||
potential_sample = initial_asset_data[
|
||||
~pd.isnull(initial_asset_data["BlockCode"])
|
||||
]
|
||||
# new_sustainability_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
# "- Peabody "
|
||||
# "- Data Extracts for Domna v2.xlsx",
|
||||
# sheet_name="Sustainability"
|
||||
# )
|
||||
|
||||
compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="Property Type",
|
||||
right_on="Property Type",
|
||||
suffixes=("_on_block_codes", "_overall")
|
||||
)
|
||||
# sap_bands = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
|
||||
# "08012026.xlsx",
|
||||
# )
|
||||
|
||||
# Comparison of smaller sample vs overall
|
||||
new_asset_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
"- Peabody "
|
||||
"- Data Extracts for Domna v2.xlsx",
|
||||
sheet_name="Properties"
|
||||
)
|
||||
# combined = new_asset_data.merge(
|
||||
# new_sustainability_data,
|
||||
# left_on="UPRN",
|
||||
# right_on="Org Ref",
|
||||
# suffixes=("_asset", "_sustainability")
|
||||
# ).merge(
|
||||
# sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
|
||||
# )
|
||||
# reduced_sample = combined[
|
||||
# ~combined["AH Tenure"].isin(
|
||||
# ["Commercial",
|
||||
# "Freeholder",
|
||||
# "HOMEBUY / EQUITY LOAN",
|
||||
# "Leaseholder",
|
||||
# "Outright Sale",
|
||||
# "SHARED EQUITY",
|
||||
# "Shared Ownership"]
|
||||
# )
|
||||
# ].copy()
|
||||
|
||||
new_sustainability_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
"- Peabody "
|
||||
"- Data Extracts for Domna v2.xlsx",
|
||||
sheet_name="Sustainability"
|
||||
)
|
||||
# # property types
|
||||
# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
# combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="Property Type",
|
||||
# right_on="Property Type",
|
||||
# suffixes=("_reduced_sample", "_overall")
|
||||
# )
|
||||
|
||||
sap_bands = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
|
||||
"08012026.xlsx",
|
||||
)
|
||||
# # lodged ratings
|
||||
# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
|
||||
# normalize=True).to_frame().reset_index().merge(
|
||||
# combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="Lodged EPC Band",
|
||||
# right_on="Lodged EPC Band",
|
||||
# suffixes=("_reduced_sample", "_overall")
|
||||
# )
|
||||
|
||||
# Two-step join:
#   1. asset rows to sustainability rows on UPRN == "Org Ref" (no `how` given, so the
#      pandas default inner join applies); clashing column names get the
#      _asset / _sustainability suffixes.
#   2. left join of the SAP band columns on "Org Ref" == "OrgRef", so rows without a
#      match in sap_bands are kept with missing band values.
combined = new_asset_data.merge(
|
||||
new_sustainability_data,
|
||||
left_on="UPRN",
|
||||
right_on="Org Ref",
|
||||
suffixes=("_asset", "_sustainability")
|
||||
).merge(
|
||||
sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
|
||||
)
|
||||
reduced_sample = combined[
|
||||
~combined["AH Tenure"].isin(
|
||||
["Commercial",
|
||||
"Freeholder",
|
||||
"HOMEBUY / EQUITY LOAN",
|
||||
"Leaseholder",
|
||||
"Outright Sale",
|
||||
"SHARED EQUITY",
|
||||
"Shared Ownership"]
|
||||
)
|
||||
].copy()
|
||||
# # modelled ratings
|
||||
# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
|
||||
# normalize=True).to_frame().reset_index().merge(
|
||||
# combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="SAP Band",
|
||||
# right_on="SAP Band",
|
||||
# suffixes=("_reduced_sample", "_overall")
|
||||
# )
|
||||
|
||||
# property types
|
||||
property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="Property Type",
|
||||
right_on="Property Type",
|
||||
suffixes=("_reduced_sample", "_overall")
|
||||
)
|
||||
# # Testing measures
|
||||
# m1 = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
# "solid floor, ashp 3.0 - 20250113 final.xlsx"
|
||||
# )
|
||||
# m2 = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
# "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
|
||||
# )
|
||||
|
||||
# lodged ratings
|
||||
lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
|
||||
normalize=True).to_frame().reset_index().merge(
|
||||
combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="Lodged EPC Band",
|
||||
right_on="Lodged EPC Band",
|
||||
suffixes=("_reduced_sample", "_overall")
|
||||
)
|
||||
# compare = m1.merge(
|
||||
# m2,
|
||||
# left_on="uprn",
|
||||
# right_on="uprn",
|
||||
# suffixes=("_ewi_iwi", "_no_ewi_iwi")
|
||||
# )
|
||||
|
||||
# modelled ratings
|
||||
modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
|
||||
normalize=True).to_frame().reset_index().merge(
|
||||
combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="SAP Band",
|
||||
right_on="SAP Band",
|
||||
suffixes=("_reduced_sample", "_overall")
|
||||
)
|
||||
# # Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
|
||||
# only_no_ewi_iwi = compare[
|
||||
# (compare["total_retrofit_cost_ewi_iwi"] == 0) &
|
||||
# (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
|
||||
# ]
|
||||
|
||||
# Testing measures
|
||||
m1 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
"solid floor, ashp 3.0 - 20250113 final.xlsx"
|
||||
)
|
||||
m2 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
"solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
|
||||
)
|
||||
# (m1["total_retrofit_cost"] > 0).sum()
|
||||
# (m2["total_retrofit_cost"] > 0).sum()
|
||||
|
||||
# Pair the two scenario workbooks on uprn (no `how` given, so the pandas default inner
# join applies). Columns present in both frames get the _ewi_iwi suffix for m1 and the
# _no_ewi_iwi suffix for m2.
compare = m1.merge(
|
||||
m2,
|
||||
left_on="uprn",
|
||||
right_on="uprn",
|
||||
suffixes=("_ewi_iwi", "_no_ewi_iwi")
|
||||
)
|
||||
# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
|
||||
|
||||
# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
|
||||
# Rows with zero retrofit cost in the _ewi_iwi columns but a non-zero cost in the
# _no_ewi_iwi columns.
# NOTE(review): a NaN cost compares as `!= 0`, so rows with a missing
# total_retrofit_cost_no_ewi_iwi would also be selected — confirm the cost columns are
# never null, or add an explicit notnull() condition.
only_no_ewi_iwi = compare[
|
||||
(compare["total_retrofit_cost_ewi_iwi"] == 0) &
|
||||
(compare["total_retrofit_cost_no_ewi_iwi"] != 0)
|
||||
]
|
||||
|
||||
(m1["total_retrofit_cost"] > 0).sum()
|
||||
(m2["total_retrofit_cost"] > 0).sum()
|
||||
|
||||
with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
|
||||
|
||||
z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
|
||||
# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
pydantic==2.9.2
|
||||
pydantic>=2.7.0  # lower bound required by pydantic-settings==2.6.0; pydantic v1 cannot satisfy it
|
||||
pydantic-settings==2.6.0
|
||||
epc-api-python==1.0.2
|
||||
numpy==2.1.2
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
[pytest]
|
||||
pythonpath = .
|
||||
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests
|
||||
|
|
|
|||
|
|
@ -15,16 +15,12 @@ from sqlalchemy import func
|
|||
|
||||
# PORTFOLIO_ID = 206
|
||||
# SCENARIOS = [389]
|
||||
PORTFOLIO_ID = 435 # Peabody
|
||||
PORTFOLIO_ID = 485 # Peabody
|
||||
SCENARIOS = [
|
||||
908,
|
||||
909,
|
||||
910,
|
||||
970,
|
||||
]
|
||||
scenario_names = {
|
||||
908: "EPC C - no solid floor, ashp 3.0",
|
||||
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
|
||||
910: "EPC B - no solid floor, no EWI, ashp 3.0"
|
||||
970: "EPC C - No solid floor, EQI, IWI",  # NOTE(review): "EQI" is probably a typo for "EWI" (cf. the other scenario names) — confirm before renaming, this string is used in the output filename
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -295,6 +291,11 @@ for scenario_id in SCENARIOS:
|
|||
df[df["predicted_post_works_sap"] == ""]
|
||||
|
||||
# Create excel to store to
|
||||
<<<<<<< HEAD
|
||||
filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx")
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
df.to_excel(writer, sheet_name="properties", index=False)
|
||||
=======
|
||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
|
|
@ -475,3 +476,4 @@ dupes = plans_df2[plans_df2["property_id"].duplicated()]
|
|||
example = example.merge(
|
||||
plans_df, how="left",
|
||||
)
|
||||
>>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue