Merge pull request #369 from Hestia-Homes/prs-oo-route-march

Prs oo route march
This commit is contained in:
KhalimCK 2024-11-05 14:57:46 +00:00 committed by GitHub
commit 2b6c86e557
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 255 additions and 2 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Route-March" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Route-March" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -2,6 +2,7 @@ import os
import time
import re
from urllib.parse import urlencode
import usaddress
import pandas as pd
import numpy as np
@ -257,6 +258,8 @@ class SearchEpc:
params = {"address": self.address1, "postcode": self.postcode}
url = os.path.join(self.client.domestic.host, "search")
if size:
url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
for retry in range(self.max_retries):
try:

View file

@ -0,0 +1,240 @@
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv
from urllib.parse import urlencode
from epc_api.client import EpcClient
from utils.logger import setup_logger
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
# Module-level logger, obtained from the project's setup_logger helper.
logger = setup_logger()
# Load variables from backend/.env *before* the token is read below. The path is
# relative, so this presumably expects the script to be launched from the
# repository root -- TODO confirm.
load_dotenv(dotenv_path="backend/.env")
# Token passed to EpcClient in main(); os.getenv yields None when the variable is unset.
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# Workbook holding this week's route-march tabs (one tab per housing provider).
_ROUTE_MARCH_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/"
    "Surveyors Sites for Week Commencing 11.11.2024.xlsx"
)

# Tab names exactly as they appear in the workbook; the trailing space on the
# first entry is intentional and must be kept for the sheet lookup to match.
_ROUTE_MARCH_TABS = (
    "SETTLE GBIS x 242 ",
    "ACIS GBIS x 76",
    "SOUTHERN GBIS x 150",
    "COMMUNITY HOUSING GBIS x 199",
    "EASTLIGHT GBIS x 42",
)

# One entry per tab to process: where to read it from and which column holds
# the postcodes.
CONFIG = [
    {
        "filepath": _ROUTE_MARCH_WORKBOOK,
        "tab": tab_name,
        "postcode_column": "Postcode",
    }
    for tab_name in _ROUTE_MARCH_TABS
]

# EPC "walls-description" values treated as cavity-wall construction.
CAVITY_WALL_DESCRIPTIONS = [
    "Cavity wall, as built, no insulation (assumed)",
    "Cavity wall, as built, partial insulation (assumed)",
    "Cavity wall, as built, insulated (assumed)",
    "Cavity wall, with internal insulation",
    "Cavity wall, with external insulation",
]

# EPC "roof-description" values accepted by the "Solar and Loft" filter.
ROOF_DESCRIPTIONS = [
    "Pitched, no insulation",
    "Pitched, no insulation (assumed)",
    "Pitched, 25 mm loft insulation",
    "Pitched, 50 mm loft insulation",
    "Pitched, 75 mm loft insulation",
    "Pitched, 100 mm loft insulation",
    "Pitched, 150 mm loft insulation",
    "Pitched, limited insulation (assumed)",
    "Pitched, insulated (assumed)",
]

# EPC "tenure" values that mark a property as social housing (excluded from output).
SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
def process_postcode_epcs(postcode, client):
    """Fetch the EPCs lodged for a postcode and keep the newest one per property.

    Args:
        postcode: Postcode string; surrounding whitespace is stripped before
            it is sent as the ``postcode`` query parameter.
        client: EPC API client exposing ``domestic.host`` and
            ``domestic.call(method=..., url=..., params=...)``.

    Returns:
        A DataFrame with one row per property (the row with the greatest
        ``lodgement-date``), or an empty DataFrame when the response carries
        no rows.
    """
    params = {"postcode": postcode.strip()}
    # Build the URL with plain string operations: os.path.join uses the OS path
    # separator, which is not a safe way to assemble a URL.
    url = client.domestic.host.rstrip("/") + "/search?" + urlencode({"size": 1000})
    response = client.domestic.call(method="get", url=url, params=params)

    # A falsy response, a missing "rows" key and an empty "rows" list all mean
    # "nothing found"; an empty list would otherwise fail on the column
    # lookups below.
    if not response or "rows" not in response or not response["rows"]:
        logger.warning("No EPCs found for postcode %s", postcode)
        return pd.DataFrame()

    postcode_epcs = pd.DataFrame(response["rows"])

    # Use the address as the property key when the UPRN is absent. Blank
    # strings count as absent too; otherwise every UPRN-less property in the
    # postcode would share the key "" and be collapsed into one row below.
    uprn = postcode_epcs["uprn"]
    uprn_missing = uprn.isnull() | (uprn.astype(str).str.strip() == "")
    postcode_epcs["uprn"] = np.where(uprn_missing, postcode_epcs["address"], uprn)

    # Newest lodgement first, then keep the first (i.e. newest) row per property.
    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
    postcode_epcs = postcode_epcs.drop_duplicates("uprn", keep="first")
    return postcode_epcs
def filter_and_prepare_epcs(epcs):
    """Flag qualifying properties and drop the rest, along with social tenures.

    Two boolean columns are added:

    * ``Is Cavity Property`` -- wall description is in
      ``CAVITY_WALL_DESCRIPTIONS`` and the SAP score is at most 72.
    * ``Solar and Loft`` -- roof description is in ``ROOF_DESCRIPTIONS``,
      ``photo-supply`` is one of ``"0"``, ``""`` or ``"0.0"``, and the SAP
      score is at most 68.

    Rows matching neither flag, and rows whose tenure is in
    ``SOCIAL_TENURES``, are removed.

    Args:
        epcs: DataFrame of EPC rows with the raw API column names.

    Returns:
        A new, filtered DataFrame; the input frame is left unmodified.
    """
    # Work on a copy so the flag columns are not written into the caller's frame.
    epcs = epcs.copy()

    # Parse the score once. Blank or non-numeric scores become NaN, which
    # compares False against both thresholds, so such rows simply fail the
    # filters instead of raising during conversion.
    sap_score = pd.to_numeric(epcs["current-energy-efficiency"], errors="coerce")

    epcs["Is Cavity Property"] = (
        epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS) & (sap_score <= 72)
    )
    epcs["Solar and Loft"] = (
        epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
        & epcs["photo-supply"].isin(["0", "", "0.0"])
        & (sap_score <= 68)
    )

    qualifies = epcs["Is Cavity Property"] | epcs["Solar and Loft"]
    is_social = epcs["tenure"].isin(SOCIAL_TENURES)
    return epcs[qualifies & ~is_social]
def _estimate_row_floors(row):
    """Return the estimated floor count for a row, or None when the property type is null."""
    property_type = row["Property Type"]
    if pd.isnull(property_type):
        return None
    return estimate_number_of_floors(property_type)


def _estimate_row_perimeter(row):
    """Return the estimated per-floor perimeter for a row, or None when an input is missing."""
    floors = row["Estimated Number of Floors"]
    floor_area = row["Property Floor Area"]
    rooms = row["Number of Habitable Rooms"]
    # The floor count is a divisor: skip rows where it is null or zero, and
    # rows where either numeric input could not be parsed.
    if pd.isnull(floors) or not floors or pd.isnull(floor_area) or pd.isnull(rooms):
        return None
    return estimate_perimeter(floor_area / floors, rooms / floors)


def _estimate_row_wall_area(row):
    """Return the estimated external wall area for a row, or None when no perimeter is available."""
    perimeter = row["Estimated Perimeter (m)"]
    if pd.isnull(perimeter):
        return None
    floor_height = row["Property Floor Height"]
    # Fall back to 2.4 when the height is blank, zero or null (NaN is truthy,
    # so it needs the explicit null check).
    floor_height = float(floor_height) if floor_height and pd.notnull(floor_height) else 2.4
    return estimate_external_wall_area(
        row["Estimated Number of Floors"],
        floor_height,
        perimeter,
        row["Archetype"],
    )


def _roof_insulation_thickness(row):
    """Return the ``insulation_thickness`` parsed from the roof description, or None when it is null."""
    description = row["Roof Construction"]
    if pd.isnull(description):
        return None
    return RoofAttributes(description=description).process()["insulation_thickness"]


def rename_and_add_columns(epcs):
    """Select the reporting columns, give them readable names and add estimates.

    Args:
        epcs: Filtered EPC DataFrame with the raw API column names plus the
            ``Is Cavity Property`` and ``Solar and Loft`` flag columns.

    Returns:
        A new DataFrame restricted to the reporting columns (renamed), with
        these derived columns appended: ``Estimated Number of Floors``,
        ``Estimated Perimeter (m)``, ``Estimated Heat Loss Perimeter (m2)``
        and ``Roof Insulation Thickness``. A derived value is None when the
        inputs it needs are missing.

    Raises:
        KeyError: if any of the selected columns is absent from ``epcs``.
    """
    selected_columns = [
        "uprn",
        "address",
        "postcode",
        "property-type",
        "built-form",
        "inspection-date",
        "current-energy-rating",
        "current-energy-efficiency",
        "roof-description",
        "walls-description",
        "transaction-type",
        # Fields feeding the derived estimates
        "secondheat-description",
        "total-floor-area",
        "construction-age-band",
        "floor-height",
        "number-habitable-rooms",
        "mainheat-description",
        "energy-consumption-current",  # kwh/m2
        "tenure",
        "Is Cavity Property",
        "Solar and Loft",
    ]
    readable_names = {
        "address": "Address",
        "postcode": "Postcode",
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)",
        "tenure": "Tenure",
    }
    # Column selection with a list and rename() both return new objects, so
    # the assignments below never write into the caller's frame.
    epcs = epcs[selected_columns].rename(columns=readable_names)

    # Blank or malformed values become NaN instead of aborting the whole run;
    # fully populated columns convert exactly as before.
    epcs["Number of Habitable Rooms"] = pd.to_numeric(
        epcs["Number of Habitable Rooms"], errors="coerce"
    )
    epcs["Property Floor Area"] = pd.to_numeric(epcs["Property Floor Area"], errors="coerce")

    # Order matters: each estimate reads the column produced by the previous step.
    epcs["Estimated Number of Floors"] = epcs.apply(_estimate_row_floors, axis=1)
    epcs["Estimated Perimeter (m)"] = epcs.apply(_estimate_row_perimeter, axis=1)
    epcs["Estimated Heat Loss Perimeter (m2)"] = epcs.apply(_estimate_row_wall_area, axis=1)
    epcs["Roof Insulation Thickness"] = epcs.apply(_roof_insulation_thickness, axis=1)
    return epcs
def main():
    """Find private-rental and owner-occupied units to add to the route marches.

    For every tab listed in ``CONFIG`` the postcodes are read from the Excel
    workbook, the EPCs for each postcode are fetched, filtered and reshaped,
    and the result is written to a sheet of the same name in the output
    workbook.

    Required input: an Excel file with one or more tabs containing the
    addresses to be visited (see ``CONFIG``).
    """
    # This should be set:
    output_filepath = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024.xlsx"
    )
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # The context manager closes the workbook even if processing a tab raises.
    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
        for config in CONFIG:
            logger.info("Processing %s", config["tab"])

            route_march_addresses = pd.read_excel(
                config["filepath"],
                sheet_name=config["tab"],
                engine="openpyxl",
            )
            # Blank cells are read as NaN, which has no string methods; drop
            # them before querying.
            postcodes = route_march_addresses[config["postcode_column"]].dropna().unique()

            per_postcode_frames = []
            for postcode in tqdm(postcodes):
                postcode_epcs = process_postcode_epcs(postcode, client)
                if postcode_epcs.empty:
                    continue
                per_postcode_frames.append(postcode_epcs)

            # pd.concat raises on an empty list, so skip tabs with no EPC data.
            if not per_postcode_frames:
                logger.warning("No EPCs found for any postcode in %s; skipping tab", config["tab"])
                continue

            # Combine all postcodes for this tab, then filter and reshape.
            epcs = pd.concat(per_postcode_frames, ignore_index=True)
            epcs = filter_and_prepare_epcs(epcs)
            epcs = rename_and_add_columns(epcs)

            sheet_name = config["tab"][:31]  # Excel sheet names max length of 31 characters
            epcs.to_excel(writer, sheet_name=sheet_name, index=False)

    logger.info("Data successfully written to %s", output_filepath)

View file

@ -0,0 +1,10 @@
openpyxl
epc-api-python==1.0.2
numpy==2.1.2
pandas==2.2.3
usaddress==0.5.11
fuzzywuzzy==0.18.0
boto3==1.35.44
python-dotenv
tqdm
xlsxwriter