mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #369 from Hestia-Homes/prs-oo-route-march
Prs oo route march
This commit is contained in:
commit
2b6c86e557
5 changed files with 255 additions and 2 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Route-March" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Route-March" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
import time
|
||||
import re
|
||||
|
||||
from urllib.parse import urlencode
|
||||
import usaddress
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
|
@ -257,6 +258,8 @@ class SearchEpc:
|
|||
params = {"address": self.address1, "postcode": self.postcode}
|
||||
|
||||
url = os.path.join(self.client.domestic.host, "search")
|
||||
if size:
|
||||
url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
|
||||
|
||||
for retry in range(self.max_retries):
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,240 @@
|
|||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
from urllib.parse import urlencode
|
||||
from epc_api.client import EpcClient
|
||||
from utils.logger import setup_logger
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_perimeter,
|
||||
estimate_external_wall_area,
|
||||
estimate_number_of_floors
|
||||
)
|
||||
|
||||
logger = setup_logger()

# Load settings from the backend .env file before reading the EPC token.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Workbook holding the surveyors' route-march sites; every tab below lives in it.
_ROUTE_MARCH_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
    "11.11.2024.xlsx"
)

# Tab names are passed verbatim to pandas as sheet names, so whitespace matters
# (the first entry ends with a space).
_ROUTE_MARCH_TABS = (
    "SETTLE GBIS x 242 ",
    "ACIS GBIS x 76",
    "SOUTHERN GBIS x 150",
    "COMMUNITY HOUSING GBIS x 199",
    "EASTLIGHT GBIS x 42",
)

# One entry per tab to process: where to read it and which column holds postcodes.
CONFIG = [
    {
        "filepath": _ROUTE_MARCH_WORKBOOK,
        "tab": tab_name,
        "postcode_column": "Postcode",
    }
    for tab_name in _ROUTE_MARCH_TABS
]

# EPC "walls-description" values that flag a property as cavity-walled.
CAVITY_WALL_DESCRIPTIONS = [
    "Cavity wall, as built, no insulation (assumed)",
    "Cavity wall, as built, partial insulation (assumed)",
    "Cavity wall, as built, insulated (assumed)",
    "Cavity wall, with internal insulation",
    "Cavity wall, with external insulation",
]

# EPC "roof-description" values accepted by the "Solar and Loft" check.
ROOF_DESCRIPTIONS = [
    "Pitched, no insulation",
    "Pitched, no insulation (assumed)",
    "Pitched, 25 mm loft insulation",
    "Pitched, 50 mm loft insulation",
    "Pitched, 75 mm loft insulation",
    "Pitched, 100 mm loft insulation",
    "Pitched, 150 mm loft insulation",
    "Pitched, limited insulation (assumed)",
    "Pitched, insulated (assumed)",
]

# EPC "tenure" values that are excluded from the output.
SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
|
||||
|
||||
|
||||
def process_postcode_epcs(postcode, client):
    """Fetch the EPCs for a postcode and keep the latest one per property.

    Args:
        postcode: Postcode to search for; surrounding whitespace is ignored.
        client: EPC API client exposing ``domestic.host`` and
            ``domestic.call(method=..., url=..., params=...)``.

    Returns:
        A DataFrame built from the response's ``rows`` with one row per
        ``uprn`` (the row with the greatest ``lodgement-date``), or an empty
        DataFrame when the response has no rows.
    """
    params = {"postcode": postcode.strip()}
    # Join URL parts with "/" explicitly: os.path.join uses the OS path
    # separator, which is a backslash on Windows.
    url = client.domestic.host.rstrip("/") + "/search?" + urlencode({"size": 1000})
    response = client.domestic.call(method="get", url=url, params=params)
    if response is None or "rows" not in response or not response["rows"]:
        logger.warning("No EPCs found for postcode %s", postcode)
        return pd.DataFrame()
    postcode_epcs = pd.DataFrame(response["rows"])

    # Records without a UPRN fall back to the address as their identifier.
    # Blank strings count as missing too; otherwise every blank-UPRN property
    # would share the key "" and collapse into one row when de-duplicating.
    uprn = postcode_epcs["uprn"]
    missing_uprn = uprn.isna() | uprn.eq("")
    postcode_epcs["uprn"] = np.where(missing_uprn, postcode_epcs["address"], uprn)

    # Newest lodgement first, then keep the first row seen for each property.
    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
    postcode_epcs = postcode_epcs.drop_duplicates("uprn", keep="first")
    return postcode_epcs
|
||||
|
||||
|
||||
def filter_and_prepare_epcs(epcs):
    """Flag candidate properties and drop the rows that are not of interest.

    Two boolean columns are added:

    * ``Is Cavity Property`` - the wall description is one of
      ``CAVITY_WALL_DESCRIPTIONS`` and the current energy efficiency is <= 72.
    * ``Solar and Loft`` - the roof description is one of
      ``ROOF_DESCRIPTIONS``, ``photo-supply`` is ``"0"``, ``""`` or ``"0.0"``,
      and the current energy efficiency is <= 68.

    Args:
        epcs: DataFrame of EPC records with the raw EPC column names.

    Returns:
        A new DataFrame holding the rows where at least one flag is set and
        whose ``tenure`` is not in ``SOCIAL_TENURES``. The input is not
        modified.
    """
    # Work on a copy so the flag columns are not added to the caller's frame.
    epcs = epcs.copy()

    # Convert the score once; both flags compare against it.
    sap_score = epcs["current-energy-efficiency"].astype(int)

    epcs["Is Cavity Property"] = (
        epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS) & (sap_score <= 72)
    )
    epcs["Solar and Loft"] = (
        epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
        & epcs["photo-supply"].isin(["0", "", "0.0"])
        & (sap_score <= 68)
    )

    is_candidate = epcs["Is Cavity Property"] | epcs["Solar and Loft"]
    is_social = epcs["tenure"].isin(SOCIAL_TENURES)
    return epcs[is_candidate & ~is_social]
|
||||
|
||||
|
||||
def _floor_height_or_default(raw_height, default=2.4):
    """Return ``raw_height`` as a float, or ``default`` when it is missing.

    Missing means null/NaN or any falsy value (for example an empty string).
    """
    if pd.isnull(raw_height) or not raw_height:
        return default
    return float(raw_height)


def rename_and_add_columns(epcs):
    """Select the reporting columns, rename them and add derived estimates.

    Args:
        epcs: Filtered EPC DataFrame that still uses the raw EPC column names
            and already carries the ``Is Cavity Property`` and
            ``Solar and Loft`` flags.

    Returns:
        A DataFrame restricted to the reporting columns (renamed to their
        human-readable headings) plus ``Estimated Number of Floors``,
        ``Estimated Perimeter (m)``, ``Estimated Heat Loss Perimeter (m2)``
        and ``Roof Insulation Thickness``.
    """
    # Retrieve just the data we need
    epcs = epcs[
        [
            "uprn",
            "address",
            "postcode",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
            "tenure",
            "Is Cavity Property",
            "Solar and Loft",
        ]
    ]

    epcs = epcs.rename(
        columns={
            "address": "Address",
            "postcode": "Postcode",
            "inspection-date": "Date of last EPC",
            "current-energy-efficiency": "SAP score on register",
            "current-energy-rating": "EPC rating on register",
            "property-type": "Property Type",
            "built-form": "Archetype",
            "total-floor-area": "Property Floor Area",
            "construction-age-band": "Property Age Band",
            "floor-height": "Property Floor Height",
            "number-habitable-rooms": "Number of Habitable Rooms",
            "walls-description": "Wall Construction",
            "roof-description": "Roof Construction",
            "mainheat-description": "Heating Type",
            "secondheat-description": "Secondary Heating",
            "transaction-type": "Reason for last EPC",
            "energy-consumption-current": "Heat Demand (kWh/m2)",
            "tenure": "Tenure",
        }
    )

    # NOTE(review): these casts raise on blank values - confirm the upstream
    # data always populates both fields.
    epcs["Number of Habitable Rooms"] = epcs["Number of Habitable Rooms"].astype(int)
    epcs["Property Floor Area"] = epcs["Property Floor Area"].astype(float)

    if epcs.empty:
        # Row-wise apply on an empty frame does not yield a single Series, so
        # the assignments below would fail. Add the derived columns empty so
        # the output keeps the same headings.
        for derived_column in (
            "Estimated Number of Floors",
            "Estimated Perimeter (m)",
            "Estimated Heat Loss Perimeter (m2)",
            "Roof Insulation Thickness",
        ):
            epcs[derived_column] = None
        return epcs

    # Floors are estimated from the property type; rows without one get None.
    epcs["Estimated Number of Floors"] = epcs.apply(
        lambda x: estimate_number_of_floors(x["Property Type"]) if pd.notnull(x["Property Type"]) else None,
        axis=1,
    )

    # Perimeter is estimated from the per-floor area and per-floor room count.
    epcs["Estimated Perimeter (m)"] = epcs.apply(
        lambda x: estimate_perimeter(
            x["Property Floor Area"] / x["Estimated Number of Floors"],
            x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ),
        axis=1,
    )

    # A missing floor height (blank or NaN) falls back to 2.4.
    epcs["Estimated Heat Loss Perimeter (m2)"] = epcs.apply(
        lambda x: estimate_external_wall_area(
            x["Estimated Number of Floors"],
            _floor_height_or_default(x["Property Floor Height"]),
            x["Estimated Perimeter (m)"],
            x["Archetype"],
        ),
        axis=1,
    )

    epcs["Roof Insulation Thickness"] = epcs.apply(
        lambda x: (
            RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"]
            if pd.notnull(x["Roof Construction"])
            else None
        ),
        axis=1,
    )
    return epcs
|
||||
|
||||
|
||||
def main(output_filepath=None, configs=None):
    """
    This application is used to identify additional units that are private rentals or owner-occupied that can be
    included in the route marches.

    Required inputs are the following:
    - An excel file that contains one or many tabs that include the addresses to be visited

    Args:
        output_filepath: Where to write the resulting workbook. Defaults to the
            path used for the week commencing 11.11.2024.
        configs: Iterable of dicts with ``filepath``, ``tab`` and
            ``postcode_column`` keys. Defaults to the module-level ``CONFIG``.

    One sheet is written per configured tab. A tab for which no EPCs are found
    is skipped with a warning.
    """
    if output_filepath is None:
        # This should be set:
        output_filepath = (
            "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024.xlsx"
        )
    if configs is None:
        configs = CONFIG

    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # The context manager closes the workbook even if a tab fails part-way.
    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
        for config in configs:
            logger.info("Processing %s", config["tab"])
            # Read in the data
            route_march_addresses = pd.read_excel(
                config["filepath"],
                sheet_name=config["tab"],
                engine="openpyxl",
            )

            # Drop blank cells and normalise whitespace before de-duplicating,
            # so "AB1 2CD" and "AB1 2CD " are not queried (and output) twice.
            postcodes = route_march_addresses[config["postcode_column"]].dropna().astype(str).str.strip()
            postcodes = postcodes[postcodes != ""].unique()

            epcs = []
            for postcode in tqdm(postcodes):
                postcode_epcs = process_postcode_epcs(postcode, client)
                if postcode_epcs.empty:
                    continue
                epcs.append(postcode_epcs)

            if not epcs:
                # pd.concat raises on an empty list; there is nothing to write.
                logger.warning("No EPCs found for any postcode in %s", config["tab"])
                continue

            # Concatenate all postcodes' data and filter it
            epcs = pd.concat(epcs)
            epcs = filter_and_prepare_epcs(epcs)
            epcs = rename_and_add_columns(epcs)

            sheet_name = config["tab"][:31]  # Excel sheet names max length of 31 characters
            epcs.to_excel(writer, sheet_name=sheet_name, index=False)

    logger.info("Data successfully written to %s", output_filepath)
|
||||
10
etl/route_march/oo_prs_additional_units/requirements.txt
Normal file
10
etl/route_march/oo_prs_additional_units/requirements.txt
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
openpyxl
|
||||
epc-api-python==1.0.2
|
||||
numpy==2.1.2
|
||||
pandas==2.2.3
|
||||
usaddress==0.5.11
|
||||
fuzzywuzzy==0.18.0
|
||||
boto3==1.35.44
|
||||
python-dotenv
|
||||
tqdm
|
||||
xlsxwriter
|
||||
Loading…
Add table
Reference in a new issue