mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
refactoring prs and oo data pulls
This commit is contained in:
parent
cb4b597272
commit
2f930e3fa2
2 changed files with 113 additions and 34 deletions
|
|
@ -1,10 +1,20 @@
|
|||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
from urllib.parse import urlencode
|
||||
from epc_api.client import EpcClient
|
||||
from utils.logger import setup_logger
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_perimeter,
|
||||
estimate_external_wall_area,
|
||||
estimate_number_of_floors
|
||||
)
|
||||
|
||||
logger = setup_logger()
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
|
@ -64,6 +74,89 @@ ROOF_DESCRIPTIONS = [
|
|||
SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
|
||||
|
||||
|
||||
def process_postcode_epcs(postcode, client):
    """Fetch the EPC records for one postcode and keep the newest per property.

    Args:
        postcode: Postcode sent to the search endpoint as the ``postcode``
            query parameter.
        client: Object exposing ``domestic.host`` (base URL) and
            ``domestic.call(method=..., url=..., params=...)``, whose result
            is indexed with ``"rows"``.

    Returns:
        A DataFrame with one row per ``uprn`` (the row with the greatest
        ``lodgement-date``). Records without a UPRN are keyed on their
        address instead. An empty DataFrame is returned when the search
        yields no rows.
    """
    # Build the URL with an explicit "/": os.path.join would use "\\" as the
    # separator on Windows and produce an invalid URL.
    # NOTE(review): "size" is presumably the page size of the search endpoint
    # - confirm against the API documentation.
    url = client.domestic.host.rstrip("/") + "/search?" + urlencode({"size": 1000})
    response = client.domestic.call(
        method="get", url=url, params={"postcode": postcode}
    )

    # NOTE(review): assumes an empty/None response means "no results" - verify
    # against the client implementation.
    rows = response["rows"] if response else []
    postcode_epcs = pd.DataFrame(rows)
    if postcode_epcs.empty:
        # A frame built from no rows has no columns, so the "uprn" lookups
        # below would raise KeyError.
        return postcode_epcs

    # Fall back to the address as the property key when the UPRN is missing.
    postcode_epcs["uprn"] = np.where(
        pd.isnull(postcode_epcs["uprn"]),
        postcode_epcs["address"],
        postcode_epcs["uprn"],
    )

    # Newest lodgement first, so keep="first" retains the latest certificate.
    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
    return postcode_epcs.drop_duplicates("uprn", keep="first")
|
||||
|
||||
|
||||
def filter_and_prepare_epcs(epcs):
    """Flag and keep the non-social properties matching either target profile.

    Two boolean columns are added to the result:

    * ``"Is Cavity Property"`` - the wall description is listed in
      ``CAVITY_WALL_DESCRIPTIONS`` and the current energy efficiency is <= 72.
    * ``"Solar and Loft"`` - the roof description is listed in
      ``ROOF_DESCRIPTIONS``, ``photo-supply`` is one of ``"0"``, ``""`` or
      ``"0.0"``, and the current energy efficiency is <= 68.

    Args:
        epcs: DataFrame with the columns ``walls-description``,
            ``roof-description``, ``photo-supply``,
            ``current-energy-efficiency`` and ``tenure``. It is not modified.

    Returns:
        A new DataFrame holding the rows where at least one flag is true and
        whose ``tenure`` is not listed in ``SOCIAL_TENURES``.
    """
    # Convert the score once; both thresholds compare against the same values.
    sap_score = epcs["current-energy-efficiency"].astype(int)

    is_cavity = epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS) & (
        sap_score <= 72
    )
    solar_and_loft = (
        epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
        & epcs["photo-supply"].isin(["0", "", "0.0"])
        & (sap_score <= 68)
    )

    # assign() returns a new frame, so the caller's DataFrame is left
    # untouched instead of gaining the two flag columns as a side effect.
    flagged = epcs.assign(
        **{"Is Cavity Property": is_cavity, "Solar and Loft": solar_and_loft}
    )
    eligible = flagged[is_cavity | solar_and_loft]
    return eligible[~eligible["tenure"].isin(SOCIAL_TENURES)]
|
||||
|
||||
|
||||
def rename_and_add_columns(epcs):
    """Rename EPC columns to report headings and append estimated attributes.

    Args:
        epcs: DataFrame of EPC records using the raw hyphenated column names
            listed in the rename mapping below.

    Returns:
        A renamed copy of ``epcs`` with four extra columns:
        ``"Estimated Number of Floors"``, ``"Estimated Perimeter (m)"``,
        ``"Estimated Heat Loss Perimeter (m2)"`` and
        ``"Roof Insulation Thickness"``. An estimate is ``None`` for any row
        where one of its inputs is missing.
    """

    def _as_float(value):
        # Return value as a float, or None when it is null or blank.
        # NOTE(review): the floor height is already converted with float()
        # in this function, which suggests values may arrive as strings -
        # confirm against the upstream data.
        if pd.isnull(value) or value == "":
            return None
        return float(value)

    def _perimeter(row):
        # Estimate the perimeter from per-floor area and per-floor room count.
        floors = row["Estimated Number of Floors"]
        floor_area = _as_float(row["Property Floor Area"])
        rooms = _as_float(row["Number of Habitable Rooms"])
        # The floor count is None when the property type is missing; dividing
        # by it would raise TypeError, so skip the estimate for such rows.
        if pd.isnull(floors) or floor_area is None or rooms is None:
            return None
        return estimate_perimeter(floor_area / floors, rooms / floors)

    def _external_wall_area(row):
        # Estimate the external wall area; needs a perimeter estimate.
        perimeter = row["Estimated Perimeter (m)"]
        if pd.isnull(perimeter):
            return None
        height = row["Property Floor Height"]
        # Fall back to 2.5 when the height is missing. NaN is truthy, so a
        # plain truthiness check alone would let it through as float("nan").
        height = 2.5 if pd.isnull(height) or not height else float(height)
        return estimate_external_wall_area(
            row["Estimated Number of Floors"],
            height,
            perimeter,
            row["Archetype"],
        )

    epcs = epcs.rename(
        columns={
            "address": "Address",
            "postcode": "Postcode",
            "inspection-date": "Date of last EPC",
            "current-energy-efficiency": "SAP score on register",
            "current-energy-rating": "EPC rating on register",
            "property-type": "Property Type",
            "built-form": "Archetype",
            "total-floor-area": "Property Floor Area",
            "construction-age-band": "Property Age Band",
            "floor-height": "Property Floor Height",
            "number-habitable-rooms": "Number of Habitable Rooms",
            "walls-description": "Wall Construction",
            "roof-description": "Roof Construction",
            "mainheat-description": "Heating Type",
            "secondheat-description": "Secondary Heating",
            "transaction-type": "Reason for last EPC",
            "energy-consumption-current": "Heat Demand (kWh/m2)",
            "tenure": "Tenure",
        }
    )

    # Each estimate below reads columns produced by the previous step, so the
    # order of these assignments matters.
    epcs["Estimated Number of Floors"] = epcs.apply(
        lambda x: estimate_number_of_floors(x["Property Type"])
        if pd.notnull(x["Property Type"])
        else None,
        axis=1,
    )
    epcs["Estimated Perimeter (m)"] = epcs.apply(_perimeter, axis=1)
    epcs["Estimated Heat Loss Perimeter (m2)"] = epcs.apply(
        _external_wall_area, axis=1
    )
    epcs["Roof Insulation Thickness"] = epcs.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()[
            "insulation_thickness"
        ]
        if pd.notnull(x["Roof Construction"])
        else None,
        axis=1,
    )
    return epcs
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
This application is used to identify additional units that are private rentals or owner-occupied that can be
|
||||
|
|
@ -73,7 +166,13 @@ def main():
|
|||
- An excel file that contains one or many tabs that include the addresses to be visited
|
||||
"""
|
||||
|
||||
# This should be set:
|
||||
output_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024"
|
||||
client = EpcClient(auth_token=EPC_AUTH_TOKEN)
|
||||
writer = pd.ExcelWriter(output_filepath, engine="xlsxwriter")
|
||||
|
||||
for config in CONFIG:
|
||||
logger.info("Processing %s", config["tab"])
|
||||
# Read in the data
|
||||
route_march_addresses = pd.read_excel(
|
||||
config["filepath"],
|
||||
|
|
@ -84,39 +183,18 @@ def main():
|
|||
postcodes = route_march_addresses[config["postcode_column"]].unique()
|
||||
|
||||
epcs = []
|
||||
for postcode in postcodes:
|
||||
# Get the EPCs in this postcode
|
||||
|
||||
params = {"postcode": postcode}
|
||||
client = EpcClient(auth_token=EPC_AUTH_TOKEN)
|
||||
url = os.path.join(client.domestic.host, "search")
|
||||
url += "?" + urlencode({k: v for k, v in {"size": 1000}.items() if v})
|
||||
response = client.domestic.call(method="get", url=url, params=params)
|
||||
|
||||
postcode_epcs = pd.DataFrame(response["rows"])
|
||||
# Get the newest EPC, per UPRN
|
||||
postcode_epcs["uprn"] = np.where(
|
||||
pd.isnull(postcode_epcs["uprn"]),
|
||||
postcode_epcs["address"],
|
||||
postcode_epcs["uprn"]
|
||||
)
|
||||
postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
|
||||
postcode_epcs = postcode_epcs.drop_duplicates("uprn", keep="first")
|
||||
|
||||
postcode_epcs["Is Cavity Property"] = postcode_epcs["walls-description"].isin(
|
||||
CAVITY_WALL_DESCRIPTIONS
|
||||
) & (postcode_epcs["current-energy-efficiency"].astype(int) <= 72)
|
||||
|
||||
postcode_epcs["Solar and Loft"] = (postcode_epcs["roof-description"].isin(ROOF_DESCRIPTIONS)) & (
|
||||
postcode_epcs["photo-supply"].isin(["0", "", "0.0"])) & (
|
||||
postcode_epcs["current-energy-efficiency"].astype(int) <= 68
|
||||
)
|
||||
|
||||
postcode_epcs = postcode_epcs[postcode_epcs["Is Cavity Property"] | postcode_epcs["Solar and Loft"]]
|
||||
|
||||
# Remove any social properties
|
||||
postcode_epcs = postcode_epcs[~postcode_epcs["tenure"].isin(SOCIAL_TENURES)]
|
||||
|
||||
for postcode in tqdm(postcodes):
|
||||
postcode_epcs = process_postcode_epcs(postcode, client)
|
||||
epcs.append(postcode_epcs)
|
||||
|
||||
# Concatenate all postcodes' data and filter it
|
||||
epcs = pd.concat(epcs)
|
||||
epcs = filter_and_prepare_epcs(epcs)
|
||||
epcs = rename_and_add_columns(epcs)
|
||||
|
||||
sheet_name = config["tab"][:31] # Excel sheet names max length of 31 characters
|
||||
epcs.to_excel(writer, sheet_name=sheet_name, index=False)
|
||||
|
||||
# Save and close the writer outside the loop
|
||||
writer.close()
|
||||
logger.info("Data successfully written to %s", output_filepath)
|
||||
|
|
|
|||
|
|
@ -6,4 +6,5 @@ usaddress==0.5.11
|
|||
fuzzywuzzy==0.18.0
|
||||
boto3==1.35.44
|
||||
python-dotenv
|
||||
tqdm
|
||||
tqdm
|
||||
xlsxwriter
|
||||
Loading…
Add table
Reference in a new issue