minor refactoring of plan router

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-06 14:46:22 +01:00
parent e2633dfa5b
commit bdbdbdc676
6 changed files with 102 additions and 42 deletions

View file

@ -48,6 +48,8 @@ class Property(Definitions):
self.postcode = postcode
self.address1 = address1
self.data = data
self.old_data = None
self.uprn = None
self.full_sap_epc = None
self.in_conservation_area, self.is_listed, self.is_heritage = None, None, None
@ -100,6 +102,10 @@ class Property(Definitions):
]
if len(newest_response) > 1:
raise Exception("More than one result found for this address - investigate me")
# We'll keep the old EPCs in case they contain information that is not present on the newest one
self.old_data = [epc for epc in response["rows"] if epc["lmk-key"] != newest_response[0]["lmk-key"]]
response["rows"] = newest_response
self.data = response["rows"][0]
@ -264,11 +270,9 @@ class Property(Definitions):
self.set_mains_gas()
self.set_floor_height()
self.set_wall_area()
self.set_floor_area()
self.set_age_band()
self.set_number_floors()
self.set_perimeter()
self.set_basic_property_attributes()
self.set_wall_type()
for description, attribute in cleaned.items():
@ -478,16 +482,6 @@ class Property(Definitions):
While we do not have the
"""
def set_floor_area(self):
    """
    Store the total floor area reported on the EPC as a float on ``self.floor_area``.

    The EPC "total-floor-area" value is used as-is; no per-floor adjustment is made here.
    """
    # The number of floors is not known at this point, so a single floor is assumed for now.
    # Refining this is expected to need Verisk data.
    raw_floor_area = self.data["total-floor-area"]
    self.floor_area = float(raw_floor_area)
def get_spatial_data(self, uprn_filenames):
"""
@ -515,40 +509,34 @@ class Property(Definitions):
# Pull out spatial features
self.set_spatial(spatial)
def set_number_floors(self):
def set_basic_property_attributes(self):
    """
    Sets the floor area, number of floors and perimeter of the property from the EPC data.

    The floor area is taken directly from the EPC. The number of floors is estimated for houses from the total
    floor area and the number of habitable rooms, and is fixed at 1 for flats. The perimeter is then estimated
    from the floor area and the number of rooms per floor.

    NOTE: backup cleaned values (e.g. medians across the EPC data) are not applied here yet - the EPC values
    are used as they are, so a missing or non-numeric value will raise.

    :raises NotImplementedError: if the EPC property type is neither "House" nor "Flat"
    :return:
    """
    self.floor_area = float(self.data["total-floor-area"])
    number_of_rooms = float(self.data["number-habitable-rooms"])

    # The number of floors has to be set before the perimeter is estimated, because the perimeter is derived
    # from per-floor figures
    property_type = self.data["property-type"]
    if property_type == "House":
        self.number_of_floors = estimate_floors(self.floor_area, number_of_rooms)
    elif property_type == "Flat":
        self.number_of_floors = 1
    else:
        raise NotImplementedError("Implement me")

    self.perimeter = estimate_perimeter(
        self.floor_area / self.number_of_floors, number_of_rooms / self.number_of_floors
    )
def set_perimeter(self):
    """
    Estimate the perimeter of the property and store it on ``self.perimeter``.

    The EPC total floor area and number of habitable rooms are each divided by the number of floors, and the
    per-floor figures are passed to ``estimate_perimeter``.

    :raises ValueError: if the number of floors has not been set (or is falsy)
    :return:
    """
    floors = self.number_of_floors
    if not floors:
        raise ValueError("Number of floors not set, run set_number_floors")

    floor_area = float(self.data["total-floor-area"])
    habitable_rooms = float(self.data["number-habitable-rooms"])

    area_per_floor = floor_area / floors
    rooms_per_floor = habitable_rooms / floors
    self.perimeter = estimate_perimeter(area_per_floor, rooms_per_floor)
def set_wall_type(self):
"""
This method sets the wall type of the property, using a simple approach based on the wall description

View file

@ -61,6 +61,9 @@ async def trigger_plan(body: PlanTriggerRequest):
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
cleaning_data = read_parquet_from_s3(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
input_properties = []
for config in plan_input:
@ -94,6 +97,18 @@ async def trigger_plan(body: PlanTriggerRequest):
if not input_properties:
return Response(status_code=204)
local_property_data = []
for p in input_properties:
local_property_data.append(
{
"id": p.id,
"uprn": p.uprn,
"data": p.data,
"full_sap_epc": p.full_sap_epc,
"old_data": p.old_data,
}
)
logger.info("Getting EPC, and spatial data")
for p in input_properties:
p.search_address_epc()
@ -188,13 +203,6 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
# Clean the data
logger.info("Reading in cleaning dataset from s3")
cleaning_data = read_parquet_from_s3(
bucket_name=get_settings().DATA_BUCKET,
file_key="sap_change_model/cleaning_dataset.parquet",
).rename(columns={"local-authority": "LOCAL_AUTHORITY"})
# Merge the cleaning data onto recommendations_scoring_data
# Perform the same cleaning as in the model

View file

@ -0,0 +1,14 @@
import pickle

# Debugging aid: bundle the intermediate objects of a plan run into one dictionary and pickle it to the
# working directory so they can be reloaded locally.
# NOTE(review): every name on the right-hand side must already exist in the scope where this snippet is
# executed - none of them are defined here.
local_data = dict(
    plan_input=plan_input,
    uprn_filenames=uprn_filenames,
    local_property_data=local_property_data,
    materials=materials,
    materials_by_type=materials_by_type,
    cleaned=cleaned,
    cleaning_data=cleaning_data,
)

with open('local_data.pickle', 'wb') as out_file:
    pickle.dump(local_data, out_file)

View file

View file

@ -0,0 +1,52 @@
"""
This is a simple application which estimates some of the basic dimensions of a property based on EPC
data which we can use as a proxy value if we don't have this information on the EPC
"""
import os
from pathlib import Path
import pandas as pd
from tqdm import tqdm
from etl.epc.settings import EARLIEST_EPC_DATE
from etl.epc.DataProcessor import DataProcessor
from BaseUtility import Definitions
from utils.s3 import save_dataframe_to_s3_parquet
# Folder next to this module that holds the EPC certificate extracts (one sub-directory per extract)
DATA_DIRECTORY = Path(__file__).parent.joinpath("local_data", "all-domestic-certificates")

# Columns the certificates are grouped by before the dimensions are aggregated
GROUPBY = [
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "CONSTITUENCY",
    "CONSTRUCTION_AGE_BAND",
]

# Destination S3 bucket; can be overridden with the BUCKET environment variable
BUCKET = os.getenv("BUCKET", "retrofit-data-dev")
def app():
    """
    Build property dimension summaries from the local EPC certificate extracts and save them to S3.

    Every sub-directory of DATA_DIRECTORY is read via its ``certificates.csv``. The certificates are filtered,
    grouped by the GROUPBY columns and aggregated (median number of habitable rooms, mean total floor area).
    Each result is saved to ``property_dimensions/<local authority>.parquet`` in BUCKET.

    :raises Exception: if a single certificates file contains more than one local authority
    """
    certificate_dirs = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]
    aggregations = {"NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean"}

    for certificate_dir in tqdm(certificate_dirs):
        certificates = pd.read_csv(certificate_dir / "certificates.csv", low_memory=False)

        # Keep certificates lodged on or after the earliest accepted date that also carry a UPRN
        is_recent = certificates["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE
        has_uprn = certificates["UPRN"].notna()
        certificates = certificates[is_recent & has_uprn]

        certificates["TOTAL_FLOOR_AREA"] = certificates["TOTAL_FLOOR_AREA"].astype(float)
        certificates["CONSTRUCTION_AGE_BAND"] = certificates["CONSTRUCTION_AGE_BAND"].apply(
            DataProcessor.clean_construction_age_band
        )

        # Drop rows whose cleaned age band is missing or a known anomaly, and rows without a floor area
        age_band = certificates["CONSTRUCTION_AGE_BAND"]
        usable = (
            age_band.notna()
            & ~age_band.isin(Definitions.DATA_ANOMALY_MATCHES)
            & certificates["TOTAL_FLOOR_AREA"].notna()
        )
        certificates = certificates[usable]

        summary = certificates.groupby(GROUPBY).agg(aggregations).reset_index()

        # The output file is named after the local authority, so each extract must cover exactly one
        authorities = certificates["LOCAL_AUTHORITY"].unique()
        if len(authorities) > 1:
            raise Exception("More than one la in data")

        save_dataframe_to_s3_parquet(
            df=summary,
            bucket_name=BUCKET,
            file_key=f"property_dimensions/{authorities[0]}.parquet",
        )

View file

@ -3,8 +3,6 @@ from copy import deepcopy
import pandas as pd
from backend.Property import Property
from statistics import mean
from recommendations.rdsap_tables import (
epc_wall_description_map, wall_uvalues_df, default_wall_thickness, table_s9 as s9, table_s10 as s10,
table_s11 as s11