mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
minor refactoring of plan router
This commit is contained in:
parent
e2633dfa5b
commit
bdbdbdc676
6 changed files with 102 additions and 42 deletions
|
|
@ -48,6 +48,8 @@ class Property(Definitions):
|
|||
self.postcode = postcode
|
||||
self.address1 = address1
|
||||
self.data = data
|
||||
self.old_data = None
|
||||
|
||||
self.uprn = None
|
||||
self.full_sap_epc = None
|
||||
self.in_conservation_area, self.is_listed, self.is_heritage = None, None, None
|
||||
|
|
@ -100,6 +102,10 @@ class Property(Definitions):
|
|||
]
|
||||
if len(newest_response) > 1:
|
||||
raise Exception("More than one result found for this address - investigate me")
|
||||
|
||||
# We'll keep the older EPCs in case they contain information that is not present on the newest one
|
||||
self.old_data = [epc for epc in response["rows"] if epc["lmk-key"] != newest_response[0]["lmk-key"]]
|
||||
|
||||
response["rows"] = newest_response
|
||||
|
||||
self.data = response["rows"][0]
|
||||
|
|
@ -264,11 +270,9 @@ class Property(Definitions):
|
|||
self.set_mains_gas()
|
||||
self.set_floor_height()
|
||||
self.set_wall_area()
|
||||
self.set_floor_area()
|
||||
self.set_age_band()
|
||||
|
||||
self.set_number_floors()
|
||||
self.set_perimeter()
|
||||
self.set_basic_property_attributes()
|
||||
self.set_wall_type()
|
||||
|
||||
for description, attribute in cleaned.items():
|
||||
|
|
@ -478,16 +482,6 @@ class Property(Definitions):
|
|||
While we do not have the
|
||||
"""
|
||||
|
||||
def set_floor_area(self):
    """
    Store the property's total floor area on ``self.floor_area``.

    The value is read from the "total-floor-area" field of the EPC record held in ``self.data``
    and converted to a float.
    """
    # The number of floors is not known at this point, so a single floor is assumed for now;
    # this is something Verisk data will be needed for
    epc_floor_area = self.data["total-floor-area"]
    self.floor_area = float(epc_floor_area)
|
||||
|
||||
def get_spatial_data(self, uprn_filenames):
|
||||
|
||||
"""
|
||||
|
|
@ -515,40 +509,34 @@ class Property(Definitions):
|
|||
# Pull out spatial features
|
||||
self.set_spatial(spatial)
|
||||
|
||||
def set_number_floors(self):
|
||||
def set_basic_property_attributes(self):
    """
    Sets the basic dimensions of the property from its EPC data:

    - the floor area, read from the EPC "total-floor-area" field
    - the number of floors, estimated from the floor area and the number of habitable rooms for
      houses, and fixed at 1 for flats
    - the perimeter, estimated from the floor area and the number of rooms per floor

    The number of floors is worked out before the perimeter, because the perimeter estimate
    divides by it.

    NOTE(review): the intention is to fall back to cleaned values (medians across the EPC data)
    when floor area or number of rooms is missing from the EPC - that fallback is not implemented
    here yet.

    :raises NotImplementedError: if the property type is neither "House" nor "Flat"
    :raises ValueError: if no usable number of floors could be determined
    :return:
    """

    self.floor_area = float(self.data["total-floor-area"])
    number_of_rooms = float(self.data["number-habitable-rooms"])

    property_type = self.data["property-type"]
    if property_type == "House":
        self.number_of_floors = estimate_floors(self.floor_area, number_of_rooms)
    elif property_type == "Flat":
        self.number_of_floors = 1
    else:
        raise NotImplementedError("Implement me")

    # Guard the divisions below against a missing or zero floor count
    if not self.number_of_floors:
        raise ValueError("Number of floors could not be estimated")

    self.perimeter = estimate_perimeter(
        self.floor_area / self.number_of_floors, number_of_rooms / self.number_of_floors
    )
|
||||
|
||||
def set_perimeter(self):
    """
    Estimate the perimeter of the property and store it on ``self.perimeter``.

    The estimate is produced by ``estimate_perimeter`` from the EPC total floor area and the
    number of habitable rooms, each divided by the number of floors.

    :raises ValueError: if the number of floors has not been set yet
    :return:
    """
    floors = self.number_of_floors
    if not floors:
        raise ValueError("Number of floors not set, run set_number_floors")

    area_per_floor = float(self.data["total-floor-area"]) / floors
    rooms_per_floor = float(self.data["number-habitable-rooms"]) / floors

    self.perimeter = estimate_perimeter(area_per_floor, rooms_per_floor)
|
||||
|
||||
def set_wall_type(self):
|
||||
"""
|
||||
This method sets the wall type of the property, using a simple approach based on the wall description
|
||||
|
|
|
|||
|
|
@ -61,6 +61,9 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
uprn_filenames = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
|
||||
)
|
||||
cleaning_data = read_parquet_from_s3(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
input_properties = []
|
||||
for config in plan_input:
|
||||
|
|
@ -94,6 +97,18 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
if not input_properties:
|
||||
return Response(status_code=204)
|
||||
|
||||
local_property_data = []
|
||||
for p in input_properties:
|
||||
local_property_data.append(
|
||||
{
|
||||
"id": p.id,
|
||||
"uprn": p.uprn,
|
||||
"data": p.data,
|
||||
"full_sap_epc": p.full_sap_epc,
|
||||
"old_data": p.old_data,
|
||||
}
|
||||
)
|
||||
|
||||
logger.info("Getting EPC, and spatial data")
|
||||
for p in input_properties:
|
||||
p.search_address_epc()
|
||||
|
|
@ -188,13 +203,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
logger.info("Preparing data for scoring in sap change api")
|
||||
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
|
||||
|
||||
# Clean the data
|
||||
logger.info("Reading in cleaning dataset from s3")
|
||||
cleaning_data = read_parquet_from_s3(
|
||||
bucket_name=get_settings().DATA_BUCKET,
|
||||
file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
).rename(columns={"local-authority": "LOCAL_AUTHORITY"})
|
||||
|
||||
# Merge the cleaning data onto recommendations_scoring_data
|
||||
|
||||
# Perform the same cleaning as in the model
|
||||
|
|
|
|||
14
backend/app/plan/temp_script_for_flight.py
Normal file
14
backend/app/plan/temp_script_for_flight.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Temporary debugging helper: snapshots intermediate objects of a plan run into a pickle file
# so they can be reloaded later without re-running the plan.
# NOTE(review): none of the names referenced below are defined in this file - presumably the
# snippet is run in a session where the plan router's local variables are in scope; confirm.
import pickle

local_data = dict(
    plan_input=plan_input,
    uprn_filenames=uprn_filenames,
    local_property_data=local_property_data,
    materials=materials,
    materials_by_type=materials_by_type,
    cleaned=cleaned,
    cleaning_data=cleaning_data,
)

# Written relative to the current working directory
with open("local_data.pickle", "wb") as pickle_file:
    pickle.dump(local_data, pickle_file)
|
||||
0
etl/property_dimensions/__init__.py
Normal file
0
etl/property_dimensions/__init__.py
Normal file
52
etl/property_dimensions/app.py
Normal file
52
etl/property_dimensions/app.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""
|
||||
This is a simple application which estimates some of the basic dimensions of a property based on EPC
|
||||
data which we can use as a proxy value if we don't have this information on the EPC
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from etl.epc.settings import EARLIEST_EPC_DATE
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from BaseUtility import Definitions
|
||||
from utils.s3 import save_dataframe_to_s3_parquet
|
||||
|
||||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
GROUPBY = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY", "CONSTRUCTION_AGE_BAND"]
|
||||
|
||||
BUCKET = os.environ.get("BUCKET", "retrofit-data-dev")
|
||||
|
||||
|
||||
def app():
    """
    Builds proxy property dimensions from the bulk EPC certificate exports and saves them to S3.

    For every sub-directory of ``DATA_DIRECTORY`` the ``certificates.csv`` file is read and filtered
    to rows lodged on or after ``EARLIEST_EPC_DATE`` which have a UPRN, a usable construction age
    band and a total floor area. The remaining rows are grouped by ``GROUPBY`` and aggregated to the
    median number of habitable rooms and the mean total floor area. Each result is written to
    ``property_dimensions/<local authority>.parquet`` in ``BUCKET``.

    Directories with no rows left after filtering are reported and skipped.

    :raises Exception: if a certificates file contains more than one local authority
    :return:
    """
    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]

    for directory in tqdm(directories):
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
        data = data[data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
        # Copy the filtered frame so the column assignments below act on an independent frame
        # rather than on a slice of the frame that was read in
        data = data[~pd.isnull(data["UPRN"])].copy()
        data["TOTAL_FLOOR_AREA"] = data["TOTAL_FLOOR_AREA"].astype(float)

        data["CONSTRUCTION_AGE_BAND"] = data["CONSTRUCTION_AGE_BAND"].apply(
            DataProcessor.clean_construction_age_band
        )
        data = data[~pd.isnull(data["CONSTRUCTION_AGE_BAND"])]
        data = data[~data["CONSTRUCTION_AGE_BAND"].isin(Definitions.DATA_ANOMALY_MATCHES)]
        data = data[~pd.isnull(data["TOTAL_FLOOR_AREA"])]

        if data.empty:
            # Nothing to aggregate, and no local authority name to build the file key from
            tqdm.write(f"No usable EPC rows in {directory.name}, skipping")
            continue

        # NOTE(review): floor area is aggregated with the mean while the number of rooms uses the
        # median - confirm this is intended, consumers describe the backup values as medians
        df = (
            data.groupby(GROUPBY)
            .agg({"NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean"})
            .reset_index()
        )

        # Each certificates file is expected to cover exactly one local authority, whose name is
        # used for the output file
        local_authority = data["LOCAL_AUTHORITY"].unique()
        if len(local_authority) > 1:
            raise Exception("More than one la in data")
        local_authority = local_authority[0]

        save_dataframe_to_s3_parquet(
            df=df,
            bucket_name=BUCKET,
            file_key=f"property_dimensions/{local_authority}.parquet",
        )
|
||||
|
|
@ -3,8 +3,6 @@ from copy import deepcopy
|
|||
|
||||
import pandas as pd
|
||||
|
||||
from backend.Property import Property
|
||||
from statistics import mean
|
||||
from recommendations.rdsap_tables import (
|
||||
epc_wall_description_map, wall_uvalues_df, default_wall_thickness, table_s9 as s9, table_s10 as s10,
|
||||
table_s11 as s11
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue