Merge pull request #99 from Hestia-Homes/main

restructuring package to move Property to backend
This commit is contained in:
KhalimCK 2023-07-21 17:38:08 +01:00 committed by GitHub
commit 8e0a2815a9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 47 additions and 374 deletions

View file

@ -3,7 +3,7 @@ from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.utils import read_csv_from_s3
from backend.app.config import get_settings
from model_data.Property import Property
from backend.Property import Property
from epc_api.client import EpcClient
from utils.logger import setup_logger
from recommendations.FloorRecommendations import FloorRecommendations

View file

@ -2,8 +2,7 @@ import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pickle
from typing import Any, Dict, Tuple, Optional, List
from typing import Dict, Optional, List
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, \
median_absolute_error, mean_absolute_percentage_error
@ -18,10 +17,6 @@ from utils.logger import setup_logger
logger = setup_logger()
# with open("all_data.pkl", "rb") as f:
# all_data = pickle.load(f)
class SapModel:
# We want to estimate for making improvements on different property components
RESPONSE = "current-energy-efficiency"

View file

@ -1,16 +1,12 @@
from tqdm import tqdm
import os
from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from open_uprn.OpenUprnClient import OpenUprnClient
from model_data.analysis.UvalueEstimations import UvalueEstimations
from model_data.analysis.SapModel import SapModel
LAND_REGISTRY_PATHS = [
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-monthly-update-new-version.csv",
@ -24,345 +20,6 @@ LAND_REGISTRY_PATHS = [
]
def handler():
# To begin with, the input data is a list of dictionaries, however we would read this file in
epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
input_properties = [
Property(postcode=config['postcode'], address1=config['address1'], epc_client=epc_client)
for config in input_data
]
for p in input_properties:
p.search_address_epc()
p.set_year_built()
uprns = [p.data['uprn'] for p in input_properties]
open_uprn_client = OpenUprnClient(
path=os.path.abspath(
os.path.dirname(__file__)
) + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv",
uprns=uprns
)
open_uprn_client.read()
# We're using Ordnance Survey Open UPRN data
# to find the coordinates of each address, which we will then be able to use at a later stage
for p in input_properties:
p.get_coordinates(open_uprn_client)
conservation_area_client = ConservationAreaClient(
historic_england_path=os.path.abspath(
os.path.dirname(__file__)
) + "/model_data/local_data/Historic_Eng_Conservation_Areas/Conservation_Areas.shp",
gov_path=os.path.abspath(
os.path.dirname(__file__)
) + "/model_data/local_data/gov-conservation-area.geojson"
)
conservation_area_client.read()
# Check if the property is in a conservation area
for p in input_properties:
in_conservation_area = conservation_area_client.is_in_conservation_area(p.coordinates)
p.set_is_in_conservation_area(in_conservation_area)
local_authorities = {p.data['local-authority'] for p in input_properties}
# TODO: Do this at a constituency level
constituencies = {p.data["constituency"] for p in input_properties}
property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
floor_areas = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]
# We pull properties from local authorities, by property type. This will allow us to build
# a dataset of up to 10k properties per local authority/property type combination
# For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were
# conducted after 2010, since SAP09 was introduced in 2009 and later SAP12 was introduced in England
# and Wales from 31 July 2014
# Download data from August 2014 onwards
data = []
for c in tqdm(constituencies):
for pt in property_types:
for fa in floor_areas:
data.extend(
pagenated_epc_download(
client=epc_client,
params={
"constituency": c,
"property-type": pt,
"from-month": 8,
"from-year": 2014,
"floor-area": fa,
},
page_size=5000,
n_pages=10,
)
)
# Incorporate input data into cleaning
cleaner = EpcClean(data + [p.data for p in input_properties])
cleaner.clean()
z = [x for x in data if x["floor-description"] == "(anheddiad arall islaw)"]
address_meta = [
{
"postcode": x["postcode"].upper(),
"address1": x["address1"].upper(),
"address2": x["address2"].upper(),
"address3": x["address3"].upper(),
"address": x["address"],
"uprn": x["uprn"]
} for x in data
]
import pickle
with open("sample_addresses.pkl", "wb") as f:
pickle.dump(address_meta, f)
# Land registry
land_registry_client = LandRegistryClient(
paths=LAND_REGISTRY_PATHS,
addresses=address_meta
)
lr_data = land_registry_client.read()
# Borehole
borehole_client = BoreholeClient(
path=os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.dbf"
)
borehole_client.read()
# Now, for our input properties, we need to identify the components of the building, based
# on the cleaning we've done
for p in input_properties:
p.get_components(cleaner)
# TODO: Add property age band into this
uvalue_estimates = UvalueEstimations(data=data)
uvalue_estimates.get_estimates(cleaner=cleaner)
x = {'low-energy-fixed-light-count': '', 'address': 'Flat 28, 22, Adelina Grove', 'uprn-source': 'Address Matched',
'floor-height': '', 'heating-cost-potential': '668', 'unheated-corridor-length': '7.73',
'hot-water-cost-potential': '190', 'construction-age-band': 'England and Wales: 1991-1995',
'potential-energy-rating': 'D', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average',
'lighting-energy-eff': 'Average', 'environment-impact-potential': '46',
'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1081', 'address3': '',
'mainheatcont-description': 'No time or thermostatic control of room temperature',
'sheating-energy-eff': 'N/A', 'property-type': 'Flat', 'local-authority-label': 'Tower Hamlets',
'fixed-lighting-outlets-count': '', 'energy-tariff': 'dual', 'mechanical-ventilation': 'natural',
'hot-water-cost-current': '190', 'county': 'Greater London Authority', 'postcode': 'E1 3BX',
'solar-water-heating-flag': 'N', 'constituency': 'E14000555', 'co2-emissions-potential': '5.2',
'number-heated-rooms': '2', 'floor-description': '(another dwelling below)',
'energy-consumption-potential': '301', 'local-authority': 'E09000030', 'built-form': 'Semi-Detached',
'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
'inspection-date': '2018-09-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '53',
'address1': 'Flat 28', 'heat-loss-corridor': 'unheated corridor', 'flat-storey-count': '',
'constituency-label': 'Bethnal Green and Bow', 'roof-energy-eff': 'Average', 'total-floor-area': '103.0',
'building-reference-number': '4441803568', 'environment-impact-current': '44', 'co2-emissions-current': '5.5',
'roof-description': 'Pitched, insulated (assumed)', 'floor-energy-eff': 'NO DATA!',
'number-habitable-rooms': '2', 'address2': '22, Adelina Grove', 'hot-water-env-eff': 'Poor',
'posttown': 'LONDON', 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'electricity (not community)',
'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 25% of fixed outlets',
'roof-env-eff': 'Average', 'walls-energy-eff': 'Good', 'photo-supply': '', 'lighting-cost-potential': '84',
'mainheat-env-eff': 'Very Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '2701',
'lodgement-datetime': '2018-09-06 17:25:59', 'flat-top-storey': 'Y', 'current-energy-rating': 'E',
'secondheat-description': 'None', 'walls-env-eff': 'Good', 'transaction-type': 'rental (private)',
'uprn': '6032920', 'current-energy-efficiency': '48', 'energy-consumption-current': '316',
'mainheat-description': 'Electric ceiling heating', 'lighting-cost-current': '147',
'lodgement-date': '2018-09-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
'lmk-key': '175926409402018090617255958380158', 'wind-turbine-count': '0', 'tenure': 'rental (private)',
'floor-level': '4th', 'potential-energy-efficiency': '67', 'hot-water-energy-eff': 'Average',
'low-energy-lighting': '25', 'walls-description': 'Solid brick, as built, insulated (assumed)',
'hotwater-description': 'Electric immersion, off-peak'}
from utils.uvalue_estimates import classify_decile_newvalues
total_floor_area_group_decile = UvalueEstimations.classify_decile_newvalues(
decile_boundaries=uvalue_estimates.walls_decile_data["decile_boundaries"],
decile_labels=uvalue_estimates.walls_decile_data["decile_labels"],
new_values=[float(x["total-floor-area"])],
)[0]
u_value_estimate = uvalue_estimates.walls[
(uvalue_estimates.walls["local-authority"] == x["local-authority"]) &
(uvalue_estimates.walls["property-type"] == x["property-type"]) &
(uvalue_estimates.walls["built-form"] == x["built-form"]) &
(uvalue_estimates.walls["walls-energy-eff"] == x["walls-energy-eff"]) &
(uvalue_estimates.walls["walls-env-eff"] == x["walls-env-eff"]) &
(uvalue_estimates.walls["total-floor-area_group"] == total_floor_area_group_decile)
]
uvalue_estimates.walls[
uvalue_estimates.walls
]
# all_data = {
# "input_properties": input_properties,
# "cleaner": cleaner,
# "uvalue_estimates": uvalue_estimates,
# "land_registry_client": land_registry_client,
# "borehole_client": borehole_client,
# "conservation_area_client": conservation_area_client,
# "open_uprn_client": open_uprn_client,
# "data": data
# }
# import pickle
# with open("all_data.pkl", "wb") as f:
# pickle.dump(all_data, f)
# input_properties[4].data["address1"]
# input_properties[4].data["postcode"]
# floors_df["address1"].values[4]
# floors_df["original_description"].values[4]
#
# df = pd.DataFrame(
# [
# x.data for x in input_properties
# ]
# )
# df["property-type"].unique()
#
# from model_data.recommendations.WallRecommendations import WallRecommendations
# all_res = []
# for p in input_properties:
# inst = WallRecommendations(property_instance=p, uvalue_estimates=uvalue_estimates)
# inst.recommend()
# n_recs = len(inst.recommendations)
# all_res.append(n_recs)
#
# self = WallRecommendations(property_instance=input_properties[2], uvalue_estimates=uvalue_estimates)
# input_properties[6].walls
# self.recommend()
# df = pd.DataFrame(self.recommendations[0]["parts"])
# recommendations = pd.DataFrame(self.recommendations)
#
# from model_data.recommendations.FloorRecommendations import FloorRecommendations
# self = FloorRecommendations(property_instance=input_properties[4], uvalue_estimates=uvalue_estimates)
# self.recommendations
# self.recommend()
# self.recommendations
#
# # We need to deduce a U-value for "Good" energy efficiency
#
# mainheating = pd.DataFrame(
# [{"address1": p.address1, "postcode": p.postcode, **p.main_heating} for p in input_properties])
# hotwater = pd.DataFrame([{"address1": p.address1, **p.hotwater} for p in input_properties])
#
# mainheating[["address1", "postcode"]]
#
# # TODO: I want to know what "Good" efficiency means for the description
# # 'Flat 28, 22 Adelina Grove' 'Solid brick, as built, insulated (assumed)'
# # so to do this, filter on the local authority code and property type, where we have U
# # values for the wall and take a median!
#
# p = input_properties[6]
# df = pd.DataFrame(data)
#
# res = []
# for p in input_properties:
# distances = []
# for borehole in tqdm(borehole_client.data, total=len(borehole_client.data)):
# dist_meeters, _ = borehole_client.distance_between_bng_coords(
# x1_bng=p.coordinates['x_coordinate'],
# y1_bng=p.coordinates['y_coordinate'],
# x2_bng=float(borehole['EASTING']),
# y2_bng=float(borehole['NORTHING'])
# )
# distances.append(dist_meeters)
#
# res.append(
# {
# "uprn": int(p.data["uprn"]),
# "meters_to_nearest_borehole": min(distances)
# }
#
# )
# res = pd.DataFrame(res)
#
# properties_dataset = [
# {
# **p.data,
# "in_conservation_area": p.in_conservation_area,
# **p.coordinates,
#
# } for p in input_properties
# ]
#
# properties_dataset = pd.DataFrame(properties_dataset)
# properties_dataset = properties_dataset.merge(res, on="uprn", how="left")
#
# properties_dataset.to_csv("properties_dataset.csv")
# We test estimating gain
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
df = pd.DataFrame(data)
# We need to split the data into a train and test set for model build
# If these categorical variables are not of type 'category', convert them
print(results.summary())
grouped_error = []
groupby = ["mainheat-description"]
for group, data in model_data.groupby(groupby, observed=True):
group_fit_error, _ = calculate_regression_metrics(y_true=data[response].astype(float), y_pred=data["fit"])
# plot_regression(pd.DataFrame({"fit": data["fit"].values, "actual": data[response].astype(float).values}))
grouped_error.append(
{
**dict(zip(groupby, group)),
"n_samples": data.shape[0],
**group_fit_error,
}
)
grouped_error = pd.DataFrame(grouped_error)
grouped_error = grouped_error.sort_values("R2 Score", ascending=True)
plot_regression(fit_df)
model_data[["thermal_transmittance", response]].corr()
summary = model_data.groupby(["property-type", "built-form"], observed=True)[
["thermal_transmittance", response]
].corr()
summary = (
model_data
.groupby(component_features + base_features)
.agg({response: 'median', "idx": 'size'})
.reset_index()
)
summary = summary.sort_values("walls-description")
example = summary[
(summary["walls-description"].isin(
[
"Solid brick, as built, no insulation (assumed)",
"Solid brick, as built, partial insulation (assumed)",
"Solid brick, as built, insulated (assumed)",
]
)) &
(summary["property-type"] == "House") &
(summary["built-form"] == "Detached") &
# (summary["construction-age-band"] == "England and Wales: 1976-1982")
(summary["number-habitable-rooms"] == "4")
]
from textblob import TextBlob
converter = TextBlob("excelent lighting in this hosehold")
from model_data.utils import correct_spelling
result = correct_spelling("excelent lighting in this hosehold")
print(result)
'excellent lighting in this household'
def app():
"""
For a pre-defined list of constituencies and property types, we'll download EPC data from the API
@ -425,4 +82,6 @@ def app():
uvalue_estimates.get_estimates(cleaner=cleaner)
# TODO: Store these to a db
uvalue_estimates.floors_decile_data
sap_model = SapModel(data=data, cleaner=cleaner)
sap_model.run()
# TODO: Store outputs to db

View file

@ -18,4 +18,3 @@ statsmodels
scikit-learn
pyspellchecker
textblob
xgboost

View file

@ -1,3 +1,2 @@
xgboost
statsmodels
scikit-learn

View file

@ -1,7 +1,7 @@
import math
from typing import List
from model_data.BaseUtility import BaseUtility
from model_data.Property import Property
from backend.Property import Property
from model_data.rdsap_tables import default_wall_thickness, age_band_data
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,

View file

@ -1,7 +1,7 @@
import itertools
import math
from model_data.Property import Property
from backend.Property import Property
from model_data.BaseUtility import BaseUtility
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,

View file

@ -1,5 +1,5 @@
from copy import deepcopy
from model_data.Property import Property
from backend.Property import Property
from statistics import mean

View file

@ -5,6 +5,17 @@ from unittest.mock import Mock
from recommendations.FloorRecommendations import FloorRecommendations
# with open(
# os.path.abspath(os.path.dirname(__file__)) + "/recommendations/tests/test_data/input_properties.pkl", "rb"
# ) as f:
# input_properties = pickle.load(f)
#
# with open(
# os.path.abspath(os.path.dirname(__file__)) + "/recommendations/tests/test_data/uvalue_estimates.pkl", "rb"
# ) as f:
# uvalue_estimates = pickle.load(f)
class TestWallRecommendations:
@pytest.fixture
@ -106,7 +117,7 @@ class TestWallRecommendations:
recommender.recommend()
assert not recommender.property.floor["is_suspended"]
assert recommender.property.floor["is_solid"]
assert recommender.estimated_u_value == 0.7361642182695053
assert recommender.estimated_u_value == 0.7528014214215474
assert recommender.recommendations
types = {part["type"] for x in recommender.recommendations for part in x["parts"]}

View file

@ -7,7 +7,7 @@ import numpy as np
from unittest.mock import Mock, MagicMock
from recommendations.WallRecommendations import WallRecommendations
from model_data.analysis.UvalueEstimations import UvalueEstimations
from model_data.Property import Property
from backend.Property import Property
from recommendations.recommendation_utils import is_diminishing_returns

View file

@ -28,21 +28,31 @@ provider:
package:
individually: true
# include:
# - backend/**
# # Might need to refine the paths that are included
# - model_data/**
exclude:
- model_data/local_data/**
- model_data/tests/**
- infrastructure/**
- data_collection/**
- node_modules/**
- conservation_areas/**
- open_uprn/**
- land_registry/**
- recommendations/tests/**
- pytest.ini
# Packaging patterns are applied in order and later entries override earlier
# ones, so broad exclusions are listed before the specific files that must be
# re-included from an excluded directory.
# Exclusion entries are quoted because an unquoted leading "!" is read by YAML
# as a tag indicator rather than as part of the pattern string.
patterns:
- backend/**
- '!backend/tests/**'
- recommendations/**
- '!recommendations/tests/**'
# Exclude model_data as a whole, then re-include the single module the
# packaged code imports from it.
- '!model_data/**'
- model_data/BaseUtility.py
- '!infrastructure/**'
- '!data_collection/**'
- '!node_modules/**'
- '!conservation_areas/**'
- '!open_uprn/**'
- '!land_registry/**'
- '!pytest.ini'
# exclude:
# - model_data/local_data/**
# - model_data/tests/**
# - infrastructure/**
# - data_collection/**
# - node_modules/**
# - conservation_areas/**
# - open_uprn/**
# - land_registry/**
# - recommendations/tests/**
# - pytest.ini
plugins:
- serverless-python-requirements