mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added SapModel to app
This commit is contained in:
parent
8d4e0c956b
commit
3e5d6cc4b4
2 changed files with 5 additions and 351 deletions
|
|
@ -2,8 +2,7 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import statsmodels.api as sm
|
||||
import matplotlib.pyplot as plt
|
||||
import pickle
|
||||
from typing import Any, Dict, Tuple, Optional, List
|
||||
from typing import Dict, Optional, List
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, \
|
||||
median_absolute_error, mean_absolute_percentage_error
|
||||
|
|
@ -18,10 +17,6 @@ from utils.logger import setup_logger
|
|||
logger = setup_logger()
|
||||
|
||||
|
||||
# with open("all_data.pkl", "rb") as f:
|
||||
# all_data = pickle.load(f)
|
||||
|
||||
|
||||
class SapModel:
|
||||
# We want to estimate for making improvements on different property components
|
||||
RESPONSE = "current-energy-efficiency"
|
||||
|
|
|
|||
|
|
@ -1,16 +1,12 @@
|
|||
from tqdm import tqdm
|
||||
import os
|
||||
from model_data.BoreholeClient import BoreholeClient
|
||||
from model_data.LandRegistryClient import LandRegistryClient
|
||||
|
||||
from model_data.temp_inputs import input_data
|
||||
from model_data.Property import Property
|
||||
from model_data.config import EPC_AUTH_TOKEN
|
||||
from epc_api.client import EpcClient
|
||||
from model_data.downloader import pagenated_epc_download
|
||||
from model_data.EpcClean import EpcClean
|
||||
from open_uprn.OpenUprnClient import OpenUprnClient
|
||||
from model_data.analysis.UvalueEstimations import UvalueEstimations
|
||||
from model_data.analysis.SapModel import SapModel
|
||||
|
||||
LAND_REGISTRY_PATHS = [
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-monthly-update-new-version.csv",
|
||||
|
|
@ -24,345 +20,6 @@ LAND_REGISTRY_PATHS = [
|
|||
]
|
||||
|
||||
|
||||
def handler():
|
||||
# To begin with, the input data is a list of dictionaries, however we would read this file in
|
||||
|
||||
epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
|
||||
|
||||
input_properties = [
|
||||
Property(postcode=config['postcode'], address1=config['address1'], epc_client=epc_client)
|
||||
for config in input_data
|
||||
]
|
||||
|
||||
for p in input_properties:
|
||||
p.search_address_epc()
|
||||
p.set_year_built()
|
||||
|
||||
uprns = [p.data['uprn'] for p in input_properties]
|
||||
|
||||
open_uprn_client = OpenUprnClient(
|
||||
path=os.path.abspath(
|
||||
os.path.dirname(__file__)
|
||||
) + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv",
|
||||
uprns=uprns
|
||||
)
|
||||
open_uprn_client.read()
|
||||
|
||||
# We're using Ordnance Survey Open UPRN data
|
||||
# to find the coordinates of each address, which we will then be able to use at a later stage
|
||||
for p in input_properties:
|
||||
p.get_coordinates(open_uprn_client)
|
||||
|
||||
conservation_area_client = ConservationAreaClient(
|
||||
historic_england_path=os.path.abspath(
|
||||
os.path.dirname(__file__)
|
||||
) + "/model_data/local_data/Historic_Eng_Conservation_Areas/Conservation_Areas.shp",
|
||||
gov_path=os.path.abspath(
|
||||
os.path.dirname(__file__)
|
||||
) + "/model_data/local_data/gov-conservation-area.geojson"
|
||||
)
|
||||
conservation_area_client.read()
|
||||
|
||||
# Check if the property is in a conservation area
|
||||
for p in input_properties:
|
||||
in_conservation_area = conservation_area_client.is_in_conservation_area(p.coordinates)
|
||||
p.set_is_in_conservation_area(in_conservation_area)
|
||||
|
||||
local_authorities = {p.data['local-authority'] for p in input_properties}
|
||||
# TODO: Do this at a constituency level
|
||||
constituencies = {p.data["constituency"] for p in input_properties}
|
||||
property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
|
||||
floor_areas = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]
|
||||
|
||||
# We pull properties from local authorities, by property type. This will allow us to build
|
||||
# a dataset of up to 10k properties per local authority/property type combination
|
||||
# For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were
|
||||
# conducted after 2010, since SAP09 was introduced in 2009 and later SAP12 was introduced in England
|
||||
# and Wales from 31 July 2014
|
||||
# Download data from August 2014 onwards
|
||||
data = []
|
||||
for c in tqdm(constituencies):
|
||||
for pt in property_types:
|
||||
for fa in floor_areas:
|
||||
data.extend(
|
||||
pagenated_epc_download(
|
||||
client=epc_client,
|
||||
params={
|
||||
"constituency": c,
|
||||
"property-type": pt,
|
||||
"from-month": 8,
|
||||
"from-year": 2014,
|
||||
"floor-area": fa,
|
||||
},
|
||||
page_size=5000,
|
||||
n_pages=10,
|
||||
)
|
||||
)
|
||||
|
||||
# Incorporate input data into cleaning
|
||||
cleaner = EpcClean(data + [p.data for p in input_properties])
|
||||
cleaner.clean()
|
||||
|
||||
z = [x for x in data if x["floor-description"] == "(anheddiad arall islaw)"]
|
||||
|
||||
address_meta = [
|
||||
{
|
||||
"postcode": x["postcode"].upper(),
|
||||
"address1": x["address1"].upper(),
|
||||
"address2": x["address2"].upper(),
|
||||
"address3": x["address3"].upper(),
|
||||
"address": x["address"],
|
||||
"uprn": x["uprn"]
|
||||
} for x in data
|
||||
]
|
||||
|
||||
import pickle
|
||||
with open("sample_addresses.pkl", "wb") as f:
|
||||
pickle.dump(address_meta, f)
|
||||
|
||||
# Land registry
|
||||
land_registry_client = LandRegistryClient(
|
||||
paths=LAND_REGISTRY_PATHS,
|
||||
addresses=address_meta
|
||||
)
|
||||
lr_data = land_registry_client.read()
|
||||
|
||||
# Borehole
|
||||
borehole_client = BoreholeClient(
|
||||
path=os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.dbf"
|
||||
)
|
||||
borehole_client.read()
|
||||
|
||||
# Now, for our input properties, we need to identify the components of the building, based
|
||||
# on the cleaning we've done
|
||||
for p in input_properties:
|
||||
p.get_components(cleaner)
|
||||
|
||||
# TODO: Add property age band into this
|
||||
uvalue_estimates = UvalueEstimations(data=data)
|
||||
uvalue_estimates.get_estimates(cleaner=cleaner)
|
||||
|
||||
x = {'low-energy-fixed-light-count': '', 'address': 'Flat 28, 22, Adelina Grove', 'uprn-source': 'Address Matched',
|
||||
'floor-height': '', 'heating-cost-potential': '668', 'unheated-corridor-length': '7.73',
|
||||
'hot-water-cost-potential': '190', 'construction-age-band': 'England and Wales: 1991-1995',
|
||||
'potential-energy-rating': 'D', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average',
|
||||
'lighting-energy-eff': 'Average', 'environment-impact-potential': '46',
|
||||
'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1081', 'address3': '',
|
||||
'mainheatcont-description': 'No time or thermostatic control of room temperature',
|
||||
'sheating-energy-eff': 'N/A', 'property-type': 'Flat', 'local-authority-label': 'Tower Hamlets',
|
||||
'fixed-lighting-outlets-count': '', 'energy-tariff': 'dual', 'mechanical-ventilation': 'natural',
|
||||
'hot-water-cost-current': '190', 'county': 'Greater London Authority', 'postcode': 'E1 3BX',
|
||||
'solar-water-heating-flag': 'N', 'constituency': 'E14000555', 'co2-emissions-potential': '5.2',
|
||||
'number-heated-rooms': '2', 'floor-description': '(another dwelling below)',
|
||||
'energy-consumption-potential': '301', 'local-authority': 'E09000030', 'built-form': 'Semi-Detached',
|
||||
'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
|
||||
'inspection-date': '2018-09-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '53',
|
||||
'address1': 'Flat 28', 'heat-loss-corridor': 'unheated corridor', 'flat-storey-count': '',
|
||||
'constituency-label': 'Bethnal Green and Bow', 'roof-energy-eff': 'Average', 'total-floor-area': '103.0',
|
||||
'building-reference-number': '4441803568', 'environment-impact-current': '44', 'co2-emissions-current': '5.5',
|
||||
'roof-description': 'Pitched, insulated (assumed)', 'floor-energy-eff': 'NO DATA!',
|
||||
'number-habitable-rooms': '2', 'address2': '22, Adelina Grove', 'hot-water-env-eff': 'Poor',
|
||||
'posttown': 'LONDON', 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'electricity (not community)',
|
||||
'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
|
||||
'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 25% of fixed outlets',
|
||||
'roof-env-eff': 'Average', 'walls-energy-eff': 'Good', 'photo-supply': '', 'lighting-cost-potential': '84',
|
||||
'mainheat-env-eff': 'Very Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '2701',
|
||||
'lodgement-datetime': '2018-09-06 17:25:59', 'flat-top-storey': 'Y', 'current-energy-rating': 'E',
|
||||
'secondheat-description': 'None', 'walls-env-eff': 'Good', 'transaction-type': 'rental (private)',
|
||||
'uprn': '6032920', 'current-energy-efficiency': '48', 'energy-consumption-current': '316',
|
||||
'mainheat-description': 'Electric ceiling heating', 'lighting-cost-current': '147',
|
||||
'lodgement-date': '2018-09-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
|
||||
'lmk-key': '175926409402018090617255958380158', 'wind-turbine-count': '0', 'tenure': 'rental (private)',
|
||||
'floor-level': '4th', 'potential-energy-efficiency': '67', 'hot-water-energy-eff': 'Average',
|
||||
'low-energy-lighting': '25', 'walls-description': 'Solid brick, as built, insulated (assumed)',
|
||||
'hotwater-description': 'Electric immersion, off-peak'}
|
||||
from utils.uvalue_estimates import classify_decile_newvalues
|
||||
total_floor_area_group_decile = UvalueEstimations.classify_decile_newvalues(
|
||||
decile_boundaries=uvalue_estimates.walls_decile_data["decile_boundaries"],
|
||||
decile_labels=uvalue_estimates.walls_decile_data["decile_labels"],
|
||||
new_values=[float(x["total-floor-area"])],
|
||||
)[0]
|
||||
|
||||
u_value_estimate = uvalue_estimates.walls[
|
||||
(uvalue_estimates.walls["local-authority"] == x["local-authority"]) &
|
||||
(uvalue_estimates.walls["property-type"] == x["property-type"]) &
|
||||
(uvalue_estimates.walls["built-form"] == x["built-form"]) &
|
||||
(uvalue_estimates.walls["walls-energy-eff"] == x["walls-energy-eff"]) &
|
||||
(uvalue_estimates.walls["walls-env-eff"] == x["walls-env-eff"]) &
|
||||
(uvalue_estimates.walls["total-floor-area_group"] == total_floor_area_group_decile)
|
||||
]
|
||||
|
||||
uvalue_estimates.walls[
|
||||
uvalue_estimates.walls
|
||||
]
|
||||
|
||||
# all_data = {
|
||||
# "input_properties": input_properties,
|
||||
# "cleaner": cleaner,
|
||||
# "uvalue_estimates": uvalue_estimates,
|
||||
# "land_registry_client": land_registry_client,
|
||||
# "borehole_client": borehole_client,
|
||||
# "conservation_area_client": conservation_area_client,
|
||||
# "open_uprn_client": open_uprn_client,
|
||||
# "data": data
|
||||
# }
|
||||
|
||||
# import pickle
|
||||
# with open("all_data.pkl", "wb") as f:
|
||||
# pickle.dump(all_data, f)
|
||||
|
||||
# input_properties[4].data["address1"]
|
||||
# input_properties[4].data["postcode"]
|
||||
# floors_df["address1"].values[4]
|
||||
# floors_df["original_description"].values[4]
|
||||
#
|
||||
# df = pd.DataFrame(
|
||||
# [
|
||||
# x.data for x in input_properties
|
||||
# ]
|
||||
# )
|
||||
# df["property-type"].unique()
|
||||
#
|
||||
# from model_data.recommendations.WallRecommendations import WallRecommendations
|
||||
# all_res = []
|
||||
# for p in input_properties:
|
||||
# inst = WallRecommendations(property_instance=p, uvalue_estimates=uvalue_estimates)
|
||||
# inst.recommend()
|
||||
# n_recs = len(inst.recommendations)
|
||||
# all_res.append(n_recs)
|
||||
#
|
||||
# self = WallRecommendations(property_instance=input_properties[2], uvalue_estimates=uvalue_estimates)
|
||||
# input_properties[6].walls
|
||||
# self.recommend()
|
||||
# df = pd.DataFrame(self.recommendations[0]["parts"])
|
||||
# recommendations = pd.DataFrame(self.recommendations)
|
||||
#
|
||||
# from model_data.recommendations.FloorRecommendations import FloorRecommendations
|
||||
# self = FloorRecommendations(property_instance=input_properties[4], uvalue_estimates=uvalue_estimates)
|
||||
# self.recommendations
|
||||
# self.recommend()
|
||||
# self.recommendations
|
||||
#
|
||||
# # We need to deduce a U-value for "Good" energy efficiency
|
||||
#
|
||||
# mainheating = pd.DataFrame(
|
||||
# [{"address1": p.address1, "postcode": p.postcode, **p.main_heating} for p in input_properties])
|
||||
# hotwater = pd.DataFrame([{"address1": p.address1, **p.hotwater} for p in input_properties])
|
||||
#
|
||||
# mainheating[["address1", "postcode"]]
|
||||
#
|
||||
# # TODO: I want to know what "Good" efficiency means for the description
|
||||
# # 'Flat 28, 22 Adelina Grove' 'Solid brick, as built, insulated (assumed)'
|
||||
# # so to do this, filter on the local authority code and property type, where we have U
|
||||
# # values for the wall and take a median!
|
||||
#
|
||||
# p = input_properties[6]
|
||||
# df = pd.DataFrame(data)
|
||||
#
|
||||
# res = []
|
||||
# for p in input_properties:
|
||||
# distances = []
|
||||
# for borehole in tqdm(borehole_client.data, total=len(borehole_client.data)):
|
||||
# dist_meeters, _ = borehole_client.distance_between_bng_coords(
|
||||
# x1_bng=p.coordinates['x_coordinate'],
|
||||
# y1_bng=p.coordinates['y_coordinate'],
|
||||
# x2_bng=float(borehole['EASTING']),
|
||||
# y2_bng=float(borehole['NORTHING'])
|
||||
# )
|
||||
# distances.append(dist_meeters)
|
||||
#
|
||||
# res.append(
|
||||
# {
|
||||
# "uprn": int(p.data["uprn"]),
|
||||
# "meters_to_nearest_borehole": min(distances)
|
||||
# }
|
||||
#
|
||||
# )
|
||||
# res = pd.DataFrame(res)
|
||||
#
|
||||
# properties_dataset = [
|
||||
# {
|
||||
# **p.data,
|
||||
# "in_conservation_area": p.in_conservation_area,
|
||||
# **p.coordinates,
|
||||
#
|
||||
# } for p in input_properties
|
||||
# ]
|
||||
#
|
||||
# properties_dataset = pd.DataFrame(properties_dataset)
|
||||
# properties_dataset = properties_dataset.merge(res, on="uprn", how="left")
|
||||
#
|
||||
# properties_dataset.to_csv("properties_dataset.csv")
|
||||
|
||||
# We test estimating gain
|
||||
import pandas as pd
|
||||
pd.set_option('display.max_rows', 500)
|
||||
pd.set_option('display.max_columns', 500)
|
||||
pd.set_option('display.width', 1000)
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
# We need to split the data into a train and test set for model build
|
||||
|
||||
# If these categorical variables are not of type 'category', convert them
|
||||
|
||||
print(results.summary())
|
||||
|
||||
grouped_error = []
|
||||
groupby = ["mainheat-description"]
|
||||
for group, data in model_data.groupby(groupby, observed=True):
|
||||
group_fit_error, _ = calculate_regression_metrics(y_true=data[response].astype(float), y_pred=data["fit"])
|
||||
# plot_regression(pd.DataFrame({"fit": data["fit"].values, "actual": data[response].astype(float).values}))
|
||||
grouped_error.append(
|
||||
{
|
||||
**dict(zip(groupby, group)),
|
||||
"n_samples": data.shape[0],
|
||||
**group_fit_error,
|
||||
}
|
||||
)
|
||||
|
||||
grouped_error = pd.DataFrame(grouped_error)
|
||||
grouped_error = grouped_error.sort_values("R2 Score", ascending=True)
|
||||
|
||||
plot_regression(fit_df)
|
||||
|
||||
model_data[["thermal_transmittance", response]].corr()
|
||||
|
||||
summary = model_data.groupby(["property-type", "built-form"], observed=True)[
|
||||
["thermal_transmittance", response]
|
||||
].corr()
|
||||
|
||||
summary = (
|
||||
model_data
|
||||
.groupby(component_features + base_features)
|
||||
.agg({response: 'median', "idx": 'size'})
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
summary = summary.sort_values("walls-description")
|
||||
|
||||
example = summary[
|
||||
(summary["walls-description"].isin(
|
||||
[
|
||||
"Solid brick, as built, no insulation (assumed)",
|
||||
"Solid brick, as built, partial insulation (assumed)",
|
||||
"Solid brick, as built, insulated (assumed)",
|
||||
]
|
||||
)) &
|
||||
(summary["property-type"] == "House") &
|
||||
(summary["built-form"] == "Detached") &
|
||||
# (summary["construction-age-band"] == "England and Wales: 1976-1982")
|
||||
(summary["number-habitable-rooms"] == "4")
|
||||
]
|
||||
|
||||
from textblob import TextBlob
|
||||
converter = TextBlob("excelent lighting in this hosehold")
|
||||
|
||||
from model_data.utils import correct_spelling
|
||||
result = correct_spelling("excelent lighting in this hosehold")
|
||||
print(result)
|
||||
'excellent lighting in this household'
|
||||
|
||||
|
||||
def app():
|
||||
"""
|
||||
For a pre-defined list of constituencies and property types, we'll download EPC data from the API
|
||||
|
|
@ -425,4 +82,6 @@ def app():
|
|||
uvalue_estimates.get_estimates(cleaner=cleaner)
|
||||
# TODO: Store these to a db
|
||||
|
||||
uvalue_estimates.floors_decile_data
|
||||
sap_model = SapModel(data=data, cleaner=cleaner)
|
||||
sap_model.run()
|
||||
# TODO: Store outputs to db
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue