mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
94 lines
3.9 KiB
Python
94 lines
3.9 KiB
Python
from tqdm import tqdm
|
|
import os
|
|
|
|
from model_data.config import EPC_AUTH_TOKEN
|
|
from epc_api.client import EpcClient
|
|
from model_data.downloader import pagenated_epc_download
|
|
from model_data.EpcClean import EpcClean
|
|
from model_data.analysis.UvalueEstimations import UvalueEstimations
|
|
from model_data.analysis.SapModel import SapModel
|
|
|
|
# Directory prefix holding the local Land Registry price-paid CSV extracts,
# resolved once relative to this file. Plain string concatenation with "/" is
# kept so the resulting path strings are identical to the previous hand-written
# list.
_LOCAL_DATA_DIR = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/"

# Absolute paths of the Land Registry CSV files, in the same order as before
# (monthly update first, then the yearly files from 2022 back to 2017).
LAND_REGISTRY_PATHS = [
    _LOCAL_DATA_DIR + filename
    for filename in (
        "pp-monthly-update-new-version.csv",
        "pp-2022 (1).csv",
        "pp-2021.csv",
        "pp-2020.csv",
        "pp-2019.csv",
        "pp-2018.csv",
        "pp-2017-part1.csv",
        "pp-2017-part2.csv",
    )
]
|
|
|
|
|
|
def app():
    """
    Download EPC data for a pre-defined list of constituencies, property types
    and floor-area bands, then run it through the cleaning, U-value estimation
    and SAP modelling steps.

    The aim is to produce a dataset of cleaned fields so that when we get new
    properties, we can quickly sanitise any description data.

    Nothing is persisted yet: every output is currently discarded when the
    function finishes (see the TODOs below).

    :return: None
    """

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    constituencies = {'E14000555', 'E14000726', 'E14000720', 'E14000721', 'E14000553', 'E14000752'}
    property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
    floor_areas = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]

    # We pull properties per constituency, by property type and floor-area band,
    # issuing one paginated download for each combination.
    # NOTE(review): an earlier comment quoted "up to 10k properties" per
    # combination; with page_size=5000 and n_pages=10 the real cap depends on
    # pagenated_epc_download - confirm.
    # For particularly old EPC data we have inconsistent records. SAP09 was
    # introduced in 2009 and SAP12 was later introduced in England and Wales
    # from 31 July 2014, so we only download data from August 2014 onwards.
    data = []
    # Iterate the constituency codes in sorted order: a set of strings has no
    # stable iteration order between interpreter runs (string hashing is
    # randomised), which would otherwise make the order of `data` differ from
    # one run to the next.
    for c in tqdm(sorted(constituencies)):
        for pt in property_types:
            for fa in floor_areas:
                data.extend(
                    pagenated_epc_download(
                        client=epc_client,
                        params={
                            "constituency": c,
                            "property-type": pt,
                            "from-month": 8,
                            "from-year": 2014,
                            "floor-area": fa,
                        },
                        page_size=5000,
                        n_pages=10,
                    )
                )

    # Production of sample data for land registry
    # address_meta = [
    #     {
    #         "postcode": x["postcode"].upper(),
    #         "address1": x["address1"].upper(),
    #         "address2": x["address2"].upper(),
    #         "address3": x["address3"].upper(),
    #         "address": x["address"],
    #         "uprn": x["uprn"]
    #     } for x in data
    # ]
    #
    # import pickle
    # with open("sample_addresses.pkl", "wb") as f:
    #     pickle.dump(address_meta, f)

    # Incorporate input data into cleaning
    cleaner = EpcClean(data)
    # Not used further in this function yet; kept so the value is at hand for
    # the storage TODO below.
    lighting_averages = cleaner.lighting_averages
    # TODO: We need to store lighting_averages to a db
    # We should also extend these averages so they're by more variables
    # (property type, age band, constituency, etc)
    cleaner.clean()
    # TODO: Store the cleaner's cleaned datasets to a db

    # TODO: Add property age band into this
    uvalue_estimates = UvalueEstimations(data=data)
    uvalue_estimates.get_estimates(cleaner=cleaner)
    # TODO: Store these to a db

    sap_model = SapModel(data=data, cleaner=cleaner)
    sap_model.run()
    # TODO: Store outputs to db
|