mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extending description cleaning for expanded data
This commit is contained in:
parent
2a002c1faf
commit
a5ef3b8483
4 changed files with 84 additions and 14 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -6,7 +6,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/model_data" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (hestia-data)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (hestia-data)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ def handler():
|
|||
# TODO: Do this at a constituency level
|
||||
constituencies = {p.data["constituency"] for p in input_properties}
|
||||
property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
|
||||
floor_areas = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]
|
||||
|
||||
# We pull properties per constituency, by property type and floor-area band. This will allow us to build
|
||||
# a dataset of up to 10k properties per constituency/property type/floor-area combination
|
||||
|
|
@ -82,24 +83,28 @@ def handler():
|
|||
data = []
|
||||
for c in tqdm(constituencies):
|
||||
for pt in property_types:
|
||||
data.extend(
|
||||
pagenated_epc_download(
|
||||
client=epc_client,
|
||||
params={
|
||||
"constituency": c,
|
||||
"property-type": pt,
|
||||
"from-month": 8,
|
||||
"from-year": 2014,
|
||||
},
|
||||
page_size=5000,
|
||||
n_pages=10,
|
||||
for fa in floor_areas:
|
||||
data.extend(
|
||||
pagenated_epc_download(
|
||||
client=epc_client,
|
||||
params={
|
||||
"constituency": c,
|
||||
"property-type": pt,
|
||||
"from-month": 8,
|
||||
"from-year": 2014,
|
||||
"floor-area": fa,
|
||||
},
|
||||
page_size=5000,
|
||||
n_pages=10,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Incorporate input data into cleaning
|
||||
cleaner = EpcClean(data + [p.data for p in input_properties])
|
||||
cleaner.clean()
|
||||
|
||||
z = [x for x in data if x["floor-description"] == "(anheddiad arall islaw)"]
|
||||
|
||||
address_meta = [
|
||||
{
|
||||
"postcode": x["postcode"].upper(),
|
||||
|
|
@ -137,6 +142,60 @@ def handler():
|
|||
uvalue_estimates = UvalueEstimations(data=data)
|
||||
uvalue_estimates.get_estimates(cleaner=cleaner)
|
||||
|
||||
x = {'low-energy-fixed-light-count': '', 'address': 'Flat 28, 22, Adelina Grove', 'uprn-source': 'Address Matched',
|
||||
'floor-height': '', 'heating-cost-potential': '668', 'unheated-corridor-length': '7.73',
|
||||
'hot-water-cost-potential': '190', 'construction-age-band': 'England and Wales: 1991-1995',
|
||||
'potential-energy-rating': 'D', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average',
|
||||
'lighting-energy-eff': 'Average', 'environment-impact-potential': '46',
|
||||
'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1081', 'address3': '',
|
||||
'mainheatcont-description': 'No time or thermostatic control of room temperature',
|
||||
'sheating-energy-eff': 'N/A', 'property-type': 'Flat', 'local-authority-label': 'Tower Hamlets',
|
||||
'fixed-lighting-outlets-count': '', 'energy-tariff': 'dual', 'mechanical-ventilation': 'natural',
|
||||
'hot-water-cost-current': '190', 'county': 'Greater London Authority', 'postcode': 'E1 3BX',
|
||||
'solar-water-heating-flag': 'N', 'constituency': 'E14000555', 'co2-emissions-potential': '5.2',
|
||||
'number-heated-rooms': '2', 'floor-description': '(another dwelling below)',
|
||||
'energy-consumption-potential': '301', 'local-authority': 'E09000030', 'built-form': 'Semi-Detached',
|
||||
'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
|
||||
'inspection-date': '2018-09-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '53',
|
||||
'address1': 'Flat 28', 'heat-loss-corridor': 'unheated corridor', 'flat-storey-count': '',
|
||||
'constituency-label': 'Bethnal Green and Bow', 'roof-energy-eff': 'Average', 'total-floor-area': '103.0',
|
||||
'building-reference-number': '4441803568', 'environment-impact-current': '44', 'co2-emissions-current': '5.5',
|
||||
'roof-description': 'Pitched, insulated (assumed)', 'floor-energy-eff': 'NO DATA!',
|
||||
'number-habitable-rooms': '2', 'address2': '22, Adelina Grove', 'hot-water-env-eff': 'Poor',
|
||||
'posttown': 'LONDON', 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'electricity (not community)',
|
||||
'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
|
||||
'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 25% of fixed outlets',
|
||||
'roof-env-eff': 'Average', 'walls-energy-eff': 'Good', 'photo-supply': '', 'lighting-cost-potential': '84',
|
||||
'mainheat-env-eff': 'Very Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '2701',
|
||||
'lodgement-datetime': '2018-09-06 17:25:59', 'flat-top-storey': 'Y', 'current-energy-rating': 'E',
|
||||
'secondheat-description': 'None', 'walls-env-eff': 'Good', 'transaction-type': 'rental (private)',
|
||||
'uprn': '6032920', 'current-energy-efficiency': '48', 'energy-consumption-current': '316',
|
||||
'mainheat-description': 'Electric ceiling heating', 'lighting-cost-current': '147',
|
||||
'lodgement-date': '2018-09-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
|
||||
'lmk-key': '175926409402018090617255958380158', 'wind-turbine-count': '0', 'tenure': 'rental (private)',
|
||||
'floor-level': '4th', 'potential-energy-efficiency': '67', 'hot-water-energy-eff': 'Average',
|
||||
'low-energy-lighting': '25', 'walls-description': 'Solid brick, as built, insulated (assumed)',
|
||||
'hotwater-description': 'Electric immersion, off-peak'}
|
||||
from utils.uvalue_estimates import classify_decile_newvalues
|
||||
total_floor_area_group_decile = UvalueEstimations.classify_decile_newvalues(
|
||||
decile_boundaries=uvalue_estimates.walls_decile_data["decile_boundaries"],
|
||||
decile_labels=uvalue_estimates.walls_decile_data["decile_labels"],
|
||||
new_values=[float(x["total-floor-area"])],
|
||||
)[0]
|
||||
|
||||
u_value_estimate = uvalue_estimates.walls[
|
||||
(uvalue_estimates.walls["local-authority"] == x["local-authority"]) &
|
||||
(uvalue_estimates.walls["property-type"] == x["property-type"]) &
|
||||
(uvalue_estimates.walls["built-form"] == x["built-form"]) &
|
||||
(uvalue_estimates.walls["walls-energy-eff"] == x["walls-energy-eff"]) &
|
||||
(uvalue_estimates.walls["walls-env-eff"] == x["walls-env-eff"]) &
|
||||
(uvalue_estimates.walls["total-floor-area_group"] == total_floor_area_group_decile)
|
||||
]
|
||||
|
||||
uvalue_estimates.walls[
|
||||
uvalue_estimates.walls
|
||||
]
|
||||
|
||||
# all_data = {
|
||||
# "input_properties": input_properties,
|
||||
# "cleaner": cleaner,
|
||||
|
|
|
|||
|
|
@ -12,12 +12,23 @@ class FloorAttributes(BaseUtility):
|
|||
|
||||
OBSERVED_ERRORS = ["Conservatory"]
|
||||
|
||||
WELSH_TEXT = {
|
||||
"(anheddiad arall islaw)": "(another dwelling below)",
|
||||
}
|
||||
|
||||
def __init__(self, description: str):
|
||||
self.description: str = description.lower()
|
||||
|
||||
self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or (
|
||||
description in self.OBSERVED_ERRORS)
|
||||
|
||||
# Try to perform a translation, in case it's in Welsh
|
||||
translation = self.WELSH_TEXT.get(self.description)
|
||||
|
||||
if translation:
|
||||
self.nodata = False
|
||||
self.description = translation
|
||||
|
||||
if not self.nodata and not any(
|
||||
rt in self.description for rt in
|
||||
self.FLOOR_TYPES + self.DWELLING_BELOW + ["average thermal transmittance"]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue