mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge branch 'etl-michael' of github.com:Hestia-Homes/Model into etl-michael
This commit is contained in:
commit
c86be4a9b6
4 changed files with 16 additions and 5 deletions
|
|
@ -5,6 +5,9 @@ from BaseUtility import Definitions
|
|||
from etl.epc.settings import (
|
||||
DATA_PROCESSOR_SETTINGS,
|
||||
EARLIEST_EPC_DATE,
|
||||
IGNORED_TRANSACTION_TYPES,
|
||||
IGNORED_FLOOR_LEVELS,
|
||||
IGNORED_PROPERTY_TYPES,
|
||||
FULLY_GLAZED_DESCRIPTIONS,
|
||||
AVERAGE_FIXED_FEATURES,
|
||||
BUILT_FORM_REMAP,
|
||||
|
|
@ -416,9 +419,9 @@ class DataProcessor:
|
|||
|
||||
self.data = self.data[~pd.isnull(self.data["UPRN"])]
|
||||
self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
|
||||
self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
|
||||
self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
|
||||
self.data = self.data[
|
||||
~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])
|
||||
~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
|
||||
]
|
||||
self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]
|
||||
|
||||
|
|
@ -430,7 +433,7 @@ class DataProcessor:
|
|||
# Because park homes are surveyed unusually (for example, we don't have u-values to
|
||||
# look up for their different components, they need to be collected in survey and aren't reflected in
|
||||
# EPCs) we'll ignore them from the model
|
||||
self.data = self.data[self.data["PROPERTY_TYPE"] != "Park home"]
|
||||
self.data = self.data[self.data["PROPERTY_TYPE"] != IGNORED_PROPERTY_TYPES]
|
||||
|
||||
def clean_multi_glaze_proportion(self) -> None:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from recommendations.recommendation_utils import (
|
|||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
|
||||
def get_cleaned():
|
||||
def get_cleaned_description_mapping():
|
||||
"""
|
||||
This function will retrieve the cleaned dataset from s3 which has the cleaned
|
||||
descriptions for the epc dataset
|
||||
|
|
@ -404,7 +404,7 @@ def app():
|
|||
# Data glossary:
|
||||
# https://epc.opendatacommunities.org/docs/guidance#glossary
|
||||
|
||||
cleaned_lookup = get_cleaned()
|
||||
cleaned_lookup = get_cleaned_description_mapping()
|
||||
|
||||
# List all subdirectories
|
||||
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,4 @@
|
|||
pandas==2.1.3
|
||||
tqdm==4.66.1
|
||||
msgpack==1.0.7
|
||||
boto3==1.29.6
|
||||
|
|
@ -155,6 +155,10 @@ MANDATORY_FIXED_FEATURES = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY"]
|
|||
# and Wales from 31 July 2014
|
||||
EARLIEST_EPC_DATE = "2014-08-01"
|
||||
|
||||
IGNORED_TRANSACTION_TYPES = "new dwelling"
|
||||
IGNORED_FLOOR_LEVELS = ["top floor", "mid floor"]
|
||||
IGNORED_PROPERTY_TYPES = "Park home"
|
||||
|
||||
RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
|
||||
HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"
|
||||
CARBON_RESPONSE = "CO2_EMISSIONS_CURRENT"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue