Merge branch 'etl-michael' of github.com:Hestia-Homes/Model into etl-michael

This commit is contained in:
Michael Duong 2023-11-28 19:12:45 +00:00
commit c86be4a9b6
4 changed files with 16 additions and 5 deletions

View file

@ -5,6 +5,9 @@ from BaseUtility import Definitions
from etl.epc.settings import (
DATA_PROCESSOR_SETTINGS,
EARLIEST_EPC_DATE,
IGNORED_TRANSACTION_TYPES,
IGNORED_FLOOR_LEVELS,
IGNORED_PROPERTY_TYPES,
FULLY_GLAZED_DESCRIPTIONS,
AVERAGE_FIXED_FEATURES,
BUILT_FORM_REMAP,
@ -416,9 +419,9 @@ class DataProcessor:
self.data = self.data[~pd.isnull(self.data["UPRN"])]
self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
self.data = self.data[
~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])
~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
]
self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]
@ -430,7 +433,7 @@ class DataProcessor:
# Park homes are surveyed unusually: for example, there are no u-values to look up for their
# different components, so these must be collected in survey and aren't reflected in EPCs.
# We therefore exclude them from the model.
self.data = self.data[self.data["PROPERTY_TYPE"] != "Park home"]
self.data = self.data[self.data["PROPERTY_TYPE"] != IGNORED_PROPERTY_TYPES]
def clean_multi_glaze_proportion(self) -> None:
"""

View file

@ -28,7 +28,7 @@ from recommendations.recommendation_utils import (
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
def get_cleaned():
def get_cleaned_description_mapping():
"""
Retrieve the cleaned dataset from S3, which contains the cleaned
descriptions for the EPC dataset.
@ -404,7 +404,7 @@ def app():
# Data glossary:
# https://epc.opendatacommunities.org/docs/guidance#glossary
cleaned_lookup = get_cleaned()
cleaned_lookup = get_cleaned_description_mapping()
# List all subdirectories
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]

View file

@ -0,0 +1,4 @@
pandas==2.1.3
tqdm==4.66.1
msgpack==1.0.7
boto3==1.29.6

View file

@ -155,6 +155,10 @@ MANDATORY_FIXED_FEATURES = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY"]
# and Wales from 31 July 2014
EARLIEST_EPC_DATE = "2014-08-01"
IGNORED_TRANSACTION_TYPES = "new dwelling"
IGNORED_FLOOR_LEVELS = ["top floor", "mid floor"]
IGNORED_PROPERTY_TYPES = "Park home"
RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"
CARBON_RESPONSE = "CO2_EMISSIONS_CURRENT"