diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index c5cca599..1fc52fcb 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -1,5 +1,3 @@ -from asset_list.AssetList import DataRemapper - STANDARD_WALL_CONSTRUCTIONS = { "uninsulated cavity", "filled cavity", "partial insulated cavity", "cavity unknown insulation", "timber frame", "uninsulated solid brick", diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py index 5960f69b..7bf3cca8 100644 --- a/etl/route_march_data_pull/app.py +++ b/etl/route_march_data_pull/app.py @@ -4,6 +4,8 @@ import json import pandas as pd import numpy as np from tqdm import tqdm +import msgpack +from utils.s3 import read_from_s3 from asset_list.AssetList import AssetList from asset_list.mappings.property_type import PROPERTY_MAPPING from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS @@ -440,8 +442,6 @@ def app(): asset_list.extract_attributes() - import msgpack - from utils.s3 import read_from_s3 cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name="retrofit-data-dev" @@ -450,114 +450,7 @@ def app(): asset_list.identify_worktypes(cleaned) - if HAS_NON_INTRUSIVES: - # Empty cavity: - # 1) Has been flagged on the non-intrusives as being empty or partially filled - # 2) The age is before 1995 - # 3) Remove anything that likley has access issues - asset_list["Suitable for Cavity Fill"] = ( - (asset_list["Construction"] == "CAVITY") & - asset_list["Insulated"].isin(["EMPTY", "PARTIAL"]) & - ( - # Shold we defer to the year built provided by the HA? - (asset_list[PROPERTY_YEAR_BUILT] <= 1995) | (asset_list["epc_year_upper_bound"] <= 1995) - ) & - ( - # We check if the property type column contains one of the invalid property types - ~asset_list[PROPERTY_TYPE_COLUMN].str.lower().str.contains("|".join(invalid_property_types_dictionary)) - ) - ) - - # asset_list["Suitable for Extraction"] = - asset_list[ - (asset_list["Construction"] == "Cavity") & - asset_list["Insulated"].isin(["RETRO DRILLED"]) & - ( - (asset_list[PROPERTY_YEAR_BUILT] <= 1995) - ) & - ( - asset_list[PROPERTY_TYPE_COLUMN] - ) - ] - - # 4) Flag properties that look like they're good candidates for solar installs - # Firstly, flag if the fabric is completely done - - insulated_wall_substrings = [ - ", insulated", "with external insulation", "with internal insulation", "filled cavity" - ] - - insulated_roof_substrings = [ - "(another dwelling above)", "limited insulation", "(other premises above)", - ", no insulation", - ] - - def check_solar_insulation_conditions(x): - - if pd.isnull(x["Wall Construction"]): - return None - - if "average thermal transmittance" in x["Wall Construction"].lower(): - # We extract out the u-values - wall_uvalue = extract_thermal_transmittance({}, x["Wall Construction"])[0]["thermal_transmittance"] - roof_uvalue = extract_thermal_transmittance({}, x["Roof Construction"])[0]["thermal_transmittance"] - floor_uvalue = extract_thermal_transmittance({}, x["Floor Construction"])[0]["thermal_transmittance"] - - roof_uvalue = 0 if roof_uvalue is None else roof_uvalue - floor_uvalue = 0 if floor_uvalue is None else floor_uvalue - - # We apply some cutoffs - if wall_uvalue < 0.7 and roof_uvalue < 0.7 and floor_uvalue < 0.7: - return "Walls, Roof and Floor have U-values below 0.7" - - return "Confirm U-values" - - walls_insulated = any( - insulated_substring in x["Wall Construction"].lower() for insulated_substring in insulated_wall_substrings - ) - roof_is_numeric = False - if str(x["Roof Insulation Thickness"]).isdigit(): - roof_is_numeric = True - roof_insulated = int(x["Roof Insulation Thickness"]) >= 200 - else: - roof_insulated = any( - insulated_substring in x["Roof Construction"].lower() for insulated_substring in - insulated_roof_substrings - ) - - floor_is_solid = "solid" in x["Floor Construction"].lower() - - if walls_insulated and roof_insulated and floor_is_solid: - return "Walls Insulated, Roof Insulated, Floor Solid" - - if walls_insulated and floor_is_solid and roof_is_numeric: - return "Walls Insulated, Floor Solid, Loft need top-up" - - return "Not Fully Insulated or no data" - - asset_list["Solar Fabric Condition"] = asset_list.apply(check_solar_insulation_conditions, axis=1) - - asset_list["Good Solar Candidate"] = ( - asset_list["SAP Rating is 75 and below"] & - ~asset_list["Has Solar PV"] & - ( - asset_list["Heating Type"].isin( - [ - "Electric storage heaters", - "Room heaters, electric", - ] - ) | asset_list["Heating Type"].str.contains("heat pump", case=False) - ) & ( - asset_list["Solar Fabric Condition"].isin( - [ - "Walls Insulated, Roof Insulated, Floor Solid", - "Walls, Roof and Floor have U-values below 0.7", - "Walls Insulated, Floor Solid, Loft need top-up" - ] - ) - ) - ) - + # TODO: We should do this breakdown for flats def flat_analysis(asset_list): # We need to deduce the building name - we strip out the house number @@ -596,19 +489,6 @@ def app(): flat_data = flat_analysis(asset_list) - # For all of the columns in transformed_df, prefix with "Recommendation: " - for col in transformed_df.columns: - if col == "row_id": - continue - transformed_df = transformed_df.rename(columns={col: f"Recommendation: {col}"}) - - asset_list = asset_list.merge( - transformed_df, - how="left", - on="row_id" - ) - asset_list = asset_list.drop(columns=["row_id", "index"]) - # Store as an excel filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " EPC Data Pull.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data