diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..b9459684 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..5914e57c 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py index 5f6eaa32..01da1e6c 100644 --- a/backend/tests/test_funding.py +++ b/backend/tests/test_funding.py @@ -3,6 +3,8 @@ import pandas as pd from backend.Funding import Funding, EligibilityCaveats from backend.tests.test_data.innovation_measure_fixtures import innovation_scenarios from backend.tests.test_data.pre_heating_scenarios import pre_main_heating_scenarios +from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes +from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes @pytest.fixture @@ -1040,99 +1042,18 @@ def test_map_to_pre_main_heating(scenario): "expected"], f"Failed: {scenario['description']} -> {result} (expected {scenario['expected']})" -# Large scale testing for various measures -measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "external_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "double_glazing", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25}, - {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0}, -] -epc_df = pd.read_csv( - "/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv" -) -from tqdm import tqdm -from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes -from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes - # TODO: Add innovation uplift to private +raise ValueError("TODO: ADD INNOVATION TO PRIVATE") -mock_project_scores_matrix = mock_project_scores_matrix() -mock_whlg_postcodes = mock_whlg_postcodes() -mock_partial_scores_matrix = mock_partial_scores_matrix() - -errors = [] -for _, x in tqdm(epc_df.iterrows(), total=len(epc_df)): - try: - # inputs - mainheat_energy_eff = x["MAINHEAT_ENERGY_EFF"] - heating_cleaner = MainHeatAttributes(description=x["MAINHEAT_DESCRIPTION"]) - fuel_cleaner = MainFuelAttributes(description="" if pd.isnull(x["MAIN_FUEL"]) else x["MAIN_FUEL"]) - - h = heating_cleaner.process() - f = fuel_cleaner.process() - - funding = Funding( - project_scores_matrix=mock_project_scores_matrix, - partial_project_scores_matrix=mock_partial_scores_matrix, - whlg_eligible_postcodes=mock_whlg_postcodes, - social_cavity_abs_rate=13.5, - social_solid_abs_rate=17, - private_cavity_abs_rate=13.5, - private_solid_abs_rate=17, - tenure="Social" - ) - - self = funding - measures = measures - starting_sap = 33 - ending_sap = 69 - floor_area = 71 - mainheat_description = x["MAINHEAT_DESCRIPTION"] - heating_control_description = x["MAINHEATCONT_DESCRIPTION"] - is_cavity = True - current_wall_uvalue = 2 - is_partial = False - existing_li_thickness = 0 - has_wall_insulation_recommendation = True - has_roof_insulation_recommendation = True - mainheating = h - main_fuel = f - mainheat_energy_eff = mainheat_energy_eff - - funding.check_funding( - measures=measures, - starting_sap=33, - ending_sap=69, - floor_area=71, - mainheat_description=x["MAINHEAT_DESCRIPTION"], - heating_control_description=x["MAINHEATCONT_DESCRIPTION"], - is_cavity=True, - current_wall_uvalue=2, - is_partial=False, - existing_li_thickness=0, - has_wall_insulation_recommendation=True, - has_roof_insulation_recommendation=True, - mainheating=h, - main_fuel=f, - mainheat_energy_eff=mainheat_energy_eff, - ) - except Exception as e: - errors.append(x["LMK_KEY"]) - -errored_epcs = epc_df[epc_df["LMK_KEY"].isin(errors)] -unique_combs = errored_epcs[["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]].drop_duplicates() -i = 2 -x = errored_epcs[ - (errored_epcs["MAINHEAT_ENERGY_EFF"] == unique_combs["MAINHEAT_ENERGY_EFF"].values[i]) & - (errored_epcs["MAINHEAT_DESCRIPTION"] == unique_combs["MAINHEAT_DESCRIPTION"].values[i]) & - (errored_epcs["MAIN_FUEL"] == unique_combs["MAIN_FUEL"].values[i]) - ].head(1).squeeze() - -most_prominent_combinations = epc_df.groupby( - ["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"] -)["LMK_KEY"].nunique().reset_index().sort_values("LMK_KEY", ascending=False).head(30).to_dict("records") +# Large scale testing for various measures +# measures = [ +# {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, +# {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, +# {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}, +# {"type": "external_wall_insulation", "is_innovation": False, "uplift": 0}, +# {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, +# {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, +# {"type": "double_glazing", "is_innovation": False, "uplift": 0}, +# {"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25}, +# {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0}, +# ] diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index a3c1018f..ff8fc95a 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -3,11 +3,12 @@ import os import pandas as pd import msgpack import inspect +from datetime import datetime from etl.epc_clean.EpcClean import EpcClean from etl.epc.settings import EARLIEST_EPC_DATE from pathlib import Path -from utils.s3 import save_data_to_s3 +from utils.s3 import save_data_to_s3, read_from_s3 src_file_path = inspect.getfile(lambda: None) @@ -22,7 +23,7 @@ LAND_REGISTRY_PATHS = [ os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part2.csv", ] -EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates" +EPC_DIRECTORY = Path("/Users/khalimconn-kowlessar/Downloads") / "all-domestic-certificates" ENVIRONMENT = os.getenv("ENVIRONMENT", "dev") @@ -74,6 +75,18 @@ def app(): # data being read in will be extremely small, meaning quicker load times. We'll begin by storing as a single # file and monitor usage patterns to see if it makes sense to split the data up + # TODO: Copy the existing cleaned to an archive location, in case we wish to roll back easily + cleaned_historic = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name=f"retrofit-data-{ENVIRONMENT}" + ) + cleaned_historic = msgpack.unpackb(cleaned_historic, raw=False) + save_data_to_s3( + data=msgpack.packb(cleaned_historic, use_bin_type=True), + s3_file_name=f"cleaned_epc_data/archive/{str(datetime.now())} - cleaned.bson", + bucket_name=f"retrofit-data-{ENVIRONMENT}" + ) + save_data_to_s3( data=msgpack.packb(cleaned_data, use_bin_type=True), s3_file_name="cleaned_epc_data/cleaned.bson", diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index 1dcaa549..85860bbf 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -74,7 +74,10 @@ class MainHeatAttributes(Definitions): "dim system ar gael, rhagdybir bod gwresogyddion trydan, trydan": "no system present, electric heaters assumed", # Should be handled by edge cases ", trydan": ", electric", - 'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas' + 'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas', + "bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, " + "mains gas, Room heaters, " + "electric" } REMAP = { diff --git a/etl/epc_clean/requirements.txt b/etl/epc_clean/requirements.txt index e69de29b..ca6d6981 100644 --- a/etl/epc_clean/requirements.txt +++ b/etl/epc_clean/requirements.txt @@ -0,0 +1,5 @@ +tqdm +pandas +msgpack +textblob +boto3 \ No newline at end of file