diff --git a/.idea/Model.iml b/.idea/Model.iml
index 09f2e496..b9459684 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index fb10c6b0..5914e57c 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py
index 5f6eaa32..01da1e6c 100644
--- a/backend/tests/test_funding.py
+++ b/backend/tests/test_funding.py
@@ -3,6 +3,8 @@ import pandas as pd
from backend.Funding import Funding, EligibilityCaveats
from backend.tests.test_data.innovation_measure_fixtures import innovation_scenarios
from backend.tests.test_data.pre_heating_scenarios import pre_main_heating_scenarios
+from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
@pytest.fixture
@@ -1040,99 +1042,20 @@ def test_map_to_pre_main_heating(scenario):
"expected"], f"Failed: {scenario['description']} -> {result} (expected {scenario['expected']})"
-# Large scale testing for various measures
-measures = [
- {"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
- {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
- {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
- {"type": "external_wall_insulation", "is_innovation": False, "uplift": 0},
- {"type": "loft_insulation", "is_innovation": False, "uplift": 0},
- {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
- {"type": "double_glazing", "is_innovation": False, "uplift": 0},
- {"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25},
- {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
-]
-epc_df = pd.read_csv(
- "/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv"
-)
-from tqdm import tqdm
-from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
-from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
-
# TODO: Add innovation uplift to private
+@pytest.mark.skip(reason="TODO: ADD INNOVATION TO PRIVATE")
+def test_innovation_uplift_private():
+    """Placeholder for the TODO above: shows up as a skipped test instead of raising at import time."""
-mock_project_scores_matrix = mock_project_scores_matrix()
-mock_whlg_postcodes = mock_whlg_postcodes()
-mock_partial_scores_matrix = mock_partial_scores_matrix()
-
-errors = []
-for _, x in tqdm(epc_df.iterrows(), total=len(epc_df)):
- try:
- # inputs
- mainheat_energy_eff = x["MAINHEAT_ENERGY_EFF"]
- heating_cleaner = MainHeatAttributes(description=x["MAINHEAT_DESCRIPTION"])
- fuel_cleaner = MainFuelAttributes(description="" if pd.isnull(x["MAIN_FUEL"]) else x["MAIN_FUEL"])
-
- h = heating_cleaner.process()
- f = fuel_cleaner.process()
-
- funding = Funding(
- project_scores_matrix=mock_project_scores_matrix,
- partial_project_scores_matrix=mock_partial_scores_matrix,
- whlg_eligible_postcodes=mock_whlg_postcodes,
- social_cavity_abs_rate=13.5,
- social_solid_abs_rate=17,
- private_cavity_abs_rate=13.5,
- private_solid_abs_rate=17,
- tenure="Social"
- )
-
- self = funding
- measures = measures
- starting_sap = 33
- ending_sap = 69
- floor_area = 71
- mainheat_description = x["MAINHEAT_DESCRIPTION"]
- heating_control_description = x["MAINHEATCONT_DESCRIPTION"]
- is_cavity = True
- current_wall_uvalue = 2
- is_partial = False
- existing_li_thickness = 0
- has_wall_insulation_recommendation = True
- has_roof_insulation_recommendation = True
- mainheating = h
- main_fuel = f
- mainheat_energy_eff = mainheat_energy_eff
-
- funding.check_funding(
- measures=measures,
- starting_sap=33,
- ending_sap=69,
- floor_area=71,
- mainheat_description=x["MAINHEAT_DESCRIPTION"],
- heating_control_description=x["MAINHEATCONT_DESCRIPTION"],
- is_cavity=True,
- current_wall_uvalue=2,
- is_partial=False,
- existing_li_thickness=0,
- has_wall_insulation_recommendation=True,
- has_roof_insulation_recommendation=True,
- mainheating=h,
- main_fuel=f,
- mainheat_energy_eff=mainheat_energy_eff,
- )
- except Exception as e:
- errors.append(x["LMK_KEY"])
-
-errored_epcs = epc_df[epc_df["LMK_KEY"].isin(errors)]
-unique_combs = errored_epcs[["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]].drop_duplicates()
-i = 2
-x = errored_epcs[
- (errored_epcs["MAINHEAT_ENERGY_EFF"] == unique_combs["MAINHEAT_ENERGY_EFF"].values[i]) &
- (errored_epcs["MAINHEAT_DESCRIPTION"] == unique_combs["MAINHEAT_DESCRIPTION"].values[i]) &
- (errored_epcs["MAIN_FUEL"] == unique_combs["MAIN_FUEL"].values[i])
- ].head(1).squeeze()
-
-most_prominent_combinations = epc_df.groupby(
- ["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]
-)["LMK_KEY"].nunique().reset_index().sort_values("LMK_KEY", ascending=False).head(30).to_dict("records")
+# Large scale testing for various measures
+# measures = [
+# {"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
+# {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
+# {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
+# {"type": "external_wall_insulation", "is_innovation": False, "uplift": 0},
+# {"type": "loft_insulation", "is_innovation": False, "uplift": 0},
+# {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
+# {"type": "double_glazing", "is_innovation": False, "uplift": 0},
+# {"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25},
+# {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
+# ]
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index a3c1018f..ff8fc95a 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -3,11 +3,12 @@ import os
import pandas as pd
import msgpack
import inspect
+from datetime import datetime
from etl.epc_clean.EpcClean import EpcClean
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
-from utils.s3 import save_data_to_s3
+from utils.s3 import save_data_to_s3, read_from_s3
src_file_path = inspect.getfile(lambda: None)
@@ -22,7 +23,7 @@ LAND_REGISTRY_PATHS = [
os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part2.csv",
]
-EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+EPC_DIRECTORY = Path(os.getenv("EPC_DIRECTORY") or Path(src_file_path).parent / "local_data" / "all-domestic-certificates")  # env var overrides the repo-local default
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
@@ -74,6 +75,19 @@ def app():
# data being read in will be extremely small, meaning quicker load times. We'll begin by storing as a single
# file and monitor usage patterns to see if it makes sense to split the data up
+    # Archive the current cleaned file before it is overwritten below, in case we wish to roll back easily.
+    # NOTE(review): assumes cleaned.bson already exists and read_from_s3 returns its raw bytes - confirm.
+    cleaned_historic = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name=f"retrofit-data-{ENVIRONMENT}"
+    )
+    # Copy the stored bytes as-is (no unpack/pack round trip); the timestamp keeps spaces and colons out of the key.
+    save_data_to_s3(
+        data=cleaned_historic,
+        s3_file_name=f"cleaned_epc_data/archive/{datetime.now():%Y%m%dT%H%M%S}_cleaned.bson",
+        bucket_name=f"retrofit-data-{ENVIRONMENT}"
+    )
+
save_data_to_s3(
data=msgpack.packb(cleaned_data, use_bin_type=True),
s3_file_name="cleaned_epc_data/cleaned.bson",
diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py
index 1dcaa549..85860bbf 100644
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@@ -74,7 +74,11 @@ class MainHeatAttributes(Definitions):
"dim system ar gael, rhagdybir bod gwresogyddion trydan, trydan": "no system present, electric heaters assumed",
# Should be handled by edge cases
", trydan": ", electric",
- 'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas'
+        'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas',
+        # Boiler plus room heaters; translation kept lowercase like the other values in this mapping.
+        "bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": (
+            "boiler and radiators, mains gas, room heaters, electric"
+        ),
}
REMAP = {
diff --git a/etl/epc_clean/requirements.txt b/etl/epc_clean/requirements.txt
index e69de29b..ca6d6981 100644
--- a/etl/epc_clean/requirements.txt
+++ b/etl/epc_clean/requirements.txt
@@ -0,0 +1,5 @@
+tqdm
+pandas
+msgpack
+textblob
+boto3