mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
rebuilding epc clean
This commit is contained in:
parent
3bdadc80df
commit
33ca9b7988
6 changed files with 41 additions and 99 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="epc_clean" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="epc_clean" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ import pandas as pd
|
|||
from backend.Funding import Funding, EligibilityCaveats
|
||||
from backend.tests.test_data.innovation_measure_fixtures import innovation_scenarios
|
||||
from backend.tests.test_data.pre_heating_scenarios import pre_main_heating_scenarios
|
||||
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
|
||||
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -1040,99 +1042,18 @@ def test_map_to_pre_main_heating(scenario):
|
|||
"expected"], f"Failed: {scenario['description']} -> {result} (expected {scenario['expected']})"
|
||||
|
||||
|
||||
# Large scale testing for various measures
|
||||
measures = [
|
||||
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
|
||||
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||
{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||
{"type": "external_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
|
||||
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
|
||||
{"type": "double_glazing", "is_innovation": False, "uplift": 0},
|
||||
{"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25},
|
||||
{"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
|
||||
]
|
||||
epc_df = pd.read_csv(
|
||||
"/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv"
|
||||
)
|
||||
from tqdm import tqdm
|
||||
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
|
||||
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
||||
|
||||
# TODO: Add innovation uplift to private
|
||||
raise ValueError("TODO: ADD INNOVATION TO PRIVATE")
|
||||
|
||||
mock_project_scores_matrix = mock_project_scores_matrix()
|
||||
mock_whlg_postcodes = mock_whlg_postcodes()
|
||||
mock_partial_scores_matrix = mock_partial_scores_matrix()
|
||||
|
||||
errors = []
|
||||
for _, x in tqdm(epc_df.iterrows(), total=len(epc_df)):
|
||||
try:
|
||||
# inputs
|
||||
mainheat_energy_eff = x["MAINHEAT_ENERGY_EFF"]
|
||||
heating_cleaner = MainHeatAttributes(description=x["MAINHEAT_DESCRIPTION"])
|
||||
fuel_cleaner = MainFuelAttributes(description="" if pd.isnull(x["MAIN_FUEL"]) else x["MAIN_FUEL"])
|
||||
|
||||
h = heating_cleaner.process()
|
||||
f = fuel_cleaner.process()
|
||||
|
||||
funding = Funding(
|
||||
project_scores_matrix=mock_project_scores_matrix,
|
||||
partial_project_scores_matrix=mock_partial_scores_matrix,
|
||||
whlg_eligible_postcodes=mock_whlg_postcodes,
|
||||
social_cavity_abs_rate=13.5,
|
||||
social_solid_abs_rate=17,
|
||||
private_cavity_abs_rate=13.5,
|
||||
private_solid_abs_rate=17,
|
||||
tenure="Social"
|
||||
)
|
||||
|
||||
self = funding
|
||||
measures = measures
|
||||
starting_sap = 33
|
||||
ending_sap = 69
|
||||
floor_area = 71
|
||||
mainheat_description = x["MAINHEAT_DESCRIPTION"]
|
||||
heating_control_description = x["MAINHEATCONT_DESCRIPTION"]
|
||||
is_cavity = True
|
||||
current_wall_uvalue = 2
|
||||
is_partial = False
|
||||
existing_li_thickness = 0
|
||||
has_wall_insulation_recommendation = True
|
||||
has_roof_insulation_recommendation = True
|
||||
mainheating = h
|
||||
main_fuel = f
|
||||
mainheat_energy_eff = mainheat_energy_eff
|
||||
|
||||
funding.check_funding(
|
||||
measures=measures,
|
||||
starting_sap=33,
|
||||
ending_sap=69,
|
||||
floor_area=71,
|
||||
mainheat_description=x["MAINHEAT_DESCRIPTION"],
|
||||
heating_control_description=x["MAINHEATCONT_DESCRIPTION"],
|
||||
is_cavity=True,
|
||||
current_wall_uvalue=2,
|
||||
is_partial=False,
|
||||
existing_li_thickness=0,
|
||||
has_wall_insulation_recommendation=True,
|
||||
has_roof_insulation_recommendation=True,
|
||||
mainheating=h,
|
||||
main_fuel=f,
|
||||
mainheat_energy_eff=mainheat_energy_eff,
|
||||
)
|
||||
except Exception as e:
|
||||
errors.append(x["LMK_KEY"])
|
||||
|
||||
errored_epcs = epc_df[epc_df["LMK_KEY"].isin(errors)]
|
||||
unique_combs = errored_epcs[["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]].drop_duplicates()
|
||||
i = 2
|
||||
x = errored_epcs[
|
||||
(errored_epcs["MAINHEAT_ENERGY_EFF"] == unique_combs["MAINHEAT_ENERGY_EFF"].values[i]) &
|
||||
(errored_epcs["MAINHEAT_DESCRIPTION"] == unique_combs["MAINHEAT_DESCRIPTION"].values[i]) &
|
||||
(errored_epcs["MAIN_FUEL"] == unique_combs["MAIN_FUEL"].values[i])
|
||||
].head(1).squeeze()
|
||||
|
||||
most_prominent_combinations = epc_df.groupby(
|
||||
["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]
|
||||
)["LMK_KEY"].nunique().reset_index().sort_values("LMK_KEY", ascending=False).head(30).to_dict("records")
|
||||
# Large scale testing for various measures
|
||||
# measures = [
|
||||
# {"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
|
||||
# {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||
# {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||
# {"type": "external_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||
# {"type": "loft_insulation", "is_innovation": False, "uplift": 0},
|
||||
# {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
|
||||
# {"type": "double_glazing", "is_innovation": False, "uplift": 0},
|
||||
# {"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25},
|
||||
# {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
|
||||
# ]
|
||||
|
|
|
|||
|
|
@ -3,11 +3,12 @@ import os
|
|||
import pandas as pd
|
||||
import msgpack
|
||||
import inspect
|
||||
from datetime import datetime
|
||||
|
||||
from etl.epc_clean.EpcClean import EpcClean
|
||||
from etl.epc.settings import EARLIEST_EPC_DATE
|
||||
from pathlib import Path
|
||||
from utils.s3 import save_data_to_s3
|
||||
from utils.s3 import save_data_to_s3, read_from_s3
|
||||
|
||||
src_file_path = inspect.getfile(lambda: None)
|
||||
|
||||
|
|
@ -22,7 +23,7 @@ LAND_REGISTRY_PATHS = [
|
|||
os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part2.csv",
|
||||
]
|
||||
|
||||
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
|
||||
EPC_DIRECTORY = Path("/Users/khalimconn-kowlessar/Downloads") / "all-domestic-certificates"
|
||||
|
||||
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
|
||||
|
||||
|
|
@ -74,6 +75,18 @@ def app():
|
|||
# data being read in will be extremely small, meaning quicker load times. We'll begin by storing as a single
|
||||
# file and monitor usage patterns to see if it makes sense to split the data up
|
||||
|
||||
# TODO: Copy the existing cleaned to an archive location, in case we wish to roll back easily
|
||||
cleaned_historic = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
bucket_name=f"retrofit-data-{ENVIRONMENT}"
|
||||
)
|
||||
cleaned_historic = msgpack.unpackb(cleaned_historic, raw=False)
|
||||
save_data_to_s3(
|
||||
data=msgpack.packb(cleaned_historic, use_bin_type=True),
|
||||
s3_file_name=f"cleaned_epc_data/archive/{str(datetime.now())} - cleaned.bson",
|
||||
bucket_name=f"retrofit-data-{ENVIRONMENT}"
|
||||
)
|
||||
|
||||
save_data_to_s3(
|
||||
data=msgpack.packb(cleaned_data, use_bin_type=True),
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
|
|
|
|||
|
|
@ -74,7 +74,10 @@ class MainHeatAttributes(Definitions):
|
|||
"dim system ar gael, rhagdybir bod gwresogyddion trydan, trydan": "no system present, electric heaters assumed",
|
||||
# Should be handled by edge cases
|
||||
", trydan": ", electric",
|
||||
'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas'
|
||||
'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas',
|
||||
"bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, "
|
||||
"mains gas, Room heaters, "
|
||||
"electric"
|
||||
}
|
||||
|
||||
REMAP = {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
tqdm
|
||||
pandas
|
||||
msgpack
|
||||
textblob
|
||||
boto3
|
||||
Loading…
Add table
Reference in a new issue