mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
rebuilding epc clean
This commit is contained in:
parent
3bdadc80df
commit
33ca9b7988
6 changed files with 41 additions and 99 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
||||||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="epc_clean" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
||||||
<component name="Black">
|
<component name="Black">
|
||||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="epc_clean" project-jdk-type="Python SDK" />
|
||||||
<component name="PyCharmProfessionalAdvertiser">
|
<component name="PyCharmProfessionalAdvertiser">
|
||||||
<option name="shown" value="true" />
|
<option name="shown" value="true" />
|
||||||
</component>
|
</component>
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ import pandas as pd
|
||||||
from backend.Funding import Funding, EligibilityCaveats
|
from backend.Funding import Funding, EligibilityCaveats
|
||||||
from backend.tests.test_data.innovation_measure_fixtures import innovation_scenarios
|
from backend.tests.test_data.innovation_measure_fixtures import innovation_scenarios
|
||||||
from backend.tests.test_data.pre_heating_scenarios import pre_main_heating_scenarios
|
from backend.tests.test_data.pre_heating_scenarios import pre_main_heating_scenarios
|
||||||
|
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
|
||||||
|
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -1040,99 +1042,18 @@ def test_map_to_pre_main_heating(scenario):
|
||||||
"expected"], f"Failed: {scenario['description']} -> {result} (expected {scenario['expected']})"
|
"expected"], f"Failed: {scenario['description']} -> {result} (expected {scenario['expected']})"
|
||||||
|
|
||||||
|
|
||||||
# Large scale testing for various measures
|
|
||||||
measures = [
|
|
||||||
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
|
|
||||||
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
|
|
||||||
{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
|
|
||||||
{"type": "external_wall_insulation", "is_innovation": False, "uplift": 0},
|
|
||||||
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
|
|
||||||
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
|
|
||||||
{"type": "double_glazing", "is_innovation": False, "uplift": 0},
|
|
||||||
{"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25},
|
|
||||||
{"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
|
|
||||||
]
|
|
||||||
epc_df = pd.read_csv(
|
|
||||||
"/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv"
|
|
||||||
)
|
|
||||||
from tqdm import tqdm
|
|
||||||
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
|
|
||||||
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
|
||||||
|
|
||||||
# TODO: Add innovation uplift to private
|
# TODO: Add innovation uplift to private
|
||||||
|
raise ValueError("TODO: ADD INNOVATION TO PRIVATE")
|
||||||
|
|
||||||
mock_project_scores_matrix = mock_project_scores_matrix()
|
# Large scale testing for various measures
|
||||||
mock_whlg_postcodes = mock_whlg_postcodes()
|
# measures = [
|
||||||
mock_partial_scores_matrix = mock_partial_scores_matrix()
|
# {"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
|
||||||
|
# {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||||
errors = []
|
# {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||||
for _, x in tqdm(epc_df.iterrows(), total=len(epc_df)):
|
# {"type": "external_wall_insulation", "is_innovation": False, "uplift": 0},
|
||||||
try:
|
# {"type": "loft_insulation", "is_innovation": False, "uplift": 0},
|
||||||
# inputs
|
# {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
|
||||||
mainheat_energy_eff = x["MAINHEAT_ENERGY_EFF"]
|
# {"type": "double_glazing", "is_innovation": False, "uplift": 0},
|
||||||
heating_cleaner = MainHeatAttributes(description=x["MAINHEAT_DESCRIPTION"])
|
# {"type": "cavity_wall_insulation", "is_innovation": True, "uplift": 0.25},
|
||||||
fuel_cleaner = MainFuelAttributes(description="" if pd.isnull(x["MAIN_FUEL"]) else x["MAIN_FUEL"])
|
# {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
|
||||||
|
# ]
|
||||||
h = heating_cleaner.process()
|
|
||||||
f = fuel_cleaner.process()
|
|
||||||
|
|
||||||
funding = Funding(
|
|
||||||
project_scores_matrix=mock_project_scores_matrix,
|
|
||||||
partial_project_scores_matrix=mock_partial_scores_matrix,
|
|
||||||
whlg_eligible_postcodes=mock_whlg_postcodes,
|
|
||||||
social_cavity_abs_rate=13.5,
|
|
||||||
social_solid_abs_rate=17,
|
|
||||||
private_cavity_abs_rate=13.5,
|
|
||||||
private_solid_abs_rate=17,
|
|
||||||
tenure="Social"
|
|
||||||
)
|
|
||||||
|
|
||||||
self = funding
|
|
||||||
measures = measures
|
|
||||||
starting_sap = 33
|
|
||||||
ending_sap = 69
|
|
||||||
floor_area = 71
|
|
||||||
mainheat_description = x["MAINHEAT_DESCRIPTION"]
|
|
||||||
heating_control_description = x["MAINHEATCONT_DESCRIPTION"]
|
|
||||||
is_cavity = True
|
|
||||||
current_wall_uvalue = 2
|
|
||||||
is_partial = False
|
|
||||||
existing_li_thickness = 0
|
|
||||||
has_wall_insulation_recommendation = True
|
|
||||||
has_roof_insulation_recommendation = True
|
|
||||||
mainheating = h
|
|
||||||
main_fuel = f
|
|
||||||
mainheat_energy_eff = mainheat_energy_eff
|
|
||||||
|
|
||||||
funding.check_funding(
|
|
||||||
measures=measures,
|
|
||||||
starting_sap=33,
|
|
||||||
ending_sap=69,
|
|
||||||
floor_area=71,
|
|
||||||
mainheat_description=x["MAINHEAT_DESCRIPTION"],
|
|
||||||
heating_control_description=x["MAINHEATCONT_DESCRIPTION"],
|
|
||||||
is_cavity=True,
|
|
||||||
current_wall_uvalue=2,
|
|
||||||
is_partial=False,
|
|
||||||
existing_li_thickness=0,
|
|
||||||
has_wall_insulation_recommendation=True,
|
|
||||||
has_roof_insulation_recommendation=True,
|
|
||||||
mainheating=h,
|
|
||||||
main_fuel=f,
|
|
||||||
mainheat_energy_eff=mainheat_energy_eff,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
errors.append(x["LMK_KEY"])
|
|
||||||
|
|
||||||
errored_epcs = epc_df[epc_df["LMK_KEY"].isin(errors)]
|
|
||||||
unique_combs = errored_epcs[["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]].drop_duplicates()
|
|
||||||
i = 2
|
|
||||||
x = errored_epcs[
|
|
||||||
(errored_epcs["MAINHEAT_ENERGY_EFF"] == unique_combs["MAINHEAT_ENERGY_EFF"].values[i]) &
|
|
||||||
(errored_epcs["MAINHEAT_DESCRIPTION"] == unique_combs["MAINHEAT_DESCRIPTION"].values[i]) &
|
|
||||||
(errored_epcs["MAIN_FUEL"] == unique_combs["MAIN_FUEL"].values[i])
|
|
||||||
].head(1).squeeze()
|
|
||||||
|
|
||||||
most_prominent_combinations = epc_df.groupby(
|
|
||||||
["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]
|
|
||||||
)["LMK_KEY"].nunique().reset_index().sort_values("LMK_KEY", ascending=False).head(30).to_dict("records")
|
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,12 @@ import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import msgpack
|
import msgpack
|
||||||
import inspect
|
import inspect
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from etl.epc_clean.EpcClean import EpcClean
|
from etl.epc_clean.EpcClean import EpcClean
|
||||||
from etl.epc.settings import EARLIEST_EPC_DATE
|
from etl.epc.settings import EARLIEST_EPC_DATE
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from utils.s3 import save_data_to_s3
|
from utils.s3 import save_data_to_s3, read_from_s3
|
||||||
|
|
||||||
src_file_path = inspect.getfile(lambda: None)
|
src_file_path = inspect.getfile(lambda: None)
|
||||||
|
|
||||||
|
|
@ -22,7 +23,7 @@ LAND_REGISTRY_PATHS = [
|
||||||
os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part2.csv",
|
os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part2.csv",
|
||||||
]
|
]
|
||||||
|
|
||||||
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
|
EPC_DIRECTORY = Path("/Users/khalimconn-kowlessar/Downloads") / "all-domestic-certificates"
|
||||||
|
|
||||||
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
|
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
|
||||||
|
|
||||||
|
|
@ -74,6 +75,18 @@ def app():
|
||||||
# data being read in will be extremely small, meaning quicker load times. We'll begin by storing as a single
|
# data being read in will be extremely small, meaning quicker load times. We'll begin by storing as a single
|
||||||
# file and monitor usage patterns to see if it makes sense to split the data up
|
# file and monitor usage patterns to see if it makes sense to split the data up
|
||||||
|
|
||||||
|
# TODO: Copy the existing cleaned to an archive location, in case we wish to roll back easily
|
||||||
|
cleaned_historic = read_from_s3(
|
||||||
|
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||||
|
bucket_name=f"retrofit-data-{ENVIRONMENT}"
|
||||||
|
)
|
||||||
|
cleaned_historic = msgpack.unpackb(cleaned_historic, raw=False)
|
||||||
|
save_data_to_s3(
|
||||||
|
data=msgpack.packb(cleaned_historic, use_bin_type=True),
|
||||||
|
s3_file_name=f"cleaned_epc_data/archive/{str(datetime.now())} - cleaned.bson",
|
||||||
|
bucket_name=f"retrofit-data-{ENVIRONMENT}"
|
||||||
|
)
|
||||||
|
|
||||||
save_data_to_s3(
|
save_data_to_s3(
|
||||||
data=msgpack.packb(cleaned_data, use_bin_type=True),
|
data=msgpack.packb(cleaned_data, use_bin_type=True),
|
||||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,10 @@ class MainHeatAttributes(Definitions):
|
||||||
"dim system ar gael, rhagdybir bod gwresogyddion trydan, trydan": "no system present, electric heaters assumed",
|
"dim system ar gael, rhagdybir bod gwresogyddion trydan, trydan": "no system present, electric heaters assumed",
|
||||||
# Should be handled by edge cases
|
# Should be handled by edge cases
|
||||||
", trydan": ", electric",
|
", trydan": ", electric",
|
||||||
'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas'
|
'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas',
|
||||||
|
"bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, "
|
||||||
|
"mains gas, Room heaters, "
|
||||||
|
"electric"
|
||||||
}
|
}
|
||||||
|
|
||||||
REMAP = {
|
REMAP = {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
tqdm
|
||||||
|
pandas
|
||||||
|
msgpack
|
||||||
|
textblob
|
||||||
|
boto3
|
||||||
Loading…
Add table
Reference in a new issue