From a66b3782950366f2213019ed533b8031be16c5fe Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 26 Apr 2024 18:51:59 +0100 Subject: [PATCH] Markdown file with all possible improvements --- etl/epc_recommendations/Pipeline.py | 35 +++++++++-- .../improvement_description.md | 59 +++++++++++++++++++ 2 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 etl/epc_recommendations/improvement_description.md diff --git a/etl/epc_recommendations/Pipeline.py b/etl/epc_recommendations/Pipeline.py index a6de78e5..014d3c56 100644 --- a/etl/epc_recommendations/Pipeline.py +++ b/etl/epc_recommendations/Pipeline.py @@ -59,6 +59,26 @@ class EPCRecommendationsPipeline: # self.improvement_descriptions = improvement_description + def extract_full_improvement_dataset(self): + with mp.Pool() as pool: + results = list( + tqdm( + pool.imap(self._task_extract_full_improvement_dataset, directories), + total=len(directories), + ), + ) + + results_df = pd.concat(results) + + # Only sample one for each improvement as we just want to hit the find my energy website minimally for now + sampled_df = results_df.groupby("IMPROVEMENT_ID").sample(1) + + improvement_description = self._get_descriptions_of_improvements(sampled_df) + + self.improvement_description = improvement_description + + # self.full_improvement_df = sampled_df + def _task_check_number_of_improvement_ids(self, directory: Path): """ Parallel task for checking the number of improvement ids @@ -81,10 +101,6 @@ class EPCRecommendationsPipeline: def _task_extract_improvement_description(self, directory: Path) -> pd.DataFrame: """ Parallel task for checking the number of improvement ids - Flow will be get the certificates, - Find the latest EPC certificate for the UPRN, - Load the recommendations, - Merge on the LMK_KEY, """ recommendations_filepath = directory / "recommendations.csv" @@ -142,7 +158,9 @@ class EPCRecommendationsPipeline: output = certificates_df.merge(recommendations_df, on="LMK_KEY", how="inner") - return output + res = output.groupby("IMPROVEMENT_ID").sample(1) + + return res def _get_descriptions_of_improvements( self, improvement_description_df: pd.DataFrame @@ -232,6 +250,8 @@ class EPCRecommendationsPipeline: improvement_description_mapping[row.IMPROVEMENT_ID] = col_name + return improvement_description_mapping + # headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'} # postcode_input = postcode_input.replace(" ", "+") @@ -318,6 +338,11 @@ if __name__ == "__main__": e.extract_improvement_description() e.improvement_description_df + e.extract_full_improvement_dataset() + pd.DataFrame.from_dict( + e.improvement_description, orient="index", columns=["improvement_description"] + ).to_markdown("improvement_description.md") + full_id = pd.DataFrame(e.number_improvement_ids, columns=["IMPROVEMENT_ID"]) e.improvement_description_df.merge( diff --git a/etl/epc_recommendations/improvement_description.md b/etl/epc_recommendations/improvement_description.md new file mode 100644 index 00000000..abbf1eff --- /dev/null +++ b/etl/epc_recommendations/improvement_description.md @@ -0,0 +1,59 @@ +| | improvement_description | +|---:|:---------------------------------------------------------| +| 1 | Hot water cylinder insulation | +| 2 | Hot water cylinder insulation | +| 3 | Hot water cylinder insulation | +| 4 | Hot water cylinder thermostat | +| 5 | Floor insulation (suspended floor) | +| 6 | Cavity wall insulation | +| 7 | Internal or external wall insulation | +| 8 | Double glazed windows | +| 9 | Secondary glazing | +| 10 | Solar water heating | +| 11 | Heating controls (programmer, room thermostat and TRVs) | +| 12 | Heating controls (room thermostat and TRVs) | +| 13 | Heating controls (thermostatic radiator valves) | +| 14 | Heating controls (room thermostat) | +| 15 | Heating controls (programmer and TRVs) | +| 16 | Heating controls (time and temperature zone control) | +| 17 | Heating controls (programmer and room thermostat) | +| 18 | Heating controls (room thermostat) | +| 19 | Solar water heating | +| 20 | Replace boiler with new condensing boiler | +| 21 | Replace boiler with new condensing boiler | +| 22 | Replace boiler with biomass boiler | +| 23 | Biomass stove with boiler | +| 24 | Fan assisted storage heaters and dual immersion cylinder | +| 25 | Fan assisted storage heaters | +| 26 | Replacement warm air unit | +| 27 | Change heating to gas condensing boiler | +| 28 | Condensing oil boiler with radiators | +| 29 | Gas condensing boiler | +| 30 | Internal or external wall insulation | +| 31 | Fan-assisted storage heaters | +| 32 | Change heating to gas condensing boiler | +| 34 | Solar photovoltaic panels, 2.5 kWp | +| 35 | Low energy lighting | +| 36 | Condensing heating unit | +| 37 | Condensing boiler (separate from the range cooker) | +| 38 | Condensing boiler (separate from the range cooker) | +| 39 | Biomass stove with boiler | +| 40 | Change room heaters to condensing boiler | +| 41 | Translation missing | +| 42 | Mains gas condensing heating unit | +| 43 | Translation missing | +| 44 | Wind turbine | +| 45 | Flat roof or sloping ceiling insulation | +| 46 | Room-in-roof insulation | +| 47 | Floor insulation (solid floor) | +| 48 | High performance external doors | +| 49 | Heat recovery system for mixer showers | +| 50 | Flue gas heat recovery device in conjunction with boiler | +| 56 | Replacement glazing units | +| 57 | Floor insulation (suspended floor) | +| 58 | Floor insulation (solid floor) | +| 59 | High heat retention storage heaters | +| 60 | High heat retention storage heaters | +| 61 | High heat retention storage heaters | +| 62 | High heat retention storage heaters | +| 63 | Party wall insulation | \ No newline at end of file