removed rubbish code from epc clean

2026-06-08 11:17:27 +00:00 · 2024-07-02 10:31:52 +01:00 · 2024-07-02 10:31:52 +01:00 · eac2046765
commit eac2046765
parent f830c37c8a
5 changed files with 82 additions and 14 deletions
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@ -213,6 +213,10 @@ class GoogleSolarApi:
        # 1) Convert Solar Energy AD production from the DC production
        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate

+        # This is just a benchmark figure, based on the national figure. This does not respect the fact that a
+        # property could be 100% electric
+        average_electricity_consumption
+
        # Remove anything where the total ac energy is less than half of the array wattage
        panel_performance = panel_performance[
            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -284,16 +284,16 @@ async def trigger_plan(body: PlanTriggerRequest):
            property_id, is_new = create_property(
                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
-            # if not is_new:
-            #     continue
-            #
-            # create_property_targets(
-            #     session,
-            #     property_id=property_id,
-            #     portfolio_id=body.portfolio_id,
-            #     epc_target=body.goal_value,
-            #     heat_demand_target=None
-            # )
+            if not is_new:
+                continue
+
+            create_property_targets(
+                session,
+                property_id=property_id,
+                portfolio_id=body.portfolio_id,
+                epc_target=body.goal_value,
+                heat_demand_target=None
+            )

            epc_records = {
                'original_epc': epc_searcher.newest_epc.copy(),
@ -356,7 +356,7 @@ async def trigger_plan(body: PlanTriggerRequest):
            p.get_spatial_data(uprn_filenames)
            # Call Google Solar API
            # TODO: Complete me
-            # solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
+            solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])

        logger.info("Getting components and epc recommendations")
        recommendations = {}
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@ -1,5 +1,16 @@
 import numpy as np

+QUARTERLY_ENERGY_PRICES = [
+    # 2024 Q1
+    {"start": "2024-01-01", "end": "2024-03-31", "electricity": 0.2, "gas": 0.042},
+    # 2023 Q4
+    {"start": "2023-10-01", "end": "2023-12-31", "electricity": 0.202, "gas": 0.51},
+    # 2023 Q3
+    {"start": "2023-07-01", "end": "2023-09-30", "electricity": 0.188, "gas": 0.46},
+    # 2023 Q2
+    {"start": "2023-04-01", "end": "2023-06-30", "electricity": 0.177, "gas": 0.456},
+]
+

 class AnnualBillSavings:
    """
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@ -0,0 +1,56 @@
+import inspect
+import pandas as pd
+from tqdm import tqdm
+from etl.epc_clean.EpcClean import EpcClean
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    This application is tasked with pulling a large quantity of data from the find my epc website, containing the
+    estimated energy consumption for properties
+    :return:
+    """
+
+    cleaned_data = {}
+    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+
+    data = []
+    for directory in tqdm(epc_directories):
+        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        # Rename the columns to the same format as the api returns
+        data.columns = [c.replace("_", "-").lower() for c in data.columns]
+        # Keep only records lodged on or after the date threshold
+        data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+        data = data[~pd.isnull(data["uprn"])]
+        data = data[data["mains-gas-flag"] == "N"]
+        data = data[data["main-fuel"] == "electricity (not community)"]
+        data[data["current-energy-efficiency"].astype(float) > 80]["uprn"].astype(int)
+
+        # Convert to list of dictionaries as returned by the api
+        data = data.to_dict("records")
+
+        # Incorporate input data into cleaning
+        cleaner = EpcClean(data)
+
+        cleaner.clean()
+        # Merge into cleaned_data, adding only descriptions not already present
+        for k, data in cleaner.cleaned.items():
+            if k not in cleaned_data:
+                cleaned_data[k] = data
+            else:
+                existing_descriptions = [x["original_description"] for x in cleaned_data[k]]
+                new_data = [x for x in data if x["original_description"] not in existing_descriptions]
+                cleaned_data[k].extend(new_data)
+
+    # Basic check to make sure all descriptions are unique
+    for _, cleaned in cleaned_data.items():
+        descriptions = [x["original_description"] for x in cleaned]
+        if len(descriptions) != len(set(descriptions)):
+            raise ValueError("Duplicated descriptions found, check me")
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@ -39,11 +39,8 @@ def app():
    cleaned_data = {}
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]

-    WALLS = []
    for directory in tqdm(epc_directories):
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-        z = data["WALLS_DESCRIPTION"].unique().tolist()
-        WALLS.extend(z)
        # Rename the columns to the same format as the api returns
        data.columns = [c.replace("_", "-").lower() for c in data.columns]
        # Take just date before the date threshold