diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 6d2ddf6c..d29e3da5 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -213,6 +213,10 @@ class GoogleSolarApi: # 1) Convert Solar Energy AD production from the DC production panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate + # This is just a benchmark figure, based on the national figure. This does not account for the fact that a + # property could be 100% electric + average_electricity_consumption + # Remove anything where the total ac energy is less than half of the array wattage panel_performance = panel_performance[ (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5 diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 80392c88..258449c2 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -284,16 +284,16 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - # if not is_new: - # continue - # - # create_property_targets( - # session, - # property_id=property_id, - # portfolio_id=body.portfolio_id, - # epc_target=body.goal_value, - # heat_demand_target=None - # ) + if not is_new: + continue + + create_property_targets( + session, + property_id=property_id, + portfolio_id=body.portfolio_id, + epc_target=body.goal_value, + heat_demand_target=None + ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), @@ -356,7 +356,7 @@ async def trigger_plan(body: PlanTriggerRequest): p.get_spatial_data(uprn_filenames) # Call Google Solar API # TODO: Complete me - # solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]) + solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], 
latitude=p.spatial["latitude"]) logger.info("Getting components and epc recommendations") recommendations = {} diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 7395ab6b..e6494bcd 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -1,5 +1,16 @@ import numpy as np +QUARTERLY_ENERGY_PRICES = [ + # 2024 Q1 + {"start": "2024-01-01", "end": "2024-03-31", "electricity": 0.2, "gas": 0.042}, + # 2023 Q4 + {"start": "2023-10-01", "end": "2023-12-31", "electricity": 0.202, "gas": 0.51}, + # 2023 Q3 + {"start": "2023-07-01", "end": "2023-09-30", "electricity": 0.188, "gas": 0.46}, + # 2023 Q2 + {"start": "2023-04-01", "end": "2023-06-30", "electricity": 0.177, "gas": 0.456}, +] + class AnnualBillSavings: """ diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py new file mode 100644 index 00000000..25023894 --- /dev/null +++ b/etl/bill_savings/data_collection.py @@ -0,0 +1,56 @@ +import inspect +import pandas as pd +from tqdm import tqdm +from etl.epc_clean.EpcClean import EpcClean +from etl.epc.settings import EARLIEST_EPC_DATE +from pathlib import Path + +src_file_path = inspect.getfile(lambda: None) + +EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates" + + +def app(): + """ + This application is tasked with pulling a large quantity of data from the find my epc website, containing the + estimated energy consumption for properties + :return: + """ + + cleaned_data = {} + epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] + + data = [] + for directory in tqdm(epc_directories): + data = pd.read_csv(directory / "certificates.csv", low_memory=False) + # Rename the columns to the same format as the api returns + data.columns = [c.replace("_", "-").lower() for c in data.columns] + # Keep only certificates lodged on or after the date threshold + data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE] + 
data = data[~pd.isnull(data["uprn"])] + data = data[data["mains-gas-flag"] == "N"] + data = data[data["main-fuel"] == "electricity (not community)"] + data[data["current-energy-efficiency"].astype(float) > 80]["uprn"].astype(int) + + # Convert to list of dictionaries as returned by the api + data = data.to_dict("records") + + # Incorporate input data into cleaning + cleaner = EpcClean(data) + + cleaner.clean() + # Extend cleaned_data + for k, data in cleaner.cleaned.items(): + if k not in cleaned_data: + cleaned_data[k] = data + else: + existing_descriptions = [x["original_description"] for x in cleaned_data[k]] + new_data = [x for x in data if x["original_description"] not in existing_descriptions] + cleaned_data[k].extend(new_data) + + # Basic check to make sure all descriptions are unique + for _, cleaned in cleaned_data.items(): + descriptions = [x["original_description"] for x in cleaned] + if len(descriptions) != len(set(descriptions)): + raise ValueError("Duplicated descriptions found, check me") diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 59561b3c..1d833b72 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -39,11 +39,8 @@ def app(): cleaned_data = {} epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] - WALLS = [] for directory in tqdm(epc_directories): data = pd.read_csv(directory / "certificates.csv", low_memory=False) - z = data["WALLS_DESCRIPTION"].unique().tolist() - WALLS.extend(z) # Rename the columns to the same format as the api returns data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold