removed rubbish code from epc clean

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-02 10:31:52 +01:00
parent f830c37c8a
commit eac2046765
5 changed files with 82 additions and 14 deletions

View file

@ -213,6 +213,10 @@ class GoogleSolarApi:
# 1) Convert Solar Energy AC production from the DC production
panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
# This is just a benchmark figure, based on the national figure. This does not respect the fact that a
# property could be 100% electric
average_electricity_consumption
# Remove anything where the total ac energy is less than half of the array wattage
panel_performance = panel_performance[
(panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5

View file

@ -284,16 +284,16 @@ async def trigger_plan(body: PlanTriggerRequest):
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
# if not is_new:
# continue
#
# create_property_targets(
# session,
# property_id=property_id,
# portfolio_id=body.portfolio_id,
# epc_target=body.goal_value,
# heat_demand_target=None
# )
if not is_new:
continue
create_property_targets(
session,
property_id=property_id,
portfolio_id=body.portfolio_id,
epc_target=body.goal_value,
heat_demand_target=None
)
epc_records = {
'original_epc': epc_searcher.newest_epc.copy(),
@ -356,7 +356,7 @@ async def trigger_plan(body: PlanTriggerRequest):
p.get_spatial_data(uprn_filenames)
# Call Google Solar API
# TODO: Complete me
# solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])
logger.info("Getting components and epc recommendations")
recommendations = {}

View file

@ -1,5 +1,16 @@
import numpy as np
# Quarterly electricity and gas price figures, newest quarter first. Each entry covers the inclusive date range
# "start" to "end" (ISO formatted strings).
# NOTE(review): the 2023 gas figures (0.456 - 0.51) are roughly ten times the 2024 Q1 figure (0.042) and are higher
# than the electricity figures for the same quarters - confirm the values/units before relying on them.
QUARTERLY_ENERGY_PRICES = [
    {"start": quarter_start, "end": quarter_end, "electricity": electricity_price, "gas": gas_price}
    for quarter_start, quarter_end, electricity_price, gas_price in (
        ("2024-01-01", "2024-03-31", 0.2, 0.042),  # 2024 Q1
        ("2023-10-01", "2023-12-31", 0.202, 0.51),  # 2023 Q4
        ("2023-07-01", "2023-09-30", 0.188, 0.46),  # 2023 Q3
        ("2023-04-01", "2023-06-30", 0.177, 0.456),  # 2023 Q2
    )
]
class AnnualBillSavings:
"""

View file

@ -0,0 +1,56 @@
import inspect
import pandas as pd
from tqdm import tqdm
from etl.epc_clean.EpcClean import EpcClean
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
# Path of the file this module was loaded from, found by asking inspect where a throwaway lambda was defined
src_file_path = inspect.getfile(lambda: None)
# The bulk EPC download lives next to this file, in local_data/all-domestic-certificates
EPC_DIRECTORY = Path(src_file_path).parent.joinpath("local_data", "all-domestic-certificates")
def app():
    """
    Clean the local bulk EPC certificate files and collect the unique cleaned descriptions across all of them.

    Every sub-directory of EPC_DIRECTORY is expected to hold a ``certificates.csv``. Each file is loaded, its
    columns are renamed to the format the api returns, and the rows are restricted to certificates that:
      - were lodged on or after EARLIEST_EPC_DATE
      - have a uprn
      - have a "mains-gas-flag" of "N"
      - have a "main-fuel" of "electricity (not community)"
    The remaining records are run through EpcClean and the contents of ``cleaner.cleaned`` are merged into a single
    dictionary, skipping any entry whose "original_description" has already been collected for the same key.

    :raises ValueError: if any key ends up holding the same "original_description" more than once
    :return: None
    """
    cleaned_data = {}
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]

    for directory in tqdm(epc_directories):
        certificates = pd.read_csv(directory / "certificates.csv", low_memory=False)

        # Rename the columns to the same format as the api returns
        certificates.columns = [c.replace("_", "-").lower() for c in certificates.columns]

        # Take just the certificates lodged on or after the date threshold that have a uprn and match the
        # mains gas / main fuel values we are interested in
        keep = (
            (certificates["lodgement-date"] >= EARLIEST_EPC_DATE)
            & ~pd.isnull(certificates["uprn"])
            & (certificates["mains-gas-flag"] == "N")
            & (certificates["main-fuel"] == "electricity (not community)")
        )
        certificates = certificates[keep]

        # Convert to list of dictionaries as returned by the api
        records = certificates.to_dict("records")

        # Incorporate input data into cleaning
        cleaner = EpcClean(records)
        cleaner.clean()

        # Extend cleaned_data with anything we have not seen before
        for key, entries in cleaner.cleaned.items():
            if key not in cleaned_data:
                cleaned_data[key] = entries
                continue

            # A set gives constant time lookups; the descriptions already have to be hashable for the
            # uniqueness check below
            seen_descriptions = {x["original_description"] for x in cleaned_data[key]}
            cleaned_data[key].extend(x for x in entries if x["original_description"] not in seen_descriptions)

    # Basic check to make sure all descriptions are unique
    for cleaned in cleaned_data.values():
        descriptions = [x["original_description"] for x in cleaned]
        if len(descriptions) != len(set(descriptions)):
            raise ValueError("Duplicated descriptions found, check me")

View file

@ -39,11 +39,8 @@ def app():
cleaned_data = {}
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
WALLS = []
for directory in tqdm(epc_directories):
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
z = data["WALLS_DESCRIPTION"].unique().tolist()
WALLS.extend(z)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold