Add temporary code that adds gas and electricity prices for both the current month and the prior month

This commit is contained in:
Michael Duong 2024-07-10 23:20:15 +01:00
parent b63de79043
commit e25a8d2d5e
2 changed files with 111 additions and 2 deletions

View file

@ -1,6 +1,6 @@
import msgpack
import pandas as pd
from datetime import datetime
from datetime import datetime, timedelta
from typing import List
from pathlib import Path
@ -77,6 +77,21 @@ clean_lookup["walls-description"] = new_walls_description_mapping.to_dict(
orient="records"
)
# TODO: Move this to s3 if needed
ENERGY_DIRECTORY = Path(__file__).parent / "local_data" / "energy_data"


def _load_monthly_prices(csv_name, price_column):
    """Read one price CSV from ENERGY_DIRECTORY, keyed by year and month.

    The CSV's columns are renamed positionally to ``lodgement_date`` and
    ``price_column`` (this assumes the file has exactly two columns, date
    first -- confirm against the data files). The date column is parsed,
    split into ``lodgement_year`` / ``lodgement_month`` and then dropped,
    so the returned frame can be merged on year and month.
    """
    prices = pd.read_csv(ENERGY_DIRECTORY / csv_name)
    prices.columns = ["lodgement_date", price_column]
    parsed_dates = pd.to_datetime(prices["lodgement_date"])
    prices["lodgement_year"] = parsed_dates.dt.year
    prices["lodgement_month"] = parsed_dates.dt.month
    return prices.drop(columns=["lodgement_date"])


# Module-level lookup tables: one row per date found in each CSV.
electricity_data = _load_monthly_prices("electricity-prices.csv", "electricity_price")
gas_data = _load_monthly_prices("gas-prices.csv", "gas_price")
class EPCPipeline:
"""
@ -243,6 +258,69 @@ class EPCPipeline:
constituency_difference_records = []
constituency_data["lodgement_date"] = pd.to_datetime(
constituency_data["lodgement_date"]
)
constituency_data["previous_date"] = constituency_data[
"lodgement_date"
].dt.to_period("M").dt.to_timestamp() - timedelta(days=1)
constituency_data["lodgement_year"] = constituency_data[
"lodgement_date"
].dt.year
constituency_data["lodgement_month"] = constituency_data[
"lodgement_date"
].dt.month
constituency_data["previous_year"] = constituency_data["previous_date"].dt.year
constituency_data["previous_month"] = constituency_data[
"previous_date"
].dt.month
constituency_data = pd.merge(
constituency_data,
electricity_data[
["electricity_price", "lodgement_year", "lodgement_month"]
],
how="left",
on=["lodgement_year", "lodgement_month"],
)
constituency_data = pd.merge(
constituency_data,
gas_data[["gas_price", "lodgement_year", "lodgement_month"]],
how="left",
on=["lodgement_year", "lodgement_month"],
)
constituency_data = pd.merge(
constituency_data,
electricity_data[
["electricity_price", "lodgement_year", "lodgement_month"]
],
how="left",
left_on=["previous_year", "previous_month"],
right_on=["lodgement_year", "lodgement_month"],
suffixes=("", "_previous"),
)
constituency_data = pd.merge(
constituency_data,
gas_data[["gas_price", "lodgement_year", "lodgement_month"]],
how="left",
left_on=["previous_year", "previous_month"],
right_on=["lodgement_year", "lodgement_month"],
suffixes=("", "_previous"),
)
constituency_data = constituency_data.drop(
columns=[
"lodgement_year",
"lodgement_month",
"previous_year",
"previous_month",
"lodgement_month_previous",
"lodgement_year_previous",
]
)
for uprn, property_data in constituency_data.groupby("uprn", observed=True):
difference_records = self.process_uprn(
uprn=str(uprn), property_data=property_data, directory=directory
@ -280,7 +358,16 @@ class EPCPipeline:
# We include the lodgement date here as we probably need to factor time into the
# model, since EPC standards and rigour have changed over time
variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES]
variable_data = property_data[
VARIABLE_DATA_FEATURES
+ COST_FEATURES
+ [
"electricity_price",
"gas_price",
"electricity_price_previous",
"gas_price_previous",
]
]
uprn = str(uprn)
epc_records = [

View file

@ -79,6 +79,10 @@ class EPCRecord:
lighting_cost_current: float = None
heating_cost_current: float = None
hot_water_cost_current: float = None
electricity_price: float = None
gas_price: float = None
electricity_price_previous: float = None
gas_price_previous: float = None
# potential_energy_efficiency: float = None
# environment_impact_potential: float = None
# energy_consumption_potential: float = None
@ -255,6 +259,12 @@ class EPCRecord:
self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"]
self.heating_cost_current: float = self.prepared_epc["heating_cost_current"]
self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"]
self.electricity_price: float = self.prepared_epc["electricity_price"]
self.gas_price: float = self.prepared_epc["gas_price"]
self.electricity_price_previous: float = self.prepared_epc[
"electricity_price_previous"
]
self.gas_price_previous: float = self.prepared_epc["gas_price_previous"]
# self.potential_energy_efficiency: float = float(
# self.prepared_epc["potential_energy_efficiency"]
# )
@ -1056,6 +1066,18 @@ class EPCDifferenceRecord:
"heating_cost_ending": self.record2.get("heating_cost_current"),
"hot_water_cost_starting": self.record1.get("hot_water_cost_current"),
"hot_water_cost_ending": self.record2.get("hot_water_cost_current"),
"electricity_price_starting": self.record1.get("electricity_price"),
"electricity_price_ending": self.record2.get("electricity_price"),
"gas_price_starting": self.record1.get("gas_price"),
"gas_price_ending": self.record2.get("gas_price"),
"electricity_price_previous_starting": self.record1.get(
"electricity_price_previous"
),
"electricity_price_previous_ending": self.record2.get(
"electricity_price_previous"
),
"gas_price_previous_starting": self.record1.get("gas_price_previous"),
"gas_price_previous_ending": self.record2.get("gas_price_previous"),
# "potential_energy_efficiency": self.earliest_record.get(
# "potential_energy_efficiency"
# ),