mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Add temporary code that adds gas and electricity prices for both the current month and the prior month
This commit is contained in:
parent
b63de79043
commit
e25a8d2d5e
2 changed files with 111 additions and 2 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import msgpack
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
|
@ -77,6 +77,21 @@ clean_lookup["walls-description"] = new_walls_description_mapping.to_dict(
|
|||
orient="records"
|
||||
)
|
||||
|
||||
# TODO: Move this to S3 if needed
|
||||
ENERGY_DIRECTORY = Path(__file__).parent / "local_data" / "energy_data"
|
||||
|
||||
electricity_data = pd.read_csv(ENERGY_DIRECTORY / "electricity-prices.csv")
|
||||
electricity_data.columns = ["lodgement_date", "electricity_price"]
|
||||
|
||||
gas_data = pd.read_csv(ENERGY_DIRECTORY / "gas-prices.csv")
|
||||
gas_data.columns = ["lodgement_date", "gas_price"]
|
||||
|
||||
for df in [electricity_data, gas_data]:
|
||||
df["lodgement_date"] = pd.to_datetime(df["lodgement_date"])
|
||||
df["lodgement_year"] = df["lodgement_date"].dt.year
|
||||
df["lodgement_month"] = df["lodgement_date"].dt.month
|
||||
df.drop(columns=["lodgement_date"], inplace=True)
|
||||
|
||||
|
||||
class EPCPipeline:
|
||||
"""
|
||||
|
|
@ -243,6 +258,69 @@ class EPCPipeline:
|
|||
|
||||
constituency_difference_records = []
|
||||
|
||||
constituency_data["lodgement_date"] = pd.to_datetime(
|
||||
constituency_data["lodgement_date"]
|
||||
)
|
||||
constituency_data["previous_date"] = constituency_data[
|
||||
"lodgement_date"
|
||||
].dt.to_period("M").dt.to_timestamp() - timedelta(days=1)
|
||||
constituency_data["lodgement_year"] = constituency_data[
|
||||
"lodgement_date"
|
||||
].dt.year
|
||||
constituency_data["lodgement_month"] = constituency_data[
|
||||
"lodgement_date"
|
||||
].dt.month
|
||||
constituency_data["previous_year"] = constituency_data["previous_date"].dt.year
|
||||
constituency_data["previous_month"] = constituency_data[
|
||||
"previous_date"
|
||||
].dt.month
|
||||
|
||||
constituency_data = pd.merge(
|
||||
constituency_data,
|
||||
electricity_data[
|
||||
["electricity_price", "lodgement_year", "lodgement_month"]
|
||||
],
|
||||
how="left",
|
||||
on=["lodgement_year", "lodgement_month"],
|
||||
)
|
||||
constituency_data = pd.merge(
|
||||
constituency_data,
|
||||
gas_data[["gas_price", "lodgement_year", "lodgement_month"]],
|
||||
how="left",
|
||||
on=["lodgement_year", "lodgement_month"],
|
||||
)
|
||||
|
||||
constituency_data = pd.merge(
|
||||
constituency_data,
|
||||
electricity_data[
|
||||
["electricity_price", "lodgement_year", "lodgement_month"]
|
||||
],
|
||||
how="left",
|
||||
left_on=["previous_year", "previous_month"],
|
||||
right_on=["lodgement_year", "lodgement_month"],
|
||||
suffixes=("", "_previous"),
|
||||
)
|
||||
|
||||
constituency_data = pd.merge(
|
||||
constituency_data,
|
||||
gas_data[["gas_price", "lodgement_year", "lodgement_month"]],
|
||||
how="left",
|
||||
left_on=["previous_year", "previous_month"],
|
||||
right_on=["lodgement_year", "lodgement_month"],
|
||||
suffixes=("", "_previous"),
|
||||
)
|
||||
|
||||
constituency_data = constituency_data.drop(
|
||||
columns=[
|
||||
"lodgement_year",
|
||||
"lodgement_month",
|
||||
"previous_year",
|
||||
"previous_month",
|
||||
"lodgement_month_previous",
|
||||
"lodgement_year_previous",
|
||||
]
|
||||
)
|
||||
|
||||
for uprn, property_data in constituency_data.groupby("uprn", observed=True):
|
||||
difference_records = self.process_uprn(
|
||||
uprn=str(uprn), property_data=property_data, directory=directory
|
||||
|
|
@ -280,7 +358,16 @@ class EPCPipeline:
|
|||
|
||||
# We include the lodgement date here as we probably need to factor time into the
|
||||
# model, since EPC standards and rigour have changed over time
|
||||
variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES]
|
||||
variable_data = property_data[
|
||||
VARIABLE_DATA_FEATURES
|
||||
+ COST_FEATURES
|
||||
+ [
|
||||
"electricity_price",
|
||||
"gas_price",
|
||||
"electricity_price_previous",
|
||||
"gas_price_previous",
|
||||
]
|
||||
]
|
||||
|
||||
uprn = str(uprn)
|
||||
epc_records = [
|
||||
|
|
|
|||
|
|
@ -79,6 +79,10 @@ class EPCRecord:
|
|||
lighting_cost_current: float = None
|
||||
heating_cost_current: float = None
|
||||
hot_water_cost_current: float = None
|
||||
electricity_price: float = None
|
||||
gas_price: float = None
|
||||
electricity_price_previous: float = None
|
||||
gas_price_previous: float = None
|
||||
# potential_energy_efficiency: float = None
|
||||
# environment_impact_potential: float = None
|
||||
# energy_consumption_potential: float = None
|
||||
|
|
@ -255,6 +259,12 @@ class EPCRecord:
|
|||
self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"]
|
||||
self.heating_cost_current: float = self.prepared_epc["heating_cost_current"]
|
||||
self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"]
|
||||
self.electricity_price: float = self.prepared_epc["electricity_price"]
|
||||
self.gas_price: float = self.prepared_epc["gas_price"]
|
||||
self.electricity_price_previous: float = self.prepared_epc[
|
||||
"electricity_price_previous"
|
||||
]
|
||||
self.gas_price_previous: float = self.prepared_epc["gas_price_previous"]
|
||||
# self.potential_energy_efficiency: float = float(
|
||||
# self.prepared_epc["potential_energy_efficiency"]
|
||||
# )
|
||||
|
|
@ -1056,6 +1066,18 @@ class EPCDifferenceRecord:
|
|||
"heating_cost_ending": self.record2.get("heating_cost_current"),
|
||||
"hot_water_cost_starting": self.record1.get("hot_water_cost_current"),
|
||||
"hot_water_cost_ending": self.record2.get("hot_water_cost_current"),
|
||||
"electricity_price_starting": self.record1.get("electricity_price"),
|
||||
"electricity_price_ending": self.record2.get("electricity_price"),
|
||||
"gas_price_starting": self.record1.get("gas_price"),
|
||||
"gas_price_ending": self.record2.get("gas_price"),
|
||||
"electricity_price_previous_starting": self.record1.get(
|
||||
"electricity_price_previous"
|
||||
),
|
||||
"electricity_price_previous_ending": self.record2.get(
|
||||
"electricity_price_previous"
|
||||
),
|
||||
"gas_price_previous_starting": self.record1.get("gas_price_previous"),
|
||||
"gas_price_previous_ending": self.record2.get("gas_price_previous"),
|
||||
# "potential_energy_efficiency": self.earliest_record.get(
|
||||
# "potential_energy_efficiency"
|
||||
# ),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue