mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
refactoring creation of epc dataset
This commit is contained in:
parent
4a6802a5a2
commit
37cc43adb1
2 changed files with 59 additions and 60 deletions
|
|
@ -202,6 +202,33 @@ class AssetList:
|
|||
This class is used to standardise asset lists so that we can process the core information in a consistent manner.
|
||||
"""
|
||||
|
||||
# Maps field names as they appear in the EPC API data (keys) to the "epc_"-prefixed column
# names used once the data is merged into the asset list (values). The keys double as the
# list of EPC columns to keep: callers select list(EPC_API_DATA_NAMES.keys()) from the EPC
# dataframe and then pass this dict to DataFrame.rename(columns=...).
EPC_API_DATA_NAMES = {
    "uprn": "epc_os_uprn",
    "address1": "epc_address1",
    "address": "epc_address",
    "postcode": "epc_postcode",
    "inspection-date": "epc_inspection_date",
    "current-energy-efficiency": "epc_sap_score_on_register",
    "current-energy-rating": "epc_rating_on_register",
    "property-type": "epc_property_type",
    "built-form": "epc_archetype",
    "total-floor-area": "epc_total_floor_area",
    "construction-age-band": "epc_age_band",
    "floor-height": "epc_floor_height",
    "number-habitable-rooms": "epc_number_habitable_rooms",
    "walls-description": "epc_wall_construction",
    "roof-description": "epc_roof_construction",
    "floor-description": "epc_floor_construction",
    "mainheat-description": "epc_heating_type",
    'mainheatcont-description': "epc_heating_controls",
    "secondheat-description": "epc_secondary_heating",
    "transaction-type": "epc_reason",
    "energy-consumption-current": "epc_heat_demand",
}
|
||||
# Currently an empty placeholder.
# NOTE(review): presumably intended to hold the equivalent rename mapping for the
# "find my epc" data - confirm before relying on it.
FIND_EPC_DATA_NAMES = {

}

# Maps the textual label "Pre 1900" to a concrete datetime (31 December 1899).
# NOTE(review): the consumer of this mapping is not visible here; presumably it is used to
# coerce non-date labels into datetimes - confirm against the calling code.
DATETIME_REMAP = {
    "Pre 1900": datetime(year=1899, month=12, day=31),
}
|
||||
|
|
@ -590,3 +617,18 @@ class AssetList:
|
|||
|
||||
def create_lookup_mappings(self):
    """
    Not implemented yet: this method is currently a no-op and returns None.

    NOTE(review): judging by the name it is presumably meant to build lookup mappings for the
    asset list - confirm the intended behaviour before calling it.
    """
    pass
|
||||
|
||||
def merge_data(self, df: pd.DataFrame) -> None:
    """
    Left-merge extra columns into the standardised asset list, keyed on the domna property id.

    Every row of ``self.standardised_asset_list`` is kept; rows without a matching id in ``df``
    receive missing values in the newly added columns. The merged frame replaces
    ``self.standardised_asset_list``.

    :param df: data to insert. Must contain the ``self.DOMNA_PROPERTY_ID`` column, with at most
        one row per id, and must not repeat any non-key column that is already present in the
        standardised asset list.
    :raises ValueError: if the id column is missing, if it contains duplicated ids, or if ``df``
        carries a non-key column that already exists in the standardised asset list.
    :return: None - the result is stored on ``self.standardised_asset_list``.
    """
    key = self.DOMNA_PROPERTY_ID

    if key not in df.columns:
        raise ValueError(f"Dataframe must contain the column {key}")

    # Duplicate ids would fan out rows of the asset list during the left merge.
    if df[key].duplicated().any():
        raise ValueError(f"{key} contains duplicated IDs")

    # pandas silently renames clashing non-key columns to "<name>_x" / "<name>_y", which would
    # break the standardised column names, so reject the input instead (e.g. when the same
    # data is merged twice).
    existing_columns = self.standardised_asset_list.columns
    clashing = [col for col in df.columns if col != key and col in existing_columns]
    if clashing:
        raise ValueError(
            f"Dataframe contains columns already present in the standardised asset list: {clashing}"
        )

    self.standardised_asset_list = self.standardised_asset_list.merge(
        df, how="left", on=key
    )
|
||||
|
|
|
|||
|
|
@ -474,20 +474,22 @@ def app():
|
|||
epc_data.append(csv_data)
|
||||
|
||||
epc_df = pd.concat(epc_data)
|
||||
# TODO: TEMP!!!
|
||||
epc_df = epc_df.rename(columns={"row_id": asset_list.DOMNA_PROPERTY_ID})
|
||||
|
||||
# We expand out the recommendations
|
||||
recommendations_df = epc_df[["row_id", "recommendations"]]
|
||||
recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
|
||||
|
||||
unique_recommendations = set()
|
||||
for _, row in recommendations_df.iterrows():
|
||||
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
||||
|
||||
columns = ["row_id"] + list(unique_recommendations)
|
||||
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
|
||||
transformed_data = []
|
||||
for _, row in recommendations_df.iterrows():
|
||||
# Initialize a dictionary for this row with False for all recommendations
|
||||
row_data = {col: False for col in columns}
|
||||
row_data["row_id"] = row["row_id"]
|
||||
row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
|
||||
|
||||
# Set True for each recommendation present in this row
|
||||
for rec in row["recommendations"]:
|
||||
|
|
@ -500,10 +502,11 @@ def app():
|
|||
transformed_df = pd.DataFrame(transformed_data)
|
||||
# At the moment, we're only using a limited set of columns - let's just keep cavity wall insulation
|
||||
# recommendations
|
||||
transformed_df = transformed_df[["row_id", "Cavity wall insulation"]]
|
||||
transformed_df = transformed_df[[asset_list.DOMNA_PROPERTY_ID, "Cavity wall insulation"]]
|
||||
|
||||
# Get the find my epc data
|
||||
find_my_epc_data = epc_df[["row_id", "find_my_epc_data"]].drop(columns=["find_my_epc_data"]).join(
|
||||
find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop(
|
||||
columns=["find_my_epc_data"]).join(
|
||||
pd.json_normalize(epc_df["find_my_epc_data"])
|
||||
)
|
||||
# We check if we get the solar pv column:
|
||||
|
|
@ -513,46 +516,15 @@ def app():
|
|||
# Retrieve just the data we need
|
||||
|
||||
epc_df = epc_df[
|
||||
[
|
||||
"row_id",
|
||||
"uprn",
|
||||
"address1",
|
||||
"address",
|
||||
"postcode",
|
||||
"property-type",
|
||||
"built-form",
|
||||
"inspection-date",
|
||||
"current-energy-rating",
|
||||
"current-energy-efficiency",
|
||||
"roof-description",
|
||||
"walls-description",
|
||||
"floor-description",
|
||||
"transaction-type",
|
||||
"secondheat-description",
|
||||
"total-floor-area",
|
||||
"construction-age-band",
|
||||
"floor-height",
|
||||
"number-habitable-rooms",
|
||||
"mainheat-description",
|
||||
'mainheatcont-description',
|
||||
"energy-consumption-current",
|
||||
"photo-supply",
|
||||
]
|
||||
].rename(
|
||||
columns={"address1": "Address1 on EPC", "address": "Address on EPC", "postcode": "Postcode on EPC"}
|
||||
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
||||
].rename(
|
||||
columns=asset_list.EPC_API_DATA_NAMES
|
||||
)
|
||||
|
||||
asset_list.merge_data(epc_df)
|
||||
asset_list.insert_
|
||||
|
||||
asset_list = asset_list.merge(
|
||||
epc_df,
|
||||
how="left",
|
||||
on="row_id"
|
||||
).merge(
|
||||
epc_df = epc_df.merge(
|
||||
find_my_epc_data[
|
||||
[
|
||||
"row_id", "heating_text", "hot_water_text", 'Assessor’s name',
|
||||
asset_list.DOMNA_PROPERTY_ID, "heating_text", "hot_water_text", 'Assessor’s name',
|
||||
"Assessor's Telephone", "Assessor's Email", "Accreditation scheme",
|
||||
"Assessor’s ID", "Solar photovoltaics"
|
||||
]
|
||||
|
|
@ -564,31 +536,16 @@ def app():
|
|||
}
|
||||
),
|
||||
how="left",
|
||||
on="row_id"
|
||||
on=asset_list.DOMNA_PROPERTY_ID
|
||||
)
|
||||
|
||||
asset_list.merge_data(epc_df)
|
||||
|
||||
asset_list["Has Solar PV"] = asset_list["Has Solar PV"] | ~asset_list["photo-supply"].isin(["0.0", 0, None, ""])
|
||||
asset_list = asset_list.drop(columns=["photo-supply"])
|
||||
|
||||
# Rename the columns
|
||||
asset_list = asset_list.rename(columns={
|
||||
"inspection-date": "Date of last EPC",
|
||||
"current-energy-efficiency": "SAP score on register",
|
||||
"current-energy-rating": "EPC rating on register",
|
||||
"property-type": "Property Type",
|
||||
"built-form": "Archetype - EPC",
|
||||
"total-floor-area": "Property Floor Area",
|
||||
"construction-age-band": "Property Age Band",
|
||||
"floor-height": "Property Floor Height",
|
||||
"number-habitable-rooms": "Number of Habitable Rooms",
|
||||
"walls-description": "Wall Construction",
|
||||
"roof-description": "Roof Construction",
|
||||
"floor-description": "Floor Construction",
|
||||
"mainheat-description": "Heating Type",
|
||||
"secondheat-description": "Secondary Heating",
|
||||
"transaction-type": "Reason for last EPC",
|
||||
"energy-consumption-current": "Heat Demand (kWh/m2)",
|
||||
})
|
||||
asset_list = asset_list
|
||||
|
||||
asset_list["Estimated Number of Floors"] = asset_list.apply(
|
||||
lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue