mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
refactoring creation of epc dataset
This commit is contained in:
parent
4a6802a5a2
commit
37cc43adb1
2 changed files with 59 additions and 60 deletions
|
|
@ -202,6 +202,33 @@ class AssetList:
|
||||||
This class is used to standardise asset lists so that we can process the core information in a consistent manner.
|
This class is used to standardise asset lists so that we can process the core information in a consistent manner.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
EPC_API_DATA_NAMES = {
|
||||||
|
"uprn": "epc_os_uprn",
|
||||||
|
"address1": "epc_address1",
|
||||||
|
"address": "epc_address",
|
||||||
|
"postcode": "epc_postcode",
|
||||||
|
"inspection-date": "epc_inspection_date",
|
||||||
|
"current-energy-efficiency": "epc_sap_score_on_register",
|
||||||
|
"current-energy-rating": "epc_rating_on_register",
|
||||||
|
"property-type": "epc_property_type",
|
||||||
|
"built-form": "epc_archetype",
|
||||||
|
"total-floor-area": "epc_total_floor_area",
|
||||||
|
"construction-age-band": "epc_age_band",
|
||||||
|
"floor-height": "epc_floor_height",
|
||||||
|
"number-habitable-rooms": "epc_number_habitable_rooms",
|
||||||
|
"walls-description": "epc_wall_construction",
|
||||||
|
"roof-description": "epc_roof_construction",
|
||||||
|
"floor-description": "epc_floor_construction",
|
||||||
|
"mainheat-description": "epc_heating_type",
|
||||||
|
'mainheatcont-description': "epc_heating_controls",
|
||||||
|
"secondheat-description": "epc_secondary_heating",
|
||||||
|
"transaction-type": "epc_reason",
|
||||||
|
"energy-consumption-current": "epc_heat_demand",
|
||||||
|
}
|
||||||
|
FIND_EPC_DATA_NAMES = {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
DATETIME_REMAP = {
|
DATETIME_REMAP = {
|
||||||
"Pre 1900": datetime(year=1899, month=12, day=31),
|
"Pre 1900": datetime(year=1899, month=12, day=31),
|
||||||
}
|
}
|
||||||
|
|
@ -590,3 +617,18 @@ class AssetList:
|
||||||
|
|
||||||
def create_lookup_mappings(self):
|
def create_lookup_mappings(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def merge_data(self, df: pd.DataFrame) -> None:
    """
    Left-merge additional columns into the standardised asset list, keyed on the domna property id.

    Every row already in ``self.standardised_asset_list`` is kept (``how="left"``); rows of ``df``
    whose id is not present in the asset list are dropped by the merge.

    :param df: Dataframe holding the data to insert. It must contain the column named by
        ``self.DOMNA_PROPERTY_ID``, and each id may appear at most once.
    :raises ValueError: If the id column is missing from ``df`` or contains duplicated ids.
    :return: None - ``self.standardised_asset_list`` is replaced with the merged dataframe.
    """
    key = self.DOMNA_PROPERTY_ID

    if key not in df.columns:
        raise ValueError(f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}")

    # A duplicated key on the right-hand side would fan out rows of the asset list in a
    # left merge, so reject it up front rather than silently growing the list.
    if df[key].duplicated().any():
        raise ValueError(f"{self.DOMNA_PROPERTY_ID} contains duplicated IDs")

    self.standardised_asset_list = self.standardised_asset_list.merge(
        df, how="left", on=key
    )
|
||||||
|
|
|
||||||
|
|
@ -474,20 +474,22 @@ def app():
|
||||||
epc_data.append(csv_data)
|
epc_data.append(csv_data)
|
||||||
|
|
||||||
epc_df = pd.concat(epc_data)
|
epc_df = pd.concat(epc_data)
|
||||||
|
# TODO: TEMP!!!
|
||||||
|
epc_df = epc_df.rename(columns={"row_id": asset_list.DOMNA_PROPERTY_ID})
|
||||||
|
|
||||||
# We expand out the recommendations
|
# We expand out the recommendations
|
||||||
recommendations_df = epc_df[["row_id", "recommendations"]]
|
recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
|
||||||
|
|
||||||
unique_recommendations = set()
|
unique_recommendations = set()
|
||||||
for _, row in recommendations_df.iterrows():
|
for _, row in recommendations_df.iterrows():
|
||||||
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
||||||
|
|
||||||
columns = ["row_id"] + list(unique_recommendations)
|
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
|
||||||
transformed_data = []
|
transformed_data = []
|
||||||
for _, row in recommendations_df.iterrows():
|
for _, row in recommendations_df.iterrows():
|
||||||
# Initialize a dictionary for this row with False for all recommendations
|
# Initialize a dictionary for this row with False for all recommendations
|
||||||
row_data = {col: False for col in columns}
|
row_data = {col: False for col in columns}
|
||||||
row_data["row_id"] = row["row_id"]
|
row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
|
||||||
|
|
||||||
# Set True for each recommendation present in this row
|
# Set True for each recommendation present in this row
|
||||||
for rec in row["recommendations"]:
|
for rec in row["recommendations"]:
|
||||||
|
|
@ -500,10 +502,11 @@ def app():
|
||||||
transformed_df = pd.DataFrame(transformed_data)
|
transformed_df = pd.DataFrame(transformed_data)
|
||||||
# At the moment, we're only using a limited set of columns - let's just keep cavity wall insulation
|
# At the moment, we're only using a limited set of columns - let's just keep cavity wall insulation
|
||||||
# recommendations
|
# recommendations
|
||||||
transformed_df = transformed_df[["row_id", "Cavity wall insulation"]]
|
transformed_df = transformed_df[[asset_list.DOMNA_PROPERTY_ID, "Cavity wall insulation"]]
|
||||||
|
|
||||||
# Get the find my epc data
|
# Get the find my epc data
|
||||||
find_my_epc_data = epc_df[["row_id", "find_my_epc_data"]].drop(columns=["find_my_epc_data"]).join(
|
find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop(
|
||||||
|
columns=["find_my_epc_data"]).join(
|
||||||
pd.json_normalize(epc_df["find_my_epc_data"])
|
pd.json_normalize(epc_df["find_my_epc_data"])
|
||||||
)
|
)
|
||||||
# We check if we get the solar pv column:
|
# We check if we get the solar pv column:
|
||||||
|
|
@ -513,46 +516,15 @@ def app():
|
||||||
# Retrieve just the data we need
|
# Retrieve just the data we need
|
||||||
|
|
||||||
epc_df = epc_df[
|
epc_df = epc_df[
|
||||||
[
|
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
||||||
"row_id",
|
].rename(
|
||||||
"uprn",
|
columns=asset_list.EPC_API_DATA_NAMES
|
||||||
"address1",
|
|
||||||
"address",
|
|
||||||
"postcode",
|
|
||||||
"property-type",
|
|
||||||
"built-form",
|
|
||||||
"inspection-date",
|
|
||||||
"current-energy-rating",
|
|
||||||
"current-energy-efficiency",
|
|
||||||
"roof-description",
|
|
||||||
"walls-description",
|
|
||||||
"floor-description",
|
|
||||||
"transaction-type",
|
|
||||||
"secondheat-description",
|
|
||||||
"total-floor-area",
|
|
||||||
"construction-age-band",
|
|
||||||
"floor-height",
|
|
||||||
"number-habitable-rooms",
|
|
||||||
"mainheat-description",
|
|
||||||
'mainheatcont-description',
|
|
||||||
"energy-consumption-current",
|
|
||||||
"photo-supply",
|
|
||||||
]
|
|
||||||
].rename(
|
|
||||||
columns={"address1": "Address1 on EPC", "address": "Address on EPC", "postcode": "Postcode on EPC"}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
asset_list.merge_data(epc_df)
|
epc_df = epc_df.merge(
|
||||||
asset_list.insert_
|
|
||||||
|
|
||||||
asset_list = asset_list.merge(
|
|
||||||
epc_df,
|
|
||||||
how="left",
|
|
||||||
on="row_id"
|
|
||||||
).merge(
|
|
||||||
find_my_epc_data[
|
find_my_epc_data[
|
||||||
[
|
[
|
||||||
"row_id", "heating_text", "hot_water_text", 'Assessor’s name',
|
asset_list.DOMNA_PROPERTY_ID, "heating_text", "hot_water_text", 'Assessor’s name',
|
||||||
"Assessor's Telephone", "Assessor's Email", "Accreditation scheme",
|
"Assessor's Telephone", "Assessor's Email", "Accreditation scheme",
|
||||||
"Assessor’s ID", "Solar photovoltaics"
|
"Assessor’s ID", "Solar photovoltaics"
|
||||||
]
|
]
|
||||||
|
|
@ -564,31 +536,16 @@ def app():
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
how="left",
|
how="left",
|
||||||
on="row_id"
|
on=asset_list.DOMNA_PROPERTY_ID
|
||||||
)
|
)
|
||||||
|
|
||||||
|
asset_list.merge_data(epc_df)
|
||||||
|
|
||||||
asset_list["Has Solar PV"] = asset_list["Has Solar PV"] | ~asset_list["photo-supply"].isin(["0.0", 0, None, ""])
|
asset_list["Has Solar PV"] = asset_list["Has Solar PV"] | ~asset_list["photo-supply"].isin(["0.0", 0, None, ""])
|
||||||
asset_list = asset_list.drop(columns=["photo-supply"])
|
asset_list = asset_list.drop(columns=["photo-supply"])
|
||||||
|
|
||||||
# Rename the columns
|
# Rename the columns
|
||||||
asset_list = asset_list.rename(columns={
|
asset_list = asset_list
|
||||||
"inspection-date": "Date of last EPC",
|
|
||||||
"current-energy-efficiency": "SAP score on register",
|
|
||||||
"current-energy-rating": "EPC rating on register",
|
|
||||||
"property-type": "Property Type",
|
|
||||||
"built-form": "Archetype - EPC",
|
|
||||||
"total-floor-area": "Property Floor Area",
|
|
||||||
"construction-age-band": "Property Age Band",
|
|
||||||
"floor-height": "Property Floor Height",
|
|
||||||
"number-habitable-rooms": "Number of Habitable Rooms",
|
|
||||||
"walls-description": "Wall Construction",
|
|
||||||
"roof-description": "Roof Construction",
|
|
||||||
"floor-description": "Floor Construction",
|
|
||||||
"mainheat-description": "Heating Type",
|
|
||||||
"secondheat-description": "Secondary Heating",
|
|
||||||
"transaction-type": "Reason for last EPC",
|
|
||||||
"energy-consumption-current": "Heat Demand (kWh/m2)",
|
|
||||||
})
|
|
||||||
|
|
||||||
asset_list["Estimated Number of Floors"] = asset_list.apply(
|
asset_list["Estimated Number of Floors"] = asset_list.apply(
|
||||||
lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
|
lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue