adding the solar floor eligibility criteria

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-21 12:39:06 +00:00
parent c0ebffb6cb
commit 4db9d48e36
3 changed files with 105 additions and 11 deletions

View file

@ -306,6 +306,17 @@ class AssetList:
"cavity wall, as built, no insulation",
]
# List of strings that we look for in the EPC data, where substrings indicate that the wall is insulated
EPC_INSULATED_WALLS_SUBSTRINGS = [
", insulated", "with external insulation", "with internal insulation", "filled cavity"
]
# List of strings that we look for in the EPC data, where substrings indicate that the roof is insulated
EPC_INSULATED_ROOF_SUBSTRINGS = [
"(another dwelling above)", ", insulated", ", insulated (assumed) ",
", ceiling insulated",
]
def __init__(
self,
local_filepath,
@ -861,7 +872,10 @@ class AssetList:
processed_age_band, how="left"
)
def identify_worktypes(self):
def identify_worktypes(self, cleaned):
if not self.non_intrusives_present:
raise NotImplementedError("Need to implement the case for non-intrusives")
# If we have non-intrusives completed, we can use this to identify work types
@ -892,6 +906,17 @@ class AssetList:
)
)
self.standardised_asset_list["empty_cavity"] = (
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] |
self.standardised_asset_list["epc_indicates_empty_cavity"]
)
# We add a reason
self.standardised_asset_list["empty_cavity_reason"] = np.where(
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"],
"Non-Intrusive Data",
"EPC Data"
)
######################################################
# Extraction
######################################################
@ -967,13 +992,55 @@ class AssetList:
)
)
EPC_INSULATED_WALLS_SUBSTRINGS = [
", insulated", "with external insulation", "with internal insulation", "filled cavity"
]
# TODO: We don't have information about the roof from this landlord
self.standardised_asset_list["solar_epc_walls_insulated"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().str.contains(
"|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS)
)
)
self.standardised_asset_list["landlord_wall_construction"].value_counts()
# We merge on the u-value for average thermal transmittance
roof_uvalue_data = pd.DataFrame(cleaned["roof-description"])
roof_uvalue_data = roof_uvalue_data[
~pd.isnull(roof_uvalue_data["thermal_transmittance"])
][["original_description", "thermal_transmittance"]].rename(
columns={
"original_description": self.EPC_API_DATA_NAMES["roof-description"],
"thermal_transmittance": "roof_u_value"
}
)
EPC_INSULATED_ROOF_SUBSTRINGS = [
"(another dwelling above)", "limited insulation", "(other premises above)",
", no insulation",
]
self.standardised_asset_list = self.standardised_asset_list.merge(
roof_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
)
# If the u-value of a roof is less than or equal to 0.7 we consider it insulated
self.standardised_asset_list["solar_epc_roof_insulated"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains(
"|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), regex=False
) | (
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
lambda x: int(x) >= 270 if str(x).isdigit() else False
)
) | (
self.standardised_asset_list["roof_u_value"].apply(
lambda x: x <= 0.7 if not pd.isnull(x) else False
)
)
)
self.standardised_asset_list["solar_epc_loft_needs_topup"] = self.standardised_asset_list[
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
lambda x: int(x) < 270 if str(x).isdigit() else False
)
self.standardised_asset_list["solar_epc_floor_is_solid"] = self.standardised_asset_list[
self.EPC_API_DATA_NAMES["floor-description"]
].str.lower().str.contains("solid")
self.standardised_asset_list["solar_epc_floor_is_solid"] = (
self.standardised_asset_list["solar_epc_floor_is_solid"].fillna(False)
)
z = self.standardised_asset_list[
self.standardised_asset_list["solar_epc_floor_is_solid"] == True
]

View file

@ -7,4 +7,5 @@ fuzzywuzzy
boto3
openpyxl
openai
tiktoken
tiktoken
msgpack

View file

@ -391,13 +391,28 @@ def app():
transformed_df = pd.DataFrame(transformed_data)
# At the moment, we're only using a limited set of columns - let's just keep cavity wall insulation
# recommendations
transformed_df = transformed_df[[asset_list.DOMNA_PROPERTY_ID, "Cavity wall insulation"]]
transformed_df = transformed_df[
[
asset_list.DOMNA_PROPERTY_ID, "Cavity wall insulation", "Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
]
]
transformed_df["epc_has_floor_recommendation"] = (
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
transformed_df["Floor insulation (suspended floor)"]
)
# Get the find my epc data
find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop(
columns=["find_my_epc_data"]).join(
pd.json_normalize(epc_df["find_my_epc_data"])
)
find_my_epc_data = find_my_epc_data.merge(
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
how="left", on=asset_list.DOMNA_PROPERTY_ID
)
# We check if we get the solar pv column:
if "Solar photovoltaics" not in find_my_epc_data.columns:
find_my_epc_data["Solar photovoltaics"] = False
@ -425,6 +440,17 @@ def app():
asset_list.extract_attributes()
# TODO - Use this!
import msgpack
from utils.s3 import read_from_s3
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
)
cleaned = msgpack.unpackb(cleaned, raw=False)
asset_list.identify_worktypes(cleaned)
if HAS_NON_INTRUSIVES:
# Empty cavity:
# 1) Has been flagged on the non-intrusives as being empty or partially filled