debugging flattening of find my epc data

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-20 15:15:36 +00:00
parent f322e55b19
commit 746c42594c
4 changed files with 278 additions and 53 deletions

View file

@ -1024,6 +1024,28 @@ class AssetList:
def identify_worktypes(self, cleaned):
# Before we begin, we identify if a property has solar already as we use this
# for identifying cavity jobs
if self.non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
)
elif self.old_format_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
["solar pv on roof"]
)
)
else:
# We don't have an indication
existing_solar_non_intrusives_check = False
self.standardised_asset_list["property_has_solar"] = (
(self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") |
existing_solar_non_intrusives_check |
(self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR])
)
# If we have non-intrusives completed, we can use this to identify work types
######################################################
# Empty cavity:
@ -1047,22 +1069,76 @@ class AssetList:
# We set the filter to False, as we have no non-intrusives
non_intrusives_wall_filter = False
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] <= self.EMPTY_CAVITY_SAP_THRESHOLD
if self.landlord_year_built is None:
# The landlord won't always give us year built
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(
self.standardised_asset_list["epc_year_upper_bound"] <= 2002
) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] <= self.EMPTY_CAVITY_SAP_THRESHOLD
)
)
)
# Let's also flag work that looks eligible without the SAP filter
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = (
# Let's also flag work that looks eligible without the SAP filter
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(self.standardised_asset_list["epc_year_upper_bound"] <= 2002)
)
else:
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) |
(self.standardised_asset_list["epc_year_upper_bound"] <= 2002)
) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] <= self.EMPTY_CAVITY_SAP_THRESHOLD
) & (
# If the property has solar, there's a chance it won't qualify
~self.standardised_asset_list["property_has_solar"]
)
)
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) |
(self.standardised_asset_list["epc_year_upper_bound"] <= 2002)
) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] <= self.EMPTY_CAVITY_SAP_THRESHOLD
) & (
# Here we keep only properties that DO have solar, so they can be flagged separately (they may not qualify)
self.standardised_asset_list["property_has_solar"]
)
)
# Let's also flag work that looks eligible without the SAP filter
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
# If the property has solar, there's a chance it won't qualify
~self.standardised_asset_list["property_has_solar"]
)
# We also add a filter on anything that was generally identified by the non-intrusives
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] = (
(~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
non_intrusives_wall_filter &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002)
non_intrusives_wall_filter
)
# If non_intrusive_indicates_empty_cavity is True,
@ -1073,7 +1149,15 @@ class AssetList:
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"]
)
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] = np.where(
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] |
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"],
False,
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"]
)
self.standardised_asset_list["epc_indicates_empty_cavity"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
) & (
@ -1083,6 +1167,8 @@ class AssetList:
) & (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= self.EMPTY_CAVITY_SAP_THRESHOLD
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
)
)
@ -1096,26 +1182,38 @@ class AssetList:
) & (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]] > self.EMPTY_CAVITY_SAP_THRESHOLD
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
)
)
self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
(
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) |
(self.standardised_asset_list["epc_year_upper_bound"] <= 1995)
) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] <= self.EMPTY_CAVITY_SAP_THRESHOLD
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
)
)
self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
(
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) |
(self.standardised_asset_list["epc_year_upper_bound"] <= 1995)
) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] > self.EMPTY_CAVITY_SAP_THRESHOLD
) & (
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])
)
)
@ -1243,27 +1341,6 @@ class AssetList:
).sum():
raise ValueError("Both heating system checks are true - this should not be possible")
# Check 2: Does the property have solar already
if self.non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
)
elif self.old_format_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
["solar pv on roof"]
)
)
else:
# We don't have an indication
existing_solar_non_intrusives_check = False
self.standardised_asset_list["property_has_solar"] = (
(self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") |
existing_solar_non_intrusives_check |
(self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR])
)
# Check 3: Does the property meet the fabric condition
# Solar PV installs are subject to the minimum insulation requirements which means:
# 1) one of the following insulation measures must be installed as part of the same
@ -1627,6 +1704,26 @@ class AssetList:
# SAP below threshold
self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW]
)
self.standardised_asset_list["test"] = (
not_a_flat &
# Landlord data or EPC data indicates the heating system is appropriate
correct_heating_system &
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# SAP below threshold
self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW]
)
self.standardised_asset_list["test"] = np.where(
self.standardised_asset_list["solar_eligible_other_floor"],
False,
self.standardised_asset_list["test"]
)
self.standardised_asset_list["solar_eligible_other_floor_sap_above_threshold"] = (
not_a_flat &
# Landlord data or EPC data indicates the heating system is appropriate
@ -1773,14 +1870,30 @@ class AssetList:
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"],
"Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed",
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] &
pd.isnull(self.standardised_asset_list["cavity_reason"]),
"Non-Intrusive Data Showed Empty Cavity - property already has solar",
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] &
pd.isnull(self.standardised_asset_list["cavity_reason"]),
"Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed",
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] &
pd.isnull(self.standardised_asset_list["cavity_reason"]),
"Non-Intrusive Data Showed Empty Cavity but all SAP scores and year built allowed",
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
(
self.standardised_asset_list["epc_indicates_empty_cavity"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
"EPC Data Showed Empty Cavity",
self.standardised_asset_list["cavity_reason"]
@ -1788,7 +1901,8 @@ class AssetList:
self.standardised_asset_list["cavity_reason"] = np.where(
(
self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"]
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
"EPC Data Showed Empty Cavity but all SAP scores allowed",
self.standardised_asset_list["cavity_reason"]
@ -1798,7 +1912,8 @@ class AssetList:
(
self.standardised_asset_list["landlord_data_indicates_empty_cavity"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
~self.standardised_asset_list["epc_indicates_empty_cavity"]
~self.standardised_asset_list["epc_indicates_empty_cavity"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
"Landlord Data Showed Empty Cavity",
self.standardised_asset_list["cavity_reason"]
@ -1807,7 +1922,8 @@ class AssetList:
(
self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] &
~self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"]
~self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] &
pd.isnull(self.standardised_asset_list["cavity_reason"])
),
"Landlord Data Showed Empty Cavity but all SAP scores allowed",
self.standardised_asset_list["cavity_reason"],

View file

@ -100,8 +100,8 @@ def app():
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype"
landlord_built_form = "Archetype"
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
@ -418,6 +418,8 @@ def app():
epc_df = pd.concat(epc_data)
epc_df["estimated"] = epc_df["estimated"].fillna(False)
z = epc_df[epc_df["domna_property_id"] == eg["domna_property_id"].values[0]]
# We expand out the recommendations
recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
@ -457,10 +459,24 @@ def app():
if "find_my_epc_data" not in epc_df.columns:
epc_df["find_my_epc_data"] = None
find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop(
columns=["find_my_epc_data"]).join(
pd.json_normalize(epc_df["find_my_epc_data"])
)
find_my_epc_data = []
for _, x in epc_df.iterrows():
if x["find_my_epc_data"]:
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
**x["find_my_epc_data"]
}
)
else:
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
}
)
find_my_epc_data = pd.DataFrame(find_my_epc_data)
find_my_epc_data = find_my_epc_data.merge(
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
how="left", on=asset_list.DOMNA_PROPERTY_ID
@ -511,6 +527,91 @@ def app():
asset_list.flat_analysis()
################################################################
# WESTWARD - comparison between Kieran's method & automated
################################################################
# Check 1)
cavity_fills = pd.read_excel(
os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
sheet_name="Straight Fill"
)
cavity_fills = cavity_fills.merge(
asset_list.standardised_asset_list[
[asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
],
how="left",
left_on=asset_list.landlord_property_id,
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
)
cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified")
cavity_fills["cavity_reason"].value_counts()
# Didn't identify 3 properties because they're bedsits
# 4 properties were identified, not based on the non-intrusives but instead because
# Westward said they were built in 2003/2007. Have adjusted this to use the age from the
# epc as well, as EPC says 1975 and they look like 1975 properties
# 58 properties flagged as already having solar:
#
z = cavity_fills[
cavity_fills["cavity_reason"] == "Non-Intrusive Data Showed Empty Cavity - property already has solar"
]
df = asset_list.standardised_asset_list[
asset_list.standardised_asset_list[asset_list.STANDARD_LANDLORD_PROPERTY_ID].isin(
z[asset_list.landlord_property_id].values)
]
eg = df[df[asset_list.STANDARD_LANDLORD_PROPERTY_ID] == "TOTNEWINA0102300"]
z[["Address", "WFT EDIT Postcode", asset_list.landlord_property_id]]
z[[asset_list.STANDARD_FULL_ADDRESS, asset_list.STANDARD_POSTCODE, asset_list.ATTRIBUTE_HAS_SOLAR]]
# Check 2)
cavity_fills_with_solar = pd.read_excel(
os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
sheet_name="Solar PV - Straight Fill"
)
cavity_fills_with_solar = cavity_fills_with_solar.merge(
asset_list.standardised_asset_list[
[asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
],
how="left",
left_on=asset_list.landlord_property_id,
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
)
cavity_fills_with_solar["cavity_reason"] = cavity_fills_with_solar["cavity_reason"].fillna("Not identified")
# 203 properties total
# 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity)
check = cavity_fills_with_solar[
cavity_fills_with_solar["cavity_reason"] == "Non-Intrusive Data Showed Empty Cavity"
]
z = asset_list.standardised_asset_list[
asset_list.standardised_asset_list[asset_list.STANDARD_LANDLORD_PROPERTY_ID].isin(
check[asset_list.landlord_property_id].values)
]
z[asset_list.ATTRIBUTE_HAS_SOLAR].value_counts()
pd.set_option('display.max_columns', None)
z[[asset_list.STANDARD_FULL_ADDRESS, asset_list.STANDARD_POSTCODE, asset_list.ATTRIBUTE_HAS_SOLAR]]
not_flagged = asset_list.standardised_asset_list[
pd.isnull(asset_list.standardised_asset_list["solar_reason"])
]
# For everything not flagged for solar, identify why
reasons = []
for _, x in not_flagged.iterrows():
if x[asset_list.STANDARD_PROPERTY_TYPE] == "flat":
reason = "property is a flat"
else:
x[asset_list.EPC_API_DATA_NAMES["mainheat-description"]]
reasons.append(
{
asset_list.DOMNA_PROPERTY_ID: x["asset_list.DOMNA_PROPERTY_ID"],
"solar_exclusion_reason": reason,
}
)
asset_list.load_contact_details(
local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
sheet_name="Report 1",

View file

@ -187,7 +187,9 @@ class GoogleSolarApi:
# We constrain the roof area, based on the floor area to be more conservative
self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
if self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE:
if (
self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE
) | (self.roof_area < (2 - self.ROOF_AREA_TOLERANCE) * property_instance.roof_area):
self.roof_area = property_instance.roof_area
self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']

View file

@ -368,10 +368,16 @@ def app():
pprint(measure_counts[scenario_ids[0]])
pprint(measure_counts[scenario_ids[1]])
df = scenario_data[scenario_ids[1]]
z = df[
(df["Walls"] == "Cavity wall, as built, no insulation") & (~df["Recommendation: cavity_wall_insulation"])
]
# Do not get to EPC B:
# 5 are flats
# 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump -
# should be looked at but several surrounding properties have been surveyed in a similar fashion
# 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH.
# we could non-intrusively recommend a heat pump.
# 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing -
# manual review indicates that there are multiple trees surrounding the south-facing side of the property
# 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local
# area being surrounded by trees
# Scenario adjustments:
# Exclude: boiler_upgrade