Merge pull request #287 from Hestia-Homes/ha-analysis-3

Ha analysis 3
This commit is contained in:
KhalimCK 2024-03-26 18:05:46 +00:00 committed by GitHub
commit c81b03c458
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 6431 additions and 887 deletions

2
.idea/.gitignore generated vendored
View file

@ -1,3 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
# GitHub Copilot persisted chat sessions
/copilot/chatSessions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -147,7 +147,8 @@ class Property:
# self.base_difference_record.df
def adjust_difference_record_with_recommendations(
self, property_recommendations,
self,
property_recommendations,
property_representative_recommendations
):
"""

View file

@ -30,7 +30,7 @@ vartypes = {
'environment-impact-potential': "Int64",
'glazed-type': 'str',
'heating-cost-current': 'float',
'address3': 'str',
# 'address3': 'str',
'mainheatcont-description': 'str',
'sheating-energy-eff': 'str',
'property-type': 'str',
@ -40,7 +40,7 @@ vartypes = {
'mechanical-ventilation': 'str',
'hot-water-cost-current': 'str',
'county': 'str',
'postcode': 'str',
# 'postcode': 'str',
'solar-water-heating-flag': 'str',
'constituency': 'str',
'co2-emissions-potential': 'float',
@ -55,7 +55,7 @@ vartypes = {
# 'inspection-date': str,
'mains-gas-flag': 'str',
'co2-emiss-curr-per-floor-area': 'float',
'address1': 'str',
# 'address1': 'str',
'heat-loss-corridor': 'str',
'flat-storey-count': "Int64",
'constituency-label': 'str',
@ -67,7 +67,7 @@ vartypes = {
'roof-description': 'str',
'floor-energy-eff': 'str',
'number-habitable-rooms': 'float',
'address2': 'str',
# 'address2': 'str',
'hot-water-env-eff': 'str',
'posttown': 'str',
'mainheatc-energy-eff': 'str',
@ -98,7 +98,7 @@ vartypes = {
# 'lodgement-date',
'extension-count': "Int64",
'mainheatc-env-eff': 'str',
'lmk-key': 'str',
# 'lmk-key': 'str',
'wind-turbine-count': "Int64",
'tenure': 'str',
'floor-level': 'str',
@ -147,6 +147,7 @@ class SearchEpc:
uprn: [int, None] = None,
size=None,
property_type=None,
fast=False
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
@ -187,6 +188,7 @@ class SearchEpc:
self.size = size if size is not None else 25
self.property_type = property_type
self.fast = fast
@classmethod
def get_house_number(cls, address: str) -> str | None:
@ -365,9 +367,6 @@ class SearchEpc:
# Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
# Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
# Get the uprn from the newest record for this home
uprns = {r["uprn"] for r in rows if r["uprn"]}
# We can sometimes have no uprn for a property
@ -384,6 +383,12 @@ class SearchEpc:
uprn = uprns.pop() if uprns else None
if self.fast:
return newest_epc, [], {}, "", "", None
# Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
@staticmethod
@ -575,6 +580,11 @@ class SearchEpc:
property_type=property_type
)
# If we have a missing lodgement date, we fill it with inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set it to the mean of the non NA dates
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
# For each attribute, we need to determine the datatype and use an appropriate method
# to estimate.
estimated_epc = {}
@ -609,7 +619,11 @@ class SearchEpc:
# Insert an estimated lodgement datetime, with a weighted average
estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
# Extract lodgement date
estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
# It is possible that there is still no lodgement date, so we need to handle this
if pd.isnull(estimated_epc["lodgement-datetime"]):
estimated_epc["lodgement-date"] = None
else:
estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
estimated_epc["postcode"] = self.postcode
estimated_epc["uprn"] = self.uprn

View file

@ -145,6 +145,7 @@ class Eligibility:
"reason": None,
"thickness_classification": thickness_classification
}
return
# Insulation is already thick enough
self.loft = {
@ -164,8 +165,10 @@ class Eligibility:
"""
is_cavity = self.walls["is_cavity_wall"]
is_empty = (not self.walls["is_filled_cavity"]) or (
is_empty = (not self.walls["is_filled_cavity"])
is_as_built = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
and self.walls["is_assumed"]
)
is_partial_filled = "partial" in self.walls["clean_description"].lower()
# We look for potentially under performing cavities - anything that is assumed, as built and insulated
@ -175,6 +178,7 @@ class Eligibility:
is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
is_partial_filled_cavity = is_cavity and is_partial_filled
is_assumed_filled_cavity = is_cavity and is_as_built
is_underperforming_cavity = is_cavity and is_underperforming
# Check if it has internal or external wall insulation
@ -195,6 +199,13 @@ class Eligibility:
}
return
if is_assumed_filled_cavity:
self.cavity = {
"suitability": True,
"type": "as built assumed",
}
return
if is_partial_filled_cavity:
self.cavity = {
"suitability": True,
@ -340,13 +351,35 @@ class Eligibility:
# Check if the property is suitable for cavity wall
self.cavity_insulation()
self.loft_insulation()
self.gbis_warmfront = (self.cavity["suitability"]) and (
int(self.epc["current-energy-efficiency"]) <= 68
)
current_sap = int(self.epc["current-energy-efficiency"])
# We have a strict suitability check and a non-strict check
def check_eco4_warmfront(self, post_retrofit_sap=None):
# Perfect strictness
if (self.cavity["type"] == "empty") and (current_sap < 69):
self.gbis_warmfront = {
"eligible": True,
"strict": True,
"message": "Perfect suitability",
}
return
# Near perfect
if self.cavity["suitability"] and (current_sap < 69):
self.gbis_warmfront = {
"eligible": True,
"strict": True,
"message": "Near perfect suitability",
}
return
self.gbis_warmfront = {
"eligible": False,
"strict": False,
"message": "All conditions fail",
}
def check_eco4_warmfront(self):
"""
This function will check if the property is eligible for funding under the ECO4 scheme
@ -378,49 +411,121 @@ class Eligibility:
self.cavity_insulation()
self.loft_insulation()
# make sure conditions 2 and 3 are true
is_eligible = self.cavity["suitability"] & self.loft["suitability"]
# We put in a placeholder when the roof is not a loft
if self.loft["reason"] == "roof not loft":
self.loft["thickness"] = 999
if current_sap >= 69:
# Case 1: No conditions meet
if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
self.eco4_warmfront = {
"eligible": False,
"message": "sap too high",
"strict": False,
"message": "All conditions fail",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
if post_retrofit_sap is None:
if current_sap >= 55:
message = "Possibly eligible but property currently EPC D"
else:
message = "subject to post retrofit sap" if is_eligible else "not eligible"
# Update the message to flag properties that failed just because of a full cavity.
# We need to double check that the wall is a cavity, that the loft is suitable and that the
# sap is within reason
# We can then estimate the age of the cavity fill
if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
message = "Failed due to full cavity - check cavity age"
# Case 2 - perfect match
if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": is_eligible,
"message": message,
"eligible": True,
"strict": True,
"message": "Perfect suitability",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
is_eligible = is_eligible & (post_retrofit_sap >= 69)
# Case 2.5 - near perfect match - but we would not recommend this using the model
if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": True,
"strict": True,
"message": "Near perfect suitability",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
self.eco4_warmfront = {
"eligible": is_eligible,
"message": None,
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 3 - cavity is suitable, loft is within 150mm, sap is good
if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets cavity, loft borderline, meets sap",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 3 - cavity is suitable, loft is not, sap is good
if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets cavity and sap",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 4 - cavity is not suitable, loft is, sap is good - we say this is not eligible
if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": False,
"strict": False,
"message": "failed fabric check",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 5 - cavity and loft suitable, sap too high
if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets fabric, fails SAP check",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 6 - meets just cavity
if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets just cavity",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 7 - fails cavity and loft, but meets sap
if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": False,
"strict": False,
"message": "Fails cavity and loft, meets SAP",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 8 - fails cavity, meets loft, fails sap
if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
self.eco4_warmfront = {
"eligible": False,
"strict": False,
"message": "Fails cavity, meets loft, fails SAP",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
raise ValueError("Implement me")
def check_gbis(self):

View file

@ -387,17 +387,19 @@ def prepare_model_data_row(
}
simulations = [
[cavity_simulation],
[loft_simulation]
cavity_simulation,
loft_simulation
]
p.adjust_difference_record_with_recommendations(simulations)
recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy()
scoring_dict = p.create_recommendation_scoring_data(
property_id=p.id,
recommendation_record=recommendation_record,
recommendations=simulations,
primary_recommendation_id=cavity_simulation["recommendation_id"]
)
# Make sure we definitely have the correct data
cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
return [cavity_scoring, loft_scoring]
return [scoring_dict]
def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):

File diff suppressed because it is too large Load diff

View file

@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
self.df = self.df.loc[
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
def _remove_abnormal_change_in_floor_area(self):
"""
@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_sandstone_or_limestone"]
== expanded_df["is_sandstone_or_limestone_ending"]
)
]
]
elif component == "floor":
expanded_df = expanded_df[
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_to_external_air"]
== expanded_df["is_to_external_air_ending"]
)
]
]
elif component == "roof":
expanded_df = expanded_df[
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
expanded_df["has_dwelling_above"]
== expanded_df["has_dwelling_above_ending"]
)
]
]
return expanded_df

View file

@ -725,26 +725,26 @@ class EPCRecord:
if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
if self.old_data:
# Take the most recent
max_datetime = max(
[
old_record["lodgement-datetime"]
for old_record in self.old_data
if old_record["construction-age-band"]
not in DATA_ANOMALY_MATCHES
]
)
most_recent = [
old_record
old_age_bands = [
old_record["lodgement-datetime"]
for old_record in self.old_data
if old_record["lodgement-datetime"] == max_datetime
if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES
]
self.prepared_epc["construction-age-band"] = (
EPCDataProcessor.clean_construction_age_band(
most_recent[0]["construction-age-band"]
if old_age_bands:
max_datetime = max(old_age_bands)
most_recent = [
old_record
for old_record in self.old_data
if old_record["lodgement-datetime"] == max_datetime
]
self.prepared_epc["construction-age-band"] = (
EPCDataProcessor.clean_construction_age_band(
most_recent[0]["construction-age-band"]
)
)
)
self.construction_age_band = self.prepared_epc["construction-age-band"]
self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)

View file

@ -36,8 +36,11 @@ def app():
cleaned_data = {}
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
WALLS = []
for directory in tqdm(epc_directories):
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
z = data["WALLS_DESCRIPTION"].unique().tolist()
WALLS.extend(z)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold

View file

@ -122,6 +122,13 @@ class RoofAttributes(Definitions):
result["is_valid"] = "invalid" not in description
description = description.replace("invalid", "")
# We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm
if result["is_pitched"] or result["is_loft"]:
# Search for a regular expression that matches 150 insulation
match = re.search(r"(\d+\+?)\s*insulation", description)
if match:
result['insulation_thickness'] = match.group(1)
# insulation thickness
thickness_map = {
"ceiling insulated": "average",
@ -137,11 +144,11 @@ class RoofAttributes(Definitions):
# Remove the match from the description
# description = description.replace(key, "")
break
else:
# Extract insulation thickness in mm, if present
match = re.search(r'(\d+\+?)\s*mm', description)
if match:
result['insulation_thickness'] = match.group(1)
# Extract insulation thickness in mm, if present
match = re.search(r'(\d+\+?)\s*mm', description)
if match:
result['insulation_thickness'] = match.group(1)
if "insulation_thickness" not in result:
result['insulation_thickness'] = None

View file

@ -184,7 +184,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
logger.errpr("Incomplete credentials provided.")
return None
except Exception as e:
logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
return None
# Deserialize data from pickle format