Adding matching from submissions sheet to asset list

This commit is contained in:
Khalim Conn-Kowlessar 2025-04-13 21:39:35 +01:00
parent 2d71ad25ef
commit 3cfe938e27
16 changed files with 509 additions and 45 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -4,6 +4,8 @@ import re
import tiktoken
from pprint import pprint
from datetime import datetime
from docutils.utils.math.tex2mathml_extern import blahtexml
from openai import OpenAI
import numpy as np
import pandas as pd
@ -663,7 +665,10 @@ class AssetList:
non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)
if self.old_format_non_intrusives_present:
non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns
]
self.keep_variables += non_intrusive_columns
@ -731,7 +736,7 @@ class AssetList:
'PIMSS EMPTY'
]
if pd.isnull(date_str) or date_str in known_errors:
if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
return None
if isinstance(date_str, str):
@ -752,6 +757,10 @@ class AssetList:
if isinstance(date_str, datetime):
return date_str.year
if isinstance(date_str, float):
if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4):
return int(date_str)
# Check if date_str is a year itself
if str(date_str).isdigit() & (len(str(date_str)) == 4):
return int(date_str)
@ -1325,7 +1334,7 @@ class AssetList:
)
self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = (
self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
["electric storage heaters", "room heaters", "electric radiators"]
["electric storage heaters", "room heaters", "electric radiators", "no heating"]
)
)
@ -2099,6 +2108,9 @@ class AssetList:
nomatch = []
for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)):
if pd.isnull(x[outcomes_address]):
continue
# Check if we have an id
oid = x[outcomes_id] if outcomes_id is not None else None
@ -2120,6 +2132,8 @@ class AssetList:
address_clean = x[outcomes_address].lower().replace(",", "").replace(" ", " ")
self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower()
matched = self.standardised_asset_list[
(self.standardised_asset_list[
self.STANDARD_FULL_ADDRESS
@ -2140,7 +2154,9 @@ class AssetList:
].copy()
if not matched.empty:
matched["houseno"] = matched.apply(
lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]),
lambda x: SearchEpc.get_house_number(
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
),
axis=1
)
@ -2155,8 +2171,6 @@ class AssetList:
}
)
continue
elif matched.shape[0] > 1:
raise NotImplementedError("Check me")
elif not matched.empty:
# Use Levenshtein distance to match
matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
@ -2254,19 +2268,123 @@ class AssetList:
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
# We just need to check if any were cancelled
master_to_append = master_data[
["UPRN", install_col, submission_col]
].rename(
if "UPRN" in master_data.columns:
# We just need to check if any were cancelled
master_to_append = master_data[
["UPRN", install_col, submission_col]
].rename(
columns={
"UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
install_col: "survey_status",
submission_col: "submission_date"
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
master_surveyed.append(master_to_append)
continue
master_data["row_id"] = master_data.index
self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
lambda x: SearchEpc.get_house_number(
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
),
axis=1
)
# Otherwise, we need to match algorithmically
logger.info("Matching master data to asset list")
matched = []
unmatched = []
for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
if pd.isnull(row["POSTCODE"]):
continue
postcode_no_space = row["POSTCODE"].strip().replace(" ", "").lower()
df = self.standardised_asset_list[
(
self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ",
"")
== postcode_no_space
)
]
house_no = row["NO"]
if house_no in df["house_no"].values:
df = df[df["house_no"] == house_no]
if df.shape[0] != 1:
# Not a unique match yet: narrow by street / block name (substring match) when possible
if any(df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])):
df = df[
df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])
]
else:
# Street name not found verbatim: fall back to fuzzy matching, keeping rows that score above 90
df = df[
df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
lambda x: process.extractOne(
" ".join([row["NO"], row["Street / Block Name"], row["TOWN"]]).lower(),
x
)[1]
) > 90
]
if df.shape[0] == 0:
unmatched.append(row["row_id"])
continue
if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
" ".join([row["NO"], row["Street / Block Name"]]).lower()
)):
df = df[
df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
" ".join([row["NO"], row["Street / Block Name"]]).lower()
)
]
if any(
df[self.STANDARD_PROPERTY_TYPE].str.contains(
row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
)
):
# We ignore "block of flats" entries
df = df[
df[self.STANDARD_PROPERTY_TYPE].str.contains(
row["PROPERTY TYPE As per table emailed"].split(" ")[-1].lower()
) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
]
if df.shape[0] != 1:
# We have multiple matches
raise NotImplementedError("FIX ME")
matched.append(
{
"row_id": row["row_id"],
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
# We match the "UPRN", which is the landlord's ID, onto the master sheet
matched = pd.DataFrame(matched)
master_to_append = master_data[["row_id", install_col, submission_col]].merge(
matched, how="left", on="row_id"
).rename(
columns={
"UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
install_col: "survey_status",
submission_col: "submission_date"
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
master_surveyed.append(master_to_append)
unmatched_df = master_data[
master_data["row_id"].isin(unmatched)
]
submissions_unmatched.append(unmatched_df)
master_surveyed = pd.concat(master_surveyed)
master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])]

View file

@ -89,6 +89,42 @@ def app():
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
# - Or the insulation required is loft/cavity (floors should be solid)
# Bromford
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
"Rebuild/Prepared data/")
data_filename = "asset_list.xlsx"
sheet_name = "Sheet1"
postcode_column = 'PostCode'
fulladdress_column = "FullAddress"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "ConYear"
landlord_os_uprn = None
landlord_property_type = "AssetTypeDesc"
landlord_built_form = "PropTypeDesc"
landlord_wall_construction = "Construction type"
landlord_roof_construction = None
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "Asset"
landlord_sap = None
outcomes_filename = "outcomes.xlsx"
outcomes_sheetname = "Sheet1"
outcomes_postcode = "Postcode"
outcomes_houseno = "No"
outcomes_id = None
outcomes_address = "Address"
master_filepaths = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
"3 submissions.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
"4 submissions.csv",
]
master_to_asset_list_filepath = None
phase = False
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
data_filename = "Torus Property Asset List - Phase 1.xlsx"

View file

@ -107,5 +107,42 @@ BUILT_FORM_MAPPINGS = {
'Semi-detached': 'semi-detached',
'Detached': 'detached',
'Flat / maisonette': 'unknown',
'2014 onwards': 'unknown'
'2014 onwards': 'unknown',
'Semi Detached': 'semi-detached',
'End Terraced': 'end-terrace',
'Basement': 'basement',
'No': 'unknown',
'Mid Terrace': 'mid-terrace',
'Link Detached': 'detached',
'Mid Terraced': 'mid-terrace',
'Ground Floor': 'ground floor',
'End Terrace': 'end-terrace',
'Sheltrd Semi Det': 'semi-detached',
'Shop': 'unknown',
'Fourth Floor': 'mid-floor',
'Terraced': 'mid-terrace',
'Leasehold Terr': 'mid-terrace',
'Room': 'unknown',
'Second Floor': 'mid-floor',
'Third Floor': 'mid-floor',
'Office': 'unknown',
'First Floor Over Arch': 'ground floor',
'16-25 IND-PPL': 'unknown',
'Seventh Floor': 'top-floor',
'Sheltered': 'unknown',
'Shelt Bung End': 'end-terrace',
'Room In Shared Accommodation': 'unknown',
'Sheltred Bung Terrace': 'mid-terrace',
'Garage In Block': 'unknown',
'First Floor': 'ground floor',
'First Floor Over Garage': 'ground floor',
'Leasehold': 'unknown',
'Sheltred Bung': 'unknown',
'Garage': 'unknown',
'Sixth Floor': 'top-floor',
'Sheltered Bung': 'semi-detached',
'Guest': 'unknown',
'Fifth Floor': 'mid-floor'
}

View file

@ -23,7 +23,8 @@ STANDARD_HEATING_SYSTEMS = {
'gas combi boiler',
'unknown',
"electric ceiling",
"electric underfloor"
"electric underfloor",
"no heating"
}
HEATING_MAPPINGS = {
@ -87,7 +88,7 @@ HEATING_MAPPINGS = {
'Heat pump (air) Electricity': 'air source heat pump',
'Room heaters Electricity': 'electric radiators',
'Room heaters Oil': 'room heaters',
'No heating system ND': 'unknown',
'No heating system ND': 'no heating',
'Heat pump (wet) Electricity': 'ground source heat pump',
'Room heaters Biomass': 'room heaters',
'ND Solid fuel': 'unknown',
@ -98,11 +99,11 @@ HEATING_MAPPINGS = {
'Storage heating Electricity': 'electric storage heaters',
'ND Electricity': 'unknown',
'Community heating Community (non-gas)': 'district heating',
'No heating system N/A': 'unknown',
'No heating system N/A': 'no heating',
'Boiler Solid fuel': 'boiler - other fuel',
'Community heating Community (mains gas)': 'communal gas boiler',
'Boiler Biomass': 'boiler - other fuel',
'No heating system Mains gas': 'unknown',
'No heating system Mains gas': 'no heating',
'Storage heaters': 'electric storage heaters',
'Air Source': 'air source heat pump',
@ -170,5 +171,36 @@ HEATING_MAPPINGS = {
'Heat pump (wet)': 'air source heat pump',
'Electric ceiling heating': 'electric ceiling',
'Electric under floor heating': 'electric underfloor',
'Community heating': 'district heating'
'Community heating': 'district heating',
'Wet - Radiators Air Source Heat Pump': 'air source heat pump',
'Wet - Radiators Electric': 'electric boiler',
'Storage Heaters': 'high heat retention storage heaters',
'Wet - Radiators Oil': 'oil boiler',
'Communal Wet - Radiators Gas': 'communal gas boiler',
'Electric - Storage/Panel Heaters Electric': 'electric storage heaters',
'Gas Central Heating': 'gas combi boiler',
'Wet - Radiators Solar': 'other',
'Electric - Storage/Panel Heaters LPG': 'electric storage heaters',
'No Heating Solid': 'no heating',
'Wet - Underfloor Gas': 'gas condensing boiler',
'No Heating Electric': 'no heating',
'Oil Fired Central Heating': 'oil boiler',
'Warm Air Gas': 'other',
'Communal Boilers': 'communal gas boiler',
'Wet - Radiators Gas': 'gas combi boiler',
'Wet - Radiators Solid': 'solid fuel',
'Wet - Radiators LPG': 'other',
'No Heating Gas': 'no heating',
'No Heating': 'no heating',
'Panel Heaters': 'electric radiators',
'Rointe Electric Heating': 'electric storage heaters',
'Underfloor Heating': 'electric underfloor',
'Air Source Heating': 'air source heat pump',
'Warm Air Electric': 'other',
'Communal Wet - Radiators Electric': 'communal gas boiler',
'Wet - Underfloor Solar': 'other',
'No Heating Required Gas': 'unknown',
'Electric - Storage/Panel Heaters Gas': 'electric storage heaters',
'Electric - Storage/Panel Heaters Solid': 'electric storage heaters'
}

View file

@ -151,5 +151,32 @@ PROPERTY_MAPPING = {
'Flat: Enclosed End Terrace: Mid Floor': 'flat',
'Flat: Enclosed End Terrace: Ground Floor': 'flat',
'Flat: Enclosed Mid Terrace: Top Floor': 'flat',
'2013 onwards': 'unknown'
'2013 onwards': 'unknown',
'House 2 Storey': 'house',
'Bung': 'bungalow',
'House 3 Storey': 'house',
'Shared Flat': 'flat',
'd': 'unknown',
'Mais': 'maisonette',
'e': 'unknown',
'Shared House': 'house',
'House 4 Storey': 'house',
'Shared Bungalow': 'bungalow',
'Detch': 'house',
'Shop': 'other',
'Terr': 'house',
'Terrace': 'house',
'Description': 'unknown',
'Hse': 'house',
'Room': 'other',
'Office': 'other',
'Room In Shared Accommodation': 'other',
'Apartment': 'flat',
'm': 'unknown',
'Garage': 'other',
'Parking Space': 'other',
'Community Centre': 'other',
'Communal Facility': 'other',
'Semi': 'house'
}

View file

@ -22,5 +22,6 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'ND (inferred)': 'unknown',
'2018 onwards': 'unknown',
'Pitched (vaulted ceiling)': 'pitched insulated',
np.nan: "unknown"
np.nan: "unknown",
None: "unknown"
}

View file

@ -157,5 +157,14 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Timber frame': 'timber frame unknown insulation',
'2017 onwards': 'new build - average thermal transmittance',
'ND (inferred)': 'unknown',
'Flat / maisonette': 'other'
'Flat / maisonette': 'other',
'Other': 'other',
'Timber Frame': 'timber frame unknown insulation',
'Cavity Wall': 'cavity unknown insulation',
'Non-Traditional': 'system built',
'PRC': 'system built',
'Cross Wall': 'system built',
'Solid Wall': 'solid brick unknown insulation',
'Traditional': 'other'
}

View file

@ -107,7 +107,10 @@ class Property:
# cost and instead, provide a message that the measure has already been installed
self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
self.non_invasive_recommendations = non_invasive_recommendations
self.non_invasive_recommendations = (
non_invasive_recommendations['recommendations'] if
non_invasive_recommendations else []
)
# This is a list of measures that have been recommended for the property
if isinstance(measures, list):
self.measures = measures

View file

@ -83,7 +83,8 @@ class PlanTriggerRequest(BaseModel):
exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
# This is a list of measures that we want to be included, if they are options
required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
# Default to empty
required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1)
scenario_name: Optional[str] = ""
multi_plan: Optional[bool] = False

View file

@ -0,0 +1,192 @@
"""
12th April 2025
This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a
standardised asset list
"""
import pandas as pd
# Step 1
# The inspections data is spread across three different files. We attempt to produce one finalised asset list, with
# comprehensive inspections
# Primary asset list
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset "
"List.xlsx",
sheet_name="Asset List"
)
#
inspections_1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
"MDS.xlsx",
sheet_name="Data list"
)
inspections_1["Heating Type"] = (inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]).str.strip()
inspections_2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
"MERLIN LANE.xlsx",
sheet_name="Report"
)
inspections_2["AssetTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[-1]
inspections_2["PropTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[:-1].str.join(" ")
inspections_3 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
"SEVERN VALE - KLARKE.xlsx",
sheet_name="Asset report"
)
inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"]
# On inspections 3, we have multiple sheets which describe the heating
heating_systems = []
for sheet_name in [
"Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating",
"Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating",
"Communal Boilers", "Panel Heaters"
]:
df = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
"Rebuild/Inspections/BROMFORD "
"SEVERN VALE - KLARKE.xlsx",
sheet_name=sheet_name
)
df = df[["UPRN"]]
df["Heating Type"] = sheet_name
heating_systems.append(df)
heating_systems = pd.concat(heating_systems)
# We have no clue which one is correct, we have some dupes
heating_systems = heating_systems.drop_duplicates("UPRN")
heating_systems = heating_systems.rename(columns={"UPRN": "Asset"})
heating_systems["Asset"] = heating_systems["Asset"].astype(int)
inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset")
# Create a consolidated inspections sheet
inspections = pd.concat(
[
inspections_1[["Asset", "Construction type", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
inspections_2[["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
inspections_3[["Asset", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
]
)
inspections_address_data = pd.concat(
[
inspections_1[
["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", 'ManAreaDesc', ]
],
inspections_2[
['Asset', 'FullAddress', 'AccomType', "AssetTypeDesc", "PropTypeDesc", 'ConYear', 'Postcode']
].rename(columns={"Postcode": "PostCode"}),
inspections_3[
['Asset', "FullAddress", 'T1_Postcode', 'T1_Build Year', 'T1_AssetType']
].rename(
columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"}
),
]
)
# Remove some error values
inspections = inspections[~inspections["Asset"].isin(
[
"They're all green partial fill they're all green this",
"South Staffordshire District Council",
'Blk Milton Crt F9-10, Perton, Wolverhampton'
]
)]
inspections["Asset"] = inspections["Asset"].astype(str)
asset_list["Asset"] = asset_list["Asset"].astype(str)
inspections_address_data["Asset"] = inspections_address_data["Asset"].astype(str)
inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True)
# We have some cases where the inspections data has dupes on Asset (the ID column). We take the instance that is
# populated
inspections = inspections.sort_values(by='WFT Findings', na_position='last')
inspections = inspections.drop_duplicates(subset='Asset', keep='first')
# We have dupes in the asset list
asset_list = asset_list.drop_duplicates("Asset")
# Merge on
missed_asset_ids = inspections[
~inspections["Asset"].isin(asset_list["Asset"].values)
]["Asset"].values
missed_assets = inspections_address_data[
inspections_address_data["Asset"].isin(missed_asset_ids)
]
missed_assets = missed_assets.drop_duplicates("Asset")
# We produce a larger asset list
asset_list = pd.concat([asset_list, missed_assets])
asset_list = asset_list.merge(
inspections, how="left", on="Asset"
)
asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note")
# Store
# asset_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
# "data/asset_list.xlsx"
# )
# We now prepare outcomes into a single file
pv_outcomes = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV "
"Outcomes.csv",
encoding='cp1252'
)
pv_outcomes["measure_type"] = "solar"
other_outcomes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) "
"15.04.2024.xlsx",
sheet_name="ECO4 & GBIS",
header=1
)
other_outcomes["measure_type"] = "cwi"
combined_outcomes = pd.concat(
[
other_outcomes[["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES"]].rename(
columns={
"NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing",
"OUTCOMES": "Outcome", "NOTES": "Notes"
}
),
pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes"]]
]
)
# Store
# combined_outcomes.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
# "data/outcomes.xlsx"
# )
# Submissions sheet -
eco3_submissions = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv",
encoding='cp1252'
)
# Get rid of the unnamed columns
unnamed_columns = [c for c in eco3_submissions.columns if "Unnamed: " in c]
eco3_submissions = eco3_submissions.drop(columns=unnamed_columns)
# Store
eco3_submissions.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv",
index=False
)
eco4_submissions = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv",
)
same_cols = [c for c in eco4_submissions.columns if c in eco3_submissions.columns]

View file

@ -4,7 +4,7 @@ from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
PORTFOLIO_ID = 140
PORTFOLIO_ID = 141
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
@ -19,17 +19,20 @@ def app():
asset_list = [
{
"address": "Brow Cottage",
"postcode": "YO18 7PZ",
"uprn": 10007630752,
"property_type": "House",
"built_form": "Semi-Detached",
"address": "196 Merrow Street",
"postcode": "SE17 2NP",
"uprn": 200003423454,
"patch": True
},
{
"address": "Wyburn",
"postcode": "DT1 2LL",
"uprn": 100040630290
"address": "65 Liverpool Grove",
"postcode": "SE17 2HP",
"uprn": 200003423194
},
{
"address": "2 Brettell Street",
"postcode": "SE17 2NZ",
"uprn": 200003423607
},
]
asset_list = pd.DataFrame(asset_list)
@ -71,12 +74,16 @@ def app():
valuation_data = [
{
"valuation": 469_000,
"uprn": 10007630752,
"valuation": 339_000,
"uprn": 200003423454,
},
{
"valuation": 373_000,
"uprn": 100040630290
"valuation": 374_000,
"uprn": 200003423194
},
{
"valuation": 719_000,
"uprn": 200003423607
},
]
# Store valuation data to s3

View file

@ -1,7 +1,7 @@
import os
import re
import openpyxl
import Levenshtein
from fuzzywuzzy import fuzz
from pathlib import Path
import msgpack
from datetime import datetime
@ -2771,7 +2771,8 @@ class DataLoader:
match_to = [x.replace(" ", "") for x in match_to]
# Perform matching between full key and match_to
distances = [Levenshtein.distance(matching_string, s) for s in match_to]
distances = [100 - fuzz.ratio(matching_string, s) for s in match_to]
best_match_index = distances.index(min(distances))
# We might want to consider a threshold for the distance; however, we don't
# apply one for the moment

View file

@ -635,7 +635,7 @@ class Recommendations:
# By limiting here, we don't change the value in current_phase_values. This means that the
# future recommendations won't have an impact that is too large
li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit(
property_instance.data["roof-energy-eff"], property_instance.data["extension-count"]
property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"]
)
if li_sap_limit is not None:
property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit)

View file

@ -64,16 +64,16 @@ class RoofRecommendations:
)
@classmethod
def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count):
def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness):
"""
Get the SAP limit for loft insulation
:param roof_energy_eff:
:return:
"""
if extension_count == 0:
# No limit
return None
if str(existing_thickness).isdigit():
if float(existing_thickness) >= 250:
return 0
if roof_energy_eff in ["Good", "Very Good"]:
return 1