re-building thrive's programme

This commit is contained in:
Khalim Conn-Kowlessar 2025-05-20 15:59:38 +01:00
parent 97eaf948c5
commit c0cf848db2
7 changed files with 411 additions and 77 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -2507,7 +2507,7 @@ class AssetList:
else:
raise NotImplementedError("Invalid date in outcomes - implement me")
notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes"
lookup = lookup.merge(
self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
@ -2576,6 +2576,7 @@ class AssetList:
def flag_survey_master(
self,
master_filepaths,
master_id_colnames,
master_to_asset_list_filepath=None
):
# TODO: This probably needs further expansion
@ -2591,7 +2592,7 @@ class AssetList:
logger.info("Getting masters and merging onto asset list")
master_surveyed = []
unmatched_submissions = []
for filepath in master_filepaths:
for idx, filepath in enumerate(master_filepaths):
master_data = pd.read_csv(filepath)
# Strip columns
master_data.columns = [c.strip() for c in master_data.columns]
@ -2618,22 +2619,6 @@ class AssetList:
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
# if "UPRN" in master_data.columns:
# # We just need to check if any were cancelled
# master_to_append = master_data[
# ["UPRN", install_col, submission_col]
# ].rename(
# columns={
# "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
# install_col: "survey_status",
# submission_col: "submission_date"
# }
# )
# master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
#
# master_surveyed.append(master_to_append)
# continue
master_data["row_id"] = master_data.index
self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
@ -2656,8 +2641,6 @@ class AssetList:
)
measure_mix_col = "MEASURE COMBO"
# Otherwise, we need to match algorithmically
has_property_id = "UPRN" in master_data.columns
logger.info("Matching master data to asset list")
matched = []
unmatched = []
@ -2670,13 +2653,22 @@ class AssetList:
if pd.isnull(row[postcode_col]):
continue
# if has_property_id:
# submission_uprn = row["UPRN"]
#
# if not pd.isnull(submission_uprn):
# df = self.standardised_asset_list[
# self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
# ]
if master_id_colnames[idx] is not None:
# Filter the standardised asset list on this
df = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]]
]
if df.shape[0] == 1:
matched.append(
{
"row_id": row["row_id"],
"original_house_no": original_house_no,
"original_street": original_street,
"original_postcode": original_postcode,
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
continue
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
@ -2721,6 +2713,7 @@ class AssetList:
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
}
)
continue
if house_no in df["house_no"].values:
df = df[df["house_no"] == house_no]
@ -2793,6 +2786,7 @@ class AssetList:
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
master_surveyed.append(master_to_append)
unmatched_df = master_data[
master_data["row_id"].isin(unmatched)

View file

@ -62,36 +62,42 @@ def app():
Property UPRN
"""
# Thurrock
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
data_filename = "THURROCK COUNCIL - For analysis.xlsx"
sheet_name = "Assets"
postcode_column = 'Postcode'
fulladdress_column = "Full Address"
address1_column = None
address1_method = "house_number_extraction"
# Thrive - reconciliation
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
sheet_name = "Sheet1"
postcode_column = 'postcode'
fulladdress_column = "full_address"
address1_column = "address_line_1"
address1_method = None
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Construction Date"
landlord_year_built = "age_band_calculated"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Property Subtype"
landlord_property_type = "property_type"
landlord_built_form = "build_form"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Main Heating Type"
landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
landlord_heating_system = "heating_type_updated"
landlord_existing_pv = None
landlord_property_id = "Property Reference"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
landlord_property_id = "thrive_property_id"
landlord_sap = "sap_rating_updated"
outcomes_filename = [
os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
]
outcomes_sheetname = ["Sheet1"]
outcomes_postcode = ["postcode"]
outcomes_houseno = ["No."]
outcomes_id = ["thrive_property_id"]
outcomes_address = ["address"]
master_filepaths = [
os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
]
master_to_asset_list_filepath = None
master_id_colnames = ["thrive_property_id", "thrive_property_id"]
phase = False
ecosurv_landlords = None
ecosurv_landlords = "thrive"
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"

View file

@ -292,4 +292,6 @@ HEATING_MAPPINGS = {
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
'Storage heating': 'electric storage heaters',
'Storage heating (HHRSH)': 'high heat retention storage heaters'
}

View file

@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched unknown access to loft",
"piched unknown insulation",
"pitched insulated",
"pitched less than 100mm insulation"
"pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"unknown insulated",
@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'200mm': 'pitched insulated',
'0-49mm': 'pitched less than 100mm insulation',
'50mm': 'pitched less than 100mm insulation',
'': 'unknown',
'NR': 'unknown',
'Non-joist': 'unknown',
'25mm': 'pitched less than 100mm insulation',
'400mm+': 'pitched insulated',
'12mm': 'pitched less than 100mm insulation'
}

View file

@ -8,6 +8,8 @@ address the following concerns:
"""
import pandas as pd
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
# This is Thrive's list of properties and when they should have been surveyed
thrive_tracker = pd.read_excel(
@ -51,27 +53,10 @@ original_columns = {
}
original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
original_asset_list["Data Source"] = "Thrive Tracker"
original_asset_list["Data Source"] = "Original Asset List"
original_asset_list = original_asset_list.drop_duplicates()
# We append on the missed properties, with the information we have
# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
# 'Special Requirements ', 'CIGA', 'Date CIGA check received',
# 'Proposed Progamme', 'New Proposed Programme',
# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
# 'Date Submitted to installer', 'PRRN Number',
# 'Loft insulation required? (Thrive)', 'Date booked ',
# 'Completed\n(yes/no)', 'Date Completed',
# 'Vents installed?\n(number and location)',
# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
# 'PRRN Submitted '
missed_properties["Full Address"] = (
missed_properties["#"].astype(str) + ", " +
missed_properties["Adress Line 1"].astype(str) + ", " +
@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
missed_properties["ECO Eligibility"] = "Property Not Inspected"
missed_properties["Data Source"] = "Thrive Tracker"
# We de-dupe ids in original_asset_list
dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
dupes = original_asset_list[
original_asset_list["thrive_property_id"].isin(dupe_ids)
].copy()
dupes = dupes.sort_values("thrive_property_id")
original_asset_list = original_asset_list.rename(
columns={
"detailed_property_type": "build_form"
}
)
master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
# We were provided with a data update for a sample of properties. We update the data with this information
@ -103,12 +101,339 @@ data_update = pd.read_excel(
header=0
)
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
new_properties["Full Address"] = (
new_properties["#"].astype(str) + ", " +
new_properties["Adress Line 1"].astype(str) + ", " +
new_properties["Postcode"].astype(str)
)
new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
new_properties["WFT Findings"] = "Property Not Inspected"
new_properties["ECO Eligibility"] = "Property Not Inspected"
new_properties["Data Source"] = "13.05.2025 Data Update"
master_list = pd.concat([new_properties, master_list])
# We append any new data on heating system, heating type, and insulation type, based on the data update
master_list = master_list.merge(
data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
columns={
"Heating Type": "heating_type_updated",
"Assumed mm ": "assumed_loft_insulation_thickness_updated",
"SAP": "sap_rating_updated"
}
),
how="left",
left_on="thrive_property_id",
right_on="UPRN"
)
# We fill the missings
master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
master_list["assumed_loft_insulation_thickness_updated"] = master_list[
"assumed_loft_insulation_thickness_updated"
].fillna(master_list["assumed_loft_insulation_thickness"])
master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
thrive_tracker["UPRN"].astype(str).values
)
# Store the asset list - call it master asset list updated May 2025
master_list = master_list.drop(columns=["UPRN"])
master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
# master_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
# "Complete - Updated May 2025.xlsx",
# )
master_list["house_number_TEMP"] = master_list.apply(
lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
axis=1
)
# We add in the status of the property
# TODO: Add the status of the property from the Thrive tracker
outcomes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
"24-March25.xlsx",
header=0
)
outcomes["row_id"] = outcomes.index
# We have two ids which have the same phone number, but different UPRN, so we don't match to the tracker for these
tracker_for_matching = thrive_tracker[
~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
].copy()
tracker_for_matching["Full Address"] = (
tracker_for_matching["#"].astype(str) + ", " +
tracker_for_matching["Adress Line 1"].astype(str) + ", " +
tracker_for_matching["Postcode"].astype(str)
)
outcomes_id_lookup = []
for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
hn = str(x["No."])
address = x["Address"]
postcode = x["Postcode"]
contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
contact_no = None if contact_no == "nan" else contact_no
if address == "292 Micklefield Road":
hn = "292"
if (address == "Micklefield Road") & (hn == "302"):
hn = "292"
if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
hn = "103a"
if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
hn = "105a"
if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
hn = "107a"
#
# # We match this to the tracker
# m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
# # Many of the phone numbers don't have a leading zero in the tracker so we add them
# if (m1.shape[0] != 1) and not pd.isnull(contact_no):
# m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
#
# if m1.shape[0] > 1:
# raise ValueError(
# f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
# )
# if m1.empty:
m1 = tracker_for_matching[
(tracker_for_matching["#"].astype(str) == hn) &
(tracker_for_matching["Postcode"] == postcode)
]
if m1.empty:
# Some properties aren't in the tracker, we match to the master list
m1 = master_list[
(master_list["house_number_TEMP"].astype(str) == hn) &
(master_list["postcode"] == postcode)
]
outcomes_id_lookup.append(
{
"row_id": x["row_id"],
"thrive_property_id": m1["thrive_property_id"].values[0],
"address": m1["full_address"].values[0],
"postcode": m1["postcode"].values[0],
}
)
continue
if m1.shape[0] != 1:
raise ValueError(
f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
)
# We add the status to the master list
outcomes_id_lookup.append(
{
"row_id": x["row_id"],
"thrive_property_id": m1["UPRN"].values[0],
"address": m1["Full Address"].values[0],
"postcode": m1["Postcode"].values[0],
}
)
outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
outcomes = outcomes.merge(
outcomes_id_lookup,
how="left",
left_on="row_id",
right_on="row_id"
)
outcomes = outcomes.drop(columns=["row_id"])
outcomes = outcomes.rename(
columns={
"Outcomes": "Outcome",
"Notes (If 'no "
"answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
}
)
# Store the corrected outcomes
# outcomes.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
# April 24-March25 - Corrected.xlsx",
# index=False
# )
# TODO: the statement that was here (`data_update = = data_update[["UPRN", ""]]`) was unfinished:
# the doubled `=` is a SyntaxError that prevents this whole script from running, and the second
# column name was left blank. Re-instate the column selection once the required column is known.
# TODO: Flag the Thrive priorities and create a separate project code for these
# TODO: Add the general project code
# TODO: Add the thrive
def parse_date(value):
    """Parse a 'date letters sent' cell into a pandas Timestamp.

    String values have any 'w.c' / 'w/c' (week commencing) marker and the
    surrounding whitespace removed, and are then parsed day-first.

    Non-string values (e.g. NaN for an empty cell, or a cell that has already
    been read as a datetime) are passed straight to ``pd.to_datetime`` instead
    of being stripped, which previously raised ``AttributeError``.

    Returns:
        The parsed ``pd.Timestamp``, or ``pd.NaT`` when the value is missing
        or cannot be parsed.
    """
    # pd.to_datetime(None) returns None rather than NaT, so handle it explicitly
    if value is None:
        return pd.NaT

    if isinstance(value, str):
        # Strip any 'W.C' or 'w/c' prefix and clean whitespace
        value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()

    try:
        # Day-first parsing; unparseable values are coerced to NaT
        return pd.to_datetime(value, dayfirst=True, errors='coerce')
    except Exception:
        # Best-effort: anything pandas still refuses to handle counts as "no date"
        return pd.NaT
outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
# Next step - match the submissions master to the asset list. We will append on the UPRN
eco3_submissions = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
"ECO3.csv",
header=0
)
eco3_submissions["row_id"] = eco3_submissions.index
eco4_submissions = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
"ECO4.csv",
header=0
)
eco4_submissions["row_id"] = eco4_submissions.index
# List of properties never on the asset list
not_on_master = [
"7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
"20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
"26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
"9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
"12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
"20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
"25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
"33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
'37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
'41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
'46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
'50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
]
eco3_remap = {
"19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
"29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
"31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
"44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
"64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
"11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
"16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
"58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
"10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
"25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
"32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
"94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
'33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
'120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
'35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
'18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
'34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
'58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
'48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
'45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
'6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
'2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
'29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
'61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
'2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
}
# Resolve every ECO3 submission row to a thrive_property_id on the master list.
# Matching is on house number + postcode; the comparison columns are normalised
# once, outside the loop, and postcodes are compared case-insensitively so this
# loop behaves the same way as the ECO4 loop below.
master_house_numbers = master_list["house_number_TEMP"].astype(str)
master_postcodes = master_list["postcode"].str.lower()

eco3_lookup = []
for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
    hn = row["NO "]
    pc = row["Post Code"]
    street = row["Street / Block Name"]
    key = f"{hn}+{street}+{pc}"

    # Properties that were never on the asset list cannot be matched - skip them
    if key in not_on_master:
        continue

    # Submissions whose address details differ from the asset list are remapped
    # to the corrected (house number, street, postcode)
    if key in eco3_remap:
        hn, street, pc = eco3_remap[key]

    # We filter the asset list, because it's hard to know how accurate this is.
    # str() keeps numeric house numbers and missing postcodes comparable with
    # the string columns instead of silently never matching / raising on NaN.
    m1 = master_list[
        (master_house_numbers == str(hn)) &
        (master_postcodes == str(pc).lower())
    ]

    # Exactly one candidate is required; anything else needs a manual remap
    if m1.shape[0] != 1:
        raise ValueError(
            f"Error for {key} in the tracker"
        )

    eco3_lookup.append(
        {
            "row_id": row["row_id"],
            "thrive_property_id": m1["thrive_property_id"].values[0],
            "submission_house_number": row["NO "],
            "submission_address1": row["Street / Block Name"],
            "submission_postcode": row["Post Code"],
        }
    )
eco4_lookup = []
for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
hn = row["NO."]
pc = row["Post Code"]
street = row["Street / Block Name"]
key = f"{hn}+{street}+{pc}"
if key in not_on_master:
continue
if key in eco3_remap:
hn, street, pc = eco3_remap[key]
# The postcode is different to the asset list
# We filter the asset list, because it's hard to know how accurate this is
m1 = master_list[
(master_list["house_number_TEMP"].astype(str) == hn) &
(master_list["postcode"].str.lower() == pc.lower())
]
if m1.shape[0] != 1:
raise ValueError(
f"Error for {key} in the tracker"
)
eco4_lookup.append(
{
"row_id": row["row_id"],
"thrive_property_id": m1["thrive_property_id"].values[0],
"submission_house_number": row["NO."],
"submission_address1": row["Street / Block Name"],
"submission_postcode": row["Post Code"],
}
)
# We match the lookups back to the submission sheets
eco3_lookup = pd.DataFrame(eco3_lookup)
eco3_submissions = eco3_submissions.merge(
eco3_lookup,
how="left",
on="row_id",
)
eco4_lookup = pd.DataFrame(eco4_lookup)
eco4_submissions = eco4_submissions.merge(
eco4_lookup,
how="left",
on="row_id",
)
# Store
eco3_submissions.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
"ECO3 - with IDS.csv",
index=False
)
eco4_submissions.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
"ECO4 - with IDS.csv",
index=False
)