mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
re-building thrive's programme
This commit is contained in:
parent
97eaf948c5
commit
c0cf848db2
7 changed files with 411 additions and 77 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -2507,7 +2507,7 @@ class AssetList:
|
|||
else:
|
||||
raise NotImplementedError("Invalid date in outcomes - implement me")
|
||||
|
||||
notes_col = "Notes" if "Notes" in outcomes.columns else "Notes / Outcomes"
|
||||
notes_col = "Notes" if "Notes" in self.outcomes.columns else "Notes / Outcomes"
|
||||
|
||||
lookup = lookup.merge(
|
||||
self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id"
|
||||
|
|
@ -2576,6 +2576,7 @@ class AssetList:
|
|||
def flag_survey_master(
|
||||
self,
|
||||
master_filepaths,
|
||||
master_id_colnames,
|
||||
master_to_asset_list_filepath=None
|
||||
):
|
||||
# TODO: This probably needs further expansion
|
||||
|
|
@ -2591,7 +2592,7 @@ class AssetList:
|
|||
logger.info("Getting masters and merging onto asset list")
|
||||
master_surveyed = []
|
||||
unmatched_submissions = []
|
||||
for filepath in master_filepaths:
|
||||
for idx, filepath in enumerate(master_filepaths):
|
||||
master_data = pd.read_csv(filepath)
|
||||
# Strip columns
|
||||
master_data.columns = [c.strip() for c in master_data.columns]
|
||||
|
|
@ -2618,22 +2619,6 @@ class AssetList:
|
|||
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
|
||||
)
|
||||
|
||||
# if "UPRN" in master_data.columns:
|
||||
# # We just need to check if any were cancelled
|
||||
# master_to_append = master_data[
|
||||
# ["UPRN", install_col, submission_col]
|
||||
# ].rename(
|
||||
# columns={
|
||||
# "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
|
||||
# install_col: "survey_status",
|
||||
# submission_col: "submission_date"
|
||||
# }
|
||||
# )
|
||||
# master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
|
||||
#
|
||||
# master_surveyed.append(master_to_append)
|
||||
# continue
|
||||
|
||||
master_data["row_id"] = master_data.index
|
||||
|
||||
self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
|
||||
|
|
@ -2656,8 +2641,6 @@ class AssetList:
|
|||
)
|
||||
measure_mix_col = "MEASURE COMBO"
|
||||
|
||||
# Otherwise, we need to match algorithmically
|
||||
has_property_id = "UPRN" in master_data.columns
|
||||
logger.info("Matching master data to asset list")
|
||||
matched = []
|
||||
unmatched = []
|
||||
|
|
@ -2670,13 +2653,22 @@ class AssetList:
|
|||
if pd.isnull(row[postcode_col]):
|
||||
continue
|
||||
|
||||
# if has_property_id:
|
||||
# submission_uprn = row["UPRN"]
|
||||
#
|
||||
# if not pd.isnull(submission_uprn):
|
||||
# df = self.standardised_asset_list[
|
||||
# self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == submission_uprn
|
||||
# ]
|
||||
if master_id_colnames[idx] is not None:
|
||||
# Filter the standardised asset list on this
|
||||
df = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]]
|
||||
]
|
||||
if df.shape[0] == 1:
|
||||
matched.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"original_house_no": original_house_no,
|
||||
"original_street": original_street,
|
||||
"original_postcode": original_postcode,
|
||||
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
|
||||
|
||||
|
|
@ -2721,6 +2713,7 @@ class AssetList:
|
|||
self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
if house_no in df["house_no"].values:
|
||||
df = df[df["house_no"] == house_no]
|
||||
|
|
@ -2793,6 +2786,7 @@ class AssetList:
|
|||
}
|
||||
)
|
||||
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
|
||||
master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
|
||||
master_surveyed.append(master_to_append)
|
||||
unmatched_df = master_data[
|
||||
master_data["row_id"].isin(unmatched)
|
||||
|
|
|
|||
|
|
@ -62,36 +62,42 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
# Thurrock
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
|
||||
data_filename = "THURROCK COUNCIL - For analysis.xlsx"
|
||||
sheet_name = "Assets"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "Full Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
# Thrive - reconciliation
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
|
||||
data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'postcode'
|
||||
fulladdress_column = "full_address"
|
||||
address1_column = "address_line_1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Construction Date"
|
||||
landlord_year_built = "age_band_calculated"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Property Subtype"
|
||||
landlord_property_type = "property_type"
|
||||
landlord_built_form = "build_form"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = "Main Heating Type"
|
||||
landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
|
||||
landlord_heating_system = "heating_type_updated"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Property Reference"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
landlord_property_id = "thrive_property_id"
|
||||
landlord_sap = "sap_rating_updated"
|
||||
outcomes_filename = [
|
||||
os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
|
||||
]
|
||||
outcomes_sheetname = ["Sheet1"]
|
||||
outcomes_postcode = ["postcode"]
|
||||
outcomes_houseno = ["No."]
|
||||
outcomes_id = ["thrive_property_id"]
|
||||
outcomes_address = ["address"]
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
|
||||
os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
|
||||
]
|
||||
master_to_asset_list_filepath = None
|
||||
master_id_colnames = ["thrive_property_id", "thrive_property_id"]
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
ecosurv_landlords = "thrive"
|
||||
|
||||
# Medway
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
|
||||
|
|
|
|||
|
|
@ -292,4 +292,6 @@ HEATING_MAPPINGS = {
|
|||
'Communal Heating': 'communal heating',
|
||||
'No Data': 'unknown',
|
||||
'Boiler System': 'gas condensing boiler',
|
||||
'Storage heating': 'electric storage heaters',
|
||||
'Storage heating (HHRSH)': 'high heat retention storage heaters'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
|
|||
"pitched unknown access to loft",
|
||||
"piched unknown insulation",
|
||||
"pitched insulated",
|
||||
"pitched less than 100mm insulation"
|
||||
"pitched less than 100mm insulation",
|
||||
"another dwelling above",
|
||||
"flat unknown insulation",
|
||||
"unknown insulated",
|
||||
|
|
@ -38,4 +38,11 @@ ROOF_CONSTRUCTION_MAPPINGS = {
|
|||
'200mm': 'pitched insulated',
|
||||
'0-49mm': 'pitched less than 100mm insulation',
|
||||
'50mm': 'pitched less than 100mm insulation',
|
||||
'': 'unknown',
|
||||
'NR': 'unknown',
|
||||
'Non-joist': 'unknown',
|
||||
'25mm': 'pitched less than 100mm insulation',
|
||||
'400mm+': 'pitched insulated',
|
||||
'12mm': 'pitched less than 100mm insulation'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ address the following concerns:
|
|||
"""
|
||||
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
# This is Thrive's list of properties and when they should have been surveyed
|
||||
thrive_tracker = pd.read_excel(
|
||||
|
|
@ -51,27 +53,10 @@ original_columns = {
|
|||
}
|
||||
|
||||
original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
|
||||
original_asset_list["Data Source"] = "Thrive Tracker"
|
||||
original_asset_list["Data Source"] = "Original Asset List"
|
||||
original_asset_list = original_asset_list.drop_duplicates()
|
||||
|
||||
# We append on the missed properties, with the information we have
|
||||
# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
|
||||
# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
|
||||
# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
|
||||
# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
|
||||
# 'Special Requirements ', 'CIGA', 'Date CIGA check received',
|
||||
# 'Proposed Progamme', 'New Proposed Programme',
|
||||
# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
|
||||
# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
|
||||
# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
|
||||
# 'Date Submitted to installer', 'PRRN Number',
|
||||
# 'Loft insulation required? (Thrive)', 'Date booked ',
|
||||
# 'Completed\n(yes/no)', 'Date Completed',
|
||||
# 'Vents installed?\n(number and location)',
|
||||
# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
|
||||
# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
|
||||
# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
|
||||
# 'PRRN Submitted '
|
||||
|
||||
missed_properties["Full Address"] = (
|
||||
missed_properties["#"].astype(str) + ", " +
|
||||
missed_properties["Adress Line 1"].astype(str) + ", " +
|
||||
|
|
@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
|
|||
missed_properties["ECO Eligibility"] = "Property Not Inspected"
|
||||
missed_properties["Data Source"] = "Thrive Tracker"
|
||||
|
||||
# We de-dupe ids in original_asset_list
|
||||
dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
|
||||
dupes = original_asset_list[
|
||||
original_asset_list["thrive_property_id"].isin(dupe_ids)
|
||||
].copy()
|
||||
dupes = dupes.sort_values("thrive_property_id")
|
||||
|
||||
original_asset_list = original_asset_list.rename(
|
||||
columns={
|
||||
"detailed_property_type": "build_form"
|
||||
}
|
||||
)
|
||||
|
||||
master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
|
||||
|
||||
# We were provided with a data update for a sample of properties. We update the data with this information
|
||||
|
|
@ -103,12 +101,339 @@ data_update = pd.read_excel(
|
|||
header=0
|
||||
)
|
||||
|
||||
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
|
||||
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
|
||||
new_properties["Full Address"] = (
|
||||
new_properties["#"].astype(str) + ", " +
|
||||
new_properties["Adress Line 1"].astype(str) + ", " +
|
||||
new_properties["Postcode"].astype(str)
|
||||
)
|
||||
new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
|
||||
new_properties["WFT Findings"] = "Property Not Inspected"
|
||||
new_properties["ECO Eligibility"] = "Property Not Inspected"
|
||||
new_properties["Data Source"] = "13.05.2025 Data Update"
|
||||
|
||||
master_list = pd.concat([new_properties, master_list])
|
||||
|
||||
# We append any new data on heating system, heating type, and insulation type, based on the data update
|
||||
master_list = master_list.merge(
|
||||
data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
|
||||
columns={
|
||||
"Heating Type": "heating_type_updated",
|
||||
"Assumed mm ": "assumed_loft_insulation_thickness_updated",
|
||||
"SAP": "sap_rating_updated"
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
left_on="thrive_property_id",
|
||||
right_on="UPRN"
|
||||
)
|
||||
|
||||
# We fill the missings
|
||||
master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
|
||||
master_list["assumed_loft_insulation_thickness_updated"] = master_list[
|
||||
"assumed_loft_insulation_thickness_updated"
|
||||
].fillna(master_list["assumed_loft_insulation_thickness"])
|
||||
master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
|
||||
|
||||
assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
|
||||
|
||||
master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
|
||||
thrive_tracker["UPRN"].astype(str).values
|
||||
)
|
||||
|
||||
# Store the asset list - call it the master asset list, updated May 2025
|
||||
master_list = master_list.drop(columns=["UPRN"])
|
||||
master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
|
||||
# master_list.to_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
|
||||
# "Complete - Updated May 2025.xlsx",
|
||||
# )
|
||||
|
||||
master_list["house_number_TEMP"] = master_list.apply(
|
||||
lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
|
||||
axis=1
|
||||
)
|
||||
|
||||
# We add in the status of the property
|
||||
# TODO: Add the status of the property from the Thrive tracker
|
||||
outcomes = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
|
||||
"24-March25.xlsx",
|
||||
header=0
|
||||
)
|
||||
outcomes["row_id"] = outcomes.index
|
||||
|
||||
# We have two ids which have the same phone number, but different UPRN, so we don't match to the tracker for these
|
||||
tracker_for_matching = thrive_tracker[
|
||||
~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
|
||||
].copy()
|
||||
tracker_for_matching["Full Address"] = (
|
||||
tracker_for_matching["#"].astype(str) + ", " +
|
||||
tracker_for_matching["Adress Line 1"].astype(str) + ", " +
|
||||
tracker_for_matching["Postcode"].astype(str)
|
||||
)
|
||||
|
||||
outcomes_id_lookup = []
|
||||
for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
|
||||
|
||||
hn = str(x["No."])
|
||||
address = x["Address"]
|
||||
postcode = x["Postcode"]
|
||||
contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
|
||||
contact_no = None if contact_no == "nan" else contact_no
|
||||
|
||||
if address == "292 Micklefield Road":
|
||||
hn = "292"
|
||||
|
||||
if (address == "Micklefield Road") & (hn == "302"):
|
||||
hn = "292"
|
||||
|
||||
if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
|
||||
hn = "103a"
|
||||
|
||||
if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
|
||||
hn = "105a"
|
||||
|
||||
if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
|
||||
hn = "107a"
|
||||
|
||||
#
|
||||
# # We match this to the tracker
|
||||
# m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
|
||||
# # Many of the phone numbers don't have a leading zero in the tracker so we add them
|
||||
# if (m1.shape[0] != 1) and not pd.isnull(contact_no):
|
||||
# m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
|
||||
#
|
||||
# if m1.shape[0] > 1:
|
||||
# raise ValueError(
|
||||
# f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
|
||||
# )
|
||||
|
||||
# if m1.empty:
|
||||
m1 = tracker_for_matching[
|
||||
(tracker_for_matching["#"].astype(str) == hn) &
|
||||
(tracker_for_matching["Postcode"] == postcode)
|
||||
]
|
||||
|
||||
if m1.empty:
|
||||
# Some properties aren't in the tracker, we match to the master list
|
||||
m1 = master_list[
|
||||
(master_list["house_number_TEMP"].astype(str) == hn) &
|
||||
(master_list["postcode"] == postcode)
|
||||
]
|
||||
outcomes_id_lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
"thrive_property_id": m1["thrive_property_id"].values[0],
|
||||
"address": m1["full_address"].values[0],
|
||||
"postcode": m1["postcode"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
if m1.shape[0] != 1:
|
||||
raise ValueError(
|
||||
f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
|
||||
)
|
||||
|
||||
# We add the status to the master list
|
||||
outcomes_id_lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
"thrive_property_id": m1["UPRN"].values[0],
|
||||
"address": m1["Full Address"].values[0],
|
||||
"postcode": m1["Postcode"].values[0],
|
||||
}
|
||||
)
|
||||
|
||||
outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
|
||||
outcomes = outcomes.merge(
|
||||
outcomes_id_lookup,
|
||||
how="left",
|
||||
left_on="row_id",
|
||||
right_on="row_id"
|
||||
)
|
||||
|
||||
outcomes = outcomes.drop(columns=["row_id"])
|
||||
outcomes = outcomes.rename(
|
||||
columns={
|
||||
"Outcomes": "Outcome",
|
||||
"Notes (If 'no "
|
||||
"answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Store the corrected outcomes
|
||||
# outcomes.to_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
|
||||
# April 24-March25 - Corrected.xlsx",
|
||||
# index=False
|
||||
# )
|
||||
|
||||
data_update = = data_update[["UPRN", ""]]
|
||||
|
||||
# TODO: Flag the Thrive priorities and create a separate project code for these
|
||||
# TODO: Add the general project code
|
||||
# TODO: Add the thrive
|
||||
def parse_date(value):
    """Parse a 'date letters sent' cell into a pandas Timestamp.

    String values may carry a week-commencing prefix ('W.C' or 'w/c', any
    case), which is removed before parsing. Dates are read day-first.

    Args:
        value: The raw cell value. Strings are cleaned and parsed; missing
            values (None / NaN / NaT) yield ``pd.NaT``; any other non-string
            value is passed straight to ``pd.to_datetime``.

    Returns:
        The parsed ``pd.Timestamp``, or ``pd.NaT`` when the value is missing
        or cannot be interpreted as a date.
    """
    if not isinstance(value, str):
        # Blank spreadsheet cells arrive as NaN and already-parsed cells as
        # datetime objects; neither has .strip(), so handle them up front.
        if pd.isnull(value):
            return pd.NaT
        return pd.to_datetime(value, errors='coerce')

    # Strip any 'W.C' or 'w/c' prefix and clean whitespace
    cleaned = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
    if not cleaned:
        return pd.NaT

    try:
        # Try parsing the date with dayfirst=True
        return pd.to_datetime(cleaned, dayfirst=True, errors='coerce')
    except (ValueError, TypeError, OverflowError):
        return pd.NaT
|
||||
|
||||
|
||||
outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
|
||||
|
||||
# Next step - match the submissions master to the asset list. We will append on the UPRN
|
||||
eco3_submissions = pd.read_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
|
||||
"ECO3.csv",
|
||||
header=0
|
||||
)
|
||||
eco3_submissions["row_id"] = eco3_submissions.index
|
||||
|
||||
eco4_submissions = pd.read_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
|
||||
"ECO4.csv",
|
||||
header=0
|
||||
)
|
||||
eco4_submissions["row_id"] = eco4_submissions.index
|
||||
|
||||
# List of properties never on the asset list
|
||||
not_on_master = [
|
||||
"7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
|
||||
"20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
|
||||
"26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
|
||||
"9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
|
||||
"12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
|
||||
"20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
|
||||
"25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
|
||||
"33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
|
||||
'37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
|
||||
'41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
|
||||
'46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
|
||||
'50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
|
||||
]
|
||||
|
||||
eco3_remap = {
|
||||
"19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
|
||||
"29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
|
||||
"31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
|
||||
"44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
|
||||
"64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
|
||||
"11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
|
||||
"16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
|
||||
"58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
|
||||
"10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
|
||||
"25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
|
||||
"32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
|
||||
"94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
|
||||
'33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
|
||||
'120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
|
||||
'35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
|
||||
'61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
|
||||
'2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
|
||||
}
|
||||
|
||||
eco3_lookup = []
|
||||
for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
|
||||
hn = row["NO "]
|
||||
pc = row["Post Code"]
|
||||
street = row["Street / Block Name"]
|
||||
key = f"{hn}+{street}+{pc}"
|
||||
if key in not_on_master:
|
||||
continue
|
||||
|
||||
if key in eco3_remap:
|
||||
hn, street, pc = eco3_remap[key]
|
||||
# The postcode is different to the asset list
|
||||
|
||||
# We filter the asset list, because it's hard to know how accurate this is
|
||||
m1 = master_list[
|
||||
(master_list["house_number_TEMP"].astype(str) == hn) &
|
||||
(master_list["postcode"] == pc)
|
||||
]
|
||||
|
||||
if m1.shape[0] != 1:
|
||||
raise ValueError(
|
||||
f"Error for {key} in the tracker"
|
||||
)
|
||||
|
||||
eco3_lookup.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"thrive_property_id": m1["thrive_property_id"].values[0],
|
||||
"submission_house_number": row["NO "],
|
||||
"submission_address1": row["Street / Block Name"],
|
||||
"submission_postcode": row["Post Code"],
|
||||
}
|
||||
)
|
||||
|
||||
eco4_lookup = []
|
||||
for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
|
||||
hn = row["NO."]
|
||||
pc = row["Post Code"]
|
||||
street = row["Street / Block Name"]
|
||||
key = f"{hn}+{street}+{pc}"
|
||||
if key in not_on_master:
|
||||
continue
|
||||
|
||||
if key in eco3_remap:
|
||||
hn, street, pc = eco3_remap[key]
|
||||
# The postcode is different to the asset list
|
||||
|
||||
# We filter the asset list, because it's hard to know how accurate this is
|
||||
m1 = master_list[
|
||||
(master_list["house_number_TEMP"].astype(str) == hn) &
|
||||
(master_list["postcode"].str.lower() == pc.lower())
|
||||
]
|
||||
|
||||
if m1.shape[0] != 1:
|
||||
raise ValueError(
|
||||
f"Error for {key} in the tracker"
|
||||
)
|
||||
|
||||
eco4_lookup.append(
|
||||
{
|
||||
"row_id": row["row_id"],
|
||||
"thrive_property_id": m1["thrive_property_id"].values[0],
|
||||
"submission_house_number": row["NO."],
|
||||
"submission_address1": row["Street / Block Name"],
|
||||
"submission_postcode": row["Post Code"],
|
||||
}
|
||||
)
|
||||
|
||||
# We match the lookups back to the submission sheets
|
||||
eco3_lookup = pd.DataFrame(eco3_lookup)
|
||||
eco3_submissions = eco3_submissions.merge(
|
||||
eco3_lookup,
|
||||
how="left",
|
||||
on="row_id",
|
||||
)
|
||||
|
||||
eco4_lookup = pd.DataFrame(eco4_lookup)
|
||||
eco4_submissions = eco4_submissions.merge(
|
||||
eco4_lookup,
|
||||
how="left",
|
||||
on="row_id",
|
||||
)
|
||||
|
||||
# Store
|
||||
eco3_submissions.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
|
||||
"ECO3 - with IDS.csv",
|
||||
index=False
|
||||
)
|
||||
eco4_submissions.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
|
||||
"ECO4 - with IDS.csv",
|
||||
index=False
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue