Adding postcode summary to stonewater

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-12 15:49:28 +00:00
parent fe6e83314f
commit dfa37f86d4

View file

@ -916,13 +916,14 @@ def main():
"18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
'3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
'16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
'20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX'
'20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
'7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
}
# We now match this retrofit packages board to the extracted data
matching_lookup = []
for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
# Handle the case that has the wrong postcode in the asset data
if home["Name"] in manual_filters:
filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
@ -986,11 +987,11 @@ def main():
missing_ids = list(missing_ids)
if missing_ids:
# We check that the missing ids have no data yet
missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
# missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
# missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
# CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
if len(missing_ids) != 8:
if len(missing_ids) != 6:
raise Exception("Unacceptable number of missings")
if matching_lookup["Address ID"].duplicated().sum():
@ -1083,12 +1084,20 @@ def main():
stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
windows_data["Address ID"] = windows_data["Address ID"].astype(float)
stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left")
stonewater_data = stonewater_data.sort_values("Archetype ID", ascending=True)
if stonewater_data["Address ID"].duplicated().sum():
raise Exception("Duplicate Address IDs")
for c in [
'Window attributes - Fitted/renewed date',
'Parent Asset Window attributes - Fitted/renewed date',
'Fitted/renewed date'
]:
stonewater_data[c] = stonewater_data[c].astype(str)
# Save this data to excel
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False)
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False)
cost_sheet = [
{
@ -1173,7 +1182,7 @@ def main():
create_proposed_wave_3_bid(
costed_packages_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx"
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V3.xlsx"
),
archetypes_sheet_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
@ -1183,8 +1192,8 @@ def main():
def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
# We read in the costed packages
# Note: Header as 12 is for Matt Ratcliff's reviewed version
costed_packages = pd.read_excel(costed_packages_filepath, header=13, sheet_name="Modelled Packages")
costed_packages = costed_packages[~pd.isnull(costed_packages["Address"])]
archetypes_to_cost = costed_packages[
[
@ -1213,16 +1222,11 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
'Existing Primary Heating System',
'Existing Primary Heating PCDF Reference'])
# We take properties that are EPC D and below (61% of units)
# We take properties that are EPC D and below (59% of units)
archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])
average_cost = archetypes_to_cost[
archetypes_to_cost["Has been modelled"]
]['Total Cost of Measures inc Contingency'].mean()
print(average_cost)
# These are the Archetypes that will likely be suitable for Wave 3
archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
@ -1236,7 +1240,21 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
how="left"
)
proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])]
proposed_sample = archetypes_sheet[
archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
]
not_proposed = archetypes_sheet[
~archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
]
# archetypes_without_survey = []
# for p in list(set(not_proposed)):
# filtered = costed_packages[costed_packages["Archetype ID"].astype(int).astype(str) == p]
# if filtered.empty:
# archetypes_without_survey.append(p)
# Can we propose anything about archetypes that were not surveyed?
proposed_sample = proposed_sample[
[
@ -1247,6 +1265,8 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
# We classify into high and low confidence
archetypes_to_cost["Surveyed Main Roof"] = archetypes_to_cost["Surveyed Main Roof"].fillna("")
match_classification = []
for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
@ -1331,8 +1351,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
None, proposed_sample["Total Cost of Measures inc Contingency"]
)
proposed_sample = proposed_sample.sort_values("Archetype ID", ascending=True)
# Save excel
proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)
proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid V2 (WIP).xlsx", index=False)
# For each postcode that's in the bid, we also summarise the number of units in the bid and number left out
proposed_sample_postcodes = proposed_sample["Postcode"].unique()
postcode_summary = []
for postcode in proposed_sample_postcodes:
in_proposal = proposed_sample[proposed_sample["Postcode"] == postcode]
not_in_proposal = not_proposed[not_proposed["Postcode"] == postcode]
postcode_summary.append(
{
"Postcode": postcode,
"Number of properties in Proposal": len(in_proposal),
"Number of properties not in Proposal": len(not_in_proposal)
}
)
postcode_summary = pd.DataFrame(postcode_summary)
postcode_summary = postcode_summary.sort_values(
"Number of properties not in Proposal",
ascending=False).reset_index(drop=True)
postcode_summary.to_excel(
CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid Postcode Summary.xlsx", index=False
)
def find_remaining_surveys():