mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
merging EPC data and survey outcomes to asset list
This commit is contained in:
parent
0142e6fe5f
commit
dc80313eca
1 changed files with 334 additions and 79 deletions
|
|
@ -3459,7 +3459,7 @@ class DataLoader:
|
|||
"not eligible",
|
||||
asset_list["ECO Eligibility"]
|
||||
)
|
||||
asset_list = asset_list.drop(columns=["has_eco3"])
|
||||
# asset_list = asset_list.drop(columns=["has_eco3"])
|
||||
|
||||
# Report on sales
|
||||
sales_report = {}
|
||||
|
|
@ -6778,6 +6778,339 @@ def identify_eco_works(loader):
|
|||
breakdowns = breakdowns.fillna(0)
|
||||
|
||||
|
||||
def _worksheet_to_dataframe(worksheet, header_row=1):
    """
    Convert an openpyxl worksheet into a DataFrame.

    :param worksheet: openpyxl worksheet to read.
    :param header_row: 1-indexed row holding the column names; every row after it is treated as data.
    :return: DataFrame with one row per worksheet row below the header.
    """
    column_names = [cell.value for cell in worksheet[header_row]]
    rows = list(worksheet.iter_rows(min_row=header_row + 1, values_only=True))
    return pd.DataFrame(rows, columns=column_names)


def unitas_data_prep(loader):
    """
    Adhoc - for UNITAS (HA50): build the list of properties still to be surveyed and write it to Excel.

    Steps, in order:
      1. Drop asset-list rows that already have an entry in the loader's survey list.
      2. Read the phase 1 / phase 2 sheets of the master rolling workbook, keep the phase 1 rows that cannot
         be found in the existing survey list, and drop the asset-list rows those further surveys refer to.
      3. Read the second-round CIGA checks and keep the "subject to ciga" properties whose Guarantee is "No".
      4. Look up the newest EPC for every property to survey and attach rating, inspection date,
         transaction type and built form ("No EPC found" where nothing was returned).
      5. Match the survey outcomes workbook against the properties to survey.
      6. Write the properties to survey to "Unitas - phase 2 properties to Survey.xlsx".

    :param loader: object exposing ``loader.data["HA50"]`` with "asset_list" and "survey_list" DataFrames.
    :return: None. The result is written to an Excel file in the working directory.
    :raises Exception: if a further completed survey does not match exactly one asset-list row, or a survey
        outcome matches more than one property to survey.
    """
    unitas_data = loader.data["HA50"].copy()
    unitas_asset_list = unitas_data["asset_list"].copy()
    unitas_survey_sheet = unitas_data["survey_list"].copy()

    # We remove the surveyed properties from the asset sheet
    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
    unitas_asset_list = unitas_asset_list.merge(
        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
        how="left",
        on="asset_list_row_id"
    )
    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])

    # We read in the data for the further completed surveys
    unitas_phase_1_workbook = openpyxl.load_workbook(
        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
    )
    phase_1_surveys = _worksheet_to_dataframe(unitas_phase_1_workbook["ECO 4 - PHASE 1"])
    phase_2_surveys = _worksheet_to_dataframe(unitas_phase_1_workbook["ECO4 - PHASE 2"])

    # Correct phase 1 surveys in the same fashion as the previous approach
    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())

    # We check all phase 1 surveys are contained in the data we had before; those that are not are "additional"
    additional = []
    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
        same_number = unitas_survey_sheet["NO."] == row["NO."]

        # First attempt: postcode + house number
        matched_on_postcode = unitas_survey_sheet[
            (unitas_survey_sheet["Post Code"] == row["Post Code"]) & same_number
        ]
        if matched_on_postcode.shape[0] == 1:
            continue

        # Second attempt: street / block name + house number
        matched_on_street = unitas_survey_sheet[
            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) & same_number
        ]
        if matched_on_street.shape[0] == 1:
            continue

        additional.append(row.to_dict())
    additional = pd.DataFrame(additional)

    # Drop all of the occurrences of "OFFICE USE ONLY" columns (blank header cells come through as None)
    phase_2_surveys = phase_2_surveys.drop(
        columns=[c for c in phase_2_surveys.columns if isinstance(c, str) and "OFFICE USE ONLY" in c]
    )
    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
    additional_filtered = additional[common_columns]

    further_unitas_completed_surveys = pd.concat(
        [phase_2_surveys, additional_filtered],
        axis=0,
        ignore_index=True
    )

    # Add a phase 2 key
    further_unitas_completed_surveys["survey_list_row_id"] = [
        "unitas_phase_2" + str(i) for i in further_unitas_completed_surveys.index
    ]

    # Surveys and postcodes that are skipped when matching back to the asset list
    not_in_asset_list = [
        "unitas_phase_20", "unitas_phase_234", "unitas_phase_2163", "unitas_phase_2173", "unitas_phase_2374"
    ]
    additional_postcodes = ["st28bg"]

    full_asset_list = unitas_data["asset_list"].copy()
    full_asset_list["matching_postcode"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
    further_unitas_completed_surveys["Post Code"] = further_unitas_completed_surveys["Post Code"].str.replace(
        "ST 5DT", "ST3 5DT"
    )

    # We match these back to the asset list
    matching_lookup = []
    for _, row in tqdm(further_unitas_completed_surveys.iterrows(), total=len(further_unitas_completed_surveys)):
        if row["survey_list_row_id"] in not_in_asset_list:
            continue

        postcode_lower = row["Post Code"].lower().strip().replace(" ", "")
        if postcode_lower in additional_postcodes:
            continue

        # Filter asset list on postcode (literal match, missing postcodes never match), then on house number
        candidates = full_asset_list[
            full_asset_list["matching_postcode"].str.contains(postcode_lower, regex=False, na=False)
        ]
        candidates = candidates[candidates["HouseNo"] == str(row["NO."])]

        if candidates.shape[0] != 1:
            raise Exception("NOT FOUND")

        matching_lookup.append(
            {
                "survey_list_row_id": row["survey_list_row_id"],
                "asset_list_row_id": candidates["asset_list_row_id"].values[0],
            }
        )

    # Explicit columns keep the merge below valid even when nothing was matched
    matching_lookup = pd.DataFrame(matching_lookup, columns=["survey_list_row_id", "asset_list_row_id"])
    matching_lookup["phase_2_surveyed"] = True

    # We merge this onto the asset list and remove the rows where phase_2_surveyed is populated
    unitas_asset_list = unitas_asset_list.merge(
        matching_lookup, how="left", on="asset_list_row_id"
    )
    unitas_asset_list = unitas_asset_list[
        pd.isnull(unitas_asset_list["phase_2_surveyed"])
    ]

    # We add in the new CIGA submissions
    unitas_round_2_ciga_workbook = openpyxl.load_workbook("local_data/ha_data/Unitas second round CIGA checks.xlsx")
    ciga_round_2 = _worksheet_to_dataframe(unitas_round_2_ciga_workbook["Worksheet"])

    # We merge the ciga sheet to the asset list, for the properties whose eligibility depends on CIGA
    ciga_dependent_asset_list = unitas_asset_list[
        unitas_asset_list["ECO Eligibility"].str.contains("subject to ciga")
    ].copy()
    ciga_round_2_matched = ciga_dependent_asset_list.merge(
        ciga_round_2, how="inner", on=["Address Line 1", "Post Code"]
    )
    # Filter on just the properties that had no guarantee
    ciga_round_2_matched = ciga_round_2_matched[ciga_round_2_matched["Guarantee"] == "No"]

    # ECO Eligibility counts recorded when this was written:
    # not eligible              9227
    # failed ciga               2711
    # eco4 (subject to ciga)    2238
    # eco4 - passed ciga         901
    # gbis                       114
    # eco4                        91

    # We filter on the properties we're looking to re-survey
    unitas_properties_to_survey = unitas_asset_list[
        unitas_asset_list["ECO Eligibility"].isin(
            [
                "eco4 - passed ciga",
                "eco4"
            ]
        )
    ].copy()

    unitas_properties_to_survey = pd.concat(
        [
            unitas_properties_to_survey,
            ciga_round_2_matched[unitas_properties_to_survey.columns]
        ]
    )

    # NOTE(review): hard-coded credential committed to source control - it should be rotated and loaded
    # from configuration / a secret store instead. Left in place here so behaviour is unchanged.
    epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="

    # We now retrieve the latest EPC data
    epc_data = []
    for _, unitas_property in tqdm(unitas_properties_to_survey.iterrows(), total=len(unitas_properties_to_survey)):
        property_type, _ = get_property_type_and_built_form(property_meta=unitas_property, ha_name="HA50")

        full_address = unitas_property["matching_address"]

        searcher = SearchEpc(
            address1=str(unitas_property["HouseNo"]),
            postcode=unitas_property["matching_postcode"],
            auth_token=epc_api_key,
            os_api_key="",
            property_type=property_type,
            full_address=full_address,
            fast=True
        )
        # Force the skipping of estimating the EPC
        searcher.ordnance_survey_client.property_type = None
        searcher.ordnance_survey_client.built_form = None

        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            continue

        epc_data.append(
            {
                "asset_list_row_id": unitas_property["asset_list_row_id"],
                **searcher.newest_epc.copy()
            }
        )

    # Pull out just the columns we need
    epc_columns = [
        "asset_list_row_id",
        "address1", "postcode",
        "current-energy-efficiency",
        "current-energy-rating",
        "inspection-date",
        "transaction-type",
        "built-form"
    ]
    epc_df = pd.DataFrame(epc_data)
    if epc_df.empty:
        # No EPC was found for any property: keep the expected schema so the merge below still works
        epc_df = pd.DataFrame(columns=epc_columns)
    # .copy() so the new column is added to an independent frame rather than a slice
    epc_df = epc_df[epc_columns].copy()

    epc_df["EPC Rating"] = (
        epc_df["current-energy-efficiency"].astype(str) +
        epc_df["current-energy-rating"].astype(str)
    )

    # Merge onto the Unitas data:
    epc_output_columns = ["EPC Rating", "inspection-date", "transaction-type", "built-form"]
    unitas_properties_to_survey_full = unitas_properties_to_survey.merge(
        epc_df[["asset_list_row_id"] + epc_output_columns],
        how="left",
        on="asset_list_row_id"
    )

    unitas_properties_to_survey_full["ECO Eligibility"] = unitas_properties_to_survey_full["ECO Eligibility"].replace(
        "eco4 (subject to ciga)", "eco4 - passed ciga, phase 2 check"
    )

    # Properties without an EPC get an explicit marker in every EPC-derived column
    for col in epc_output_columns:
        unitas_properties_to_survey_full[col] = (
            unitas_properties_to_survey_full[col].fillna("No EPC found").astype(str)
        )

    unitas_properties_to_survey_full = unitas_properties_to_survey_full.rename(
        columns={
            "inspection-date": "Last EPC Inspection Date",
            "transaction-type": "Last EPC Reason",
            "built-form": "Last EPC Built Form",
        }
    )

    # We now match to the survey outcomes (column names are on row 2 of the OUTCOMES sheet)
    unitas_survey_outcomes_workbook = openpyxl.load_workbook(
        "local_data/ha_data/UNITAS - survey outcomes 26.03.2024.xlsx"
    )
    unitas_outcomes = _worksheet_to_dataframe(unitas_survey_outcomes_workbook["OUTCOMES"], header_row=2)
    unitas_outcomes = unitas_outcomes.rename(
        columns={
            "Notes (If 'no answer' under outcomes, have you checked around the property for access "
            "issues where possible?)": "Notes"
        }
    )

    # Normalised postcodes for the properties to survey. Kept as a separate Series (same index as the
    # frame) so that no helper column ends up in the Excel output.
    survey_postcodes = unitas_properties_to_survey_full["matching_postcode"].str.lower().str.replace(" ", "")

    # NOTE(review): the matches are collected but not yet merged onto the output - this loop currently
    # only validates that no outcome maps to more than one property.
    outcome_matching = []
    for _, outcome in tqdm(unitas_outcomes.iterrows(), total=len(unitas_outcomes)):
        # We search for the corresponding entry in the properties to survey
        postcode_lower = outcome["Postcode"].lower().strip().replace(" ", "")

        candidates = unitas_properties_to_survey_full[
            survey_postcodes.str.contains(postcode_lower, regex=False, na=False)
        ]
        candidates = candidates[candidates["HouseNo"] == str(outcome["No."])]

        if candidates.empty:
            continue

        if candidates.shape[0] != 1:
            raise Exception("something went wrong")

        outcome_matching.append(
            {
                "asset_list_row_id": candidates["asset_list_row_id"].values[0],
                **outcome.to_dict()
            }
        )

    # Store as an excel
    unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx")
|
||||
|
||||
|
||||
def app():
|
||||
"""
|
||||
This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
|
||||
|
|
@ -6907,81 +7240,3 @@ def app():
|
|||
december_figures["ECO4 remaining"]
|
||||
)
|
||||
december_figures["ECO4 remaining"].sum()
|
||||
|
||||
# Adhoc - for UNITAS, stripping out additional surveys that have been completed
|
||||
unitas_data = loader.data["HA50"].copy()
|
||||
unitas_asset_list = unitas_data["asset_list"].copy()
|
||||
unitas_survey_sheet = unitas_data["survey_list"].copy()
|
||||
# We remove the surveyed properties from the asset sheet
|
||||
unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
|
||||
unitas_asset_list = unitas_asset_list.merge(
|
||||
unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
|
||||
how="left",
|
||||
on="asset_list_row_id"
|
||||
)
|
||||
unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
|
||||
unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
|
||||
|
||||
# We read in the data for the further completed surveys
|
||||
unitas_phase_1_workbook = openpyxl.load_workbook(
|
||||
"local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
|
||||
)
|
||||
phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
|
||||
phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
|
||||
phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
|
||||
phase_1_rows_data = []
|
||||
for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
phase_1_rows_data.append(row_data)
|
||||
|
||||
phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
|
||||
|
||||
# Correct phase 1 surveys in the same fashion as the previous approach
|
||||
phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
|
||||
|
||||
# We check all phase 1 surveys are contained in the data we had before
|
||||
additional = []
|
||||
for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
|
||||
# We look for the entry in the old survey sheet:
|
||||
# matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
|
||||
# if matched_uprn.shape[0] == 1:
|
||||
# continue
|
||||
|
||||
matched_1 = unitas_survey_sheet[
|
||||
(unitas_survey_sheet["Post Code"] == row["Post Code"]) &
|
||||
(unitas_survey_sheet["NO."] == row["NO."])
|
||||
]
|
||||
|
||||
if matched_1.shape[0] == 1:
|
||||
continue
|
||||
|
||||
matched_2 = unitas_survey_sheet[
|
||||
(unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
|
||||
(unitas_survey_sheet["NO."] == row["NO."])
|
||||
]
|
||||
|
||||
if matched_2.shape[0] == 1:
|
||||
continue
|
||||
|
||||
additional.append(row.to_dict())
|
||||
additional = pd.DataFrame(additional)
|
||||
|
||||
phase_2_rows_data = []
|
||||
for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
phase_2_rows_data.append(row_data)
|
||||
|
||||
phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
|
||||
phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
|
||||
# Drop all of the occurances of "OFFICE USE ONLY" columns
|
||||
phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
|
||||
common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
|
||||
additional_filtered = additional[common_columns]
|
||||
|
||||
further_unitas_completed_surveys = pd.concat(
|
||||
[phase_2_surveys, additional_filtered],
|
||||
axis=0,
|
||||
ignore_index=True
|
||||
)
|
||||
|
||||
# We match these back to the asset list
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue