wip matching completed surveys back to the asset list

This commit is contained in:
Khalim Conn-Kowlessar 2024-04-08 15:29:52 +01:00
parent 35a288fd74
commit 0142e6fe5f
3 changed files with 80 additions and 2 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -6907,3 +6907,81 @@ def app():
december_figures["ECO4 remaining"]
)
december_figures["ECO4 remaining"].sum()
# Ad hoc - for UNITAS, stripping out additional surveys that have been completed
unitas_data = loader.data["HA50"].copy()
unitas_asset_list = unitas_data["asset_list"].copy()
unitas_survey_sheet = unitas_data["survey_list"].copy()
# We remove the surveyed properties from the asset sheet
unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
unitas_asset_list = unitas_asset_list.merge(
unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
how="left",
on="asset_list_row_id"
)
unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
# We read in the data for the further completed surveys
unitas_phase_1_workbook = openpyxl.load_workbook(
"local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
)
phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
phase_1_rows_data = []
for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
row_data = [cell.value for cell in row] # This will get you the cell values
phase_1_rows_data.append(row_data)
phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
# Correct phase 1 surveys in the same fashion as the previous approach
phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
# We check which phase 1 surveys are already contained in the data we had before; any that cannot be matched are collected in `additional`
additional = []
for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
# We look for the entry in the old survey sheet:
# matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
# if matched_uprn.shape[0] == 1:
# continue
matched_1 = unitas_survey_sheet[
(unitas_survey_sheet["Post Code"] == row["Post Code"]) &
(unitas_survey_sheet["NO."] == row["NO."])
]
if matched_1.shape[0] == 1:
continue
matched_2 = unitas_survey_sheet[
(unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
(unitas_survey_sheet["NO."] == row["NO."])
]
if matched_2.shape[0] == 1:
continue
additional.append(row.to_dict())
additional = pd.DataFrame(additional)
phase_2_rows_data = []
for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
row_data = [cell.value for cell in row] # This will get you the cell values
phase_2_rows_data.append(row_data)
phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
# Drop all occurrences of "OFFICE USE ONLY" columns
phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
additional_filtered = additional[common_columns]
further_unitas_completed_surveys = pd.concat(
[phase_2_surveys, additional_filtered],
axis=0,
ignore_index=True
)
# We match these back to the asset list