mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
setting up the stonewater assessment extraction process
This commit is contained in:
parent
11a4bc24a1
commit
86deed8115
2 changed files with 112 additions and 5 deletions
|
|
@ -1 +0,0 @@
|
|||
# The address we're looking from for the remote assessments is Natwest House, Shenley Rd, Borehamwood WD6 1DL
|
||||
|
|
@ -2928,14 +2928,122 @@ def revised_model():
|
|||
original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
|
||||
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
|
||||
|
||||
original_archetypes = original_archetypes[
|
||||
["Address ID", "Archetype ID", ""]
|
||||
]
|
||||
|
||||
# Check if we have all of the addresses
|
||||
missed = original_archetypes[
|
||||
~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
|
||||
]["Archetype ID"].unique()
|
||||
|
||||
assert set(missed) == {'NOT PRIORITY POSTCODE', 'IN WAVE 2.1', 'EPC C OR ABOVE'}
|
||||
|
||||
original_archetypes = original_archetypes[
|
||||
["Address ID", "Archetype ID", "Archetype Group Rank"]
|
||||
]
|
||||
|
||||
# Merge these archetypes on to the new priority postcodes
|
||||
new_priority_postcodes = new_priority_postcodes.merge(
|
||||
original_archetypes, how="left", on="Address ID"
|
||||
)
|
||||
|
||||
# Basic check: no row with a missing Archetype ID should have an Address ID that appears in original_archetypes
|
||||
assert float(new_priority_postcodes[pd.isnull(new_priority_postcodes["Archetype ID"])]["Address ID"].isin(
|
||||
original_archetypes["Address ID"]
|
||||
).sum()) == 0
|
||||
|
||||
# We pull together the survey data sheet
|
||||
survey_folders = []
|
||||
|
||||
# Loop over each survey folder and list its contents
|
||||
for i in range(1, NUM_FOLDERS + 1):
|
||||
folder_path = os.path.join(CUSTOMER_FOLDER_PATH, f"StonewaterSurveys_{i}")
|
||||
if os.path.isdir(folder_path): # Check if folder exists
|
||||
folder_contents = [os.path.join(f"StonewaterSurveys_{i}", file) for file in os.listdir(folder_path)]
|
||||
survey_folders.extend(folder_contents) # Append contents to the master list
|
||||
|
||||
wave_21_folders = [
|
||||
"1. Herefordshire",
|
||||
"2. Bedfordshire",
|
||||
"3. Wiltshire",
|
||||
"4. Bournemouth",
|
||||
"5. Coventry",
|
||||
"6. West Sussex",
|
||||
"7. Dorset",
|
||||
"8. Cambridgeshire",
|
||||
"9. Guildford",
|
||||
"10. Little Island",
|
||||
"11. CCS Dorset"
|
||||
]
|
||||
|
||||
for wave_2_1_folder in wave_21_folders:
|
||||
folder_path = os.path.join(CUSTOMER_FOLDER_PATH, "Wave 2.1 Surveys", wave_2_1_folder)
|
||||
if os.path.isdir(folder_path): # Check if folder exists
|
||||
folder_contents = [os.path.join("Wave 2.1 Surveys", wave_2_1_folder, file) for file in
|
||||
os.listdir(folder_path)]
|
||||
survey_folders.extend(folder_contents) # Append contents to the master list
|
||||
|
||||
# We now do a large pull of all of the data
|
||||
extracted_data = []
|
||||
for survey_folder in tqdm(survey_folders):
|
||||
survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
|
||||
|
||||
# List the folders inside of the survey folder
|
||||
survey_subfolders = [
|
||||
name for name in os.listdir(survey_folder_path)
|
||||
if os.path.isdir(os.path.join(survey_folder_path, name))
|
||||
]
|
||||
|
||||
# Check if there's a "retrofit assessment" folder
|
||||
retrofit_folder = next((name for name in survey_subfolders if "retrofit assessment" in name.lower()), None)
|
||||
|
||||
ra_folder = next(
|
||||
(name for name in survey_subfolders if "ra coordinator info" in name.lower() or "ra info" in name.lower()),
|
||||
None
|
||||
)
|
||||
|
||||
# If a retrofit assessment folder (or an RA info folder) exists, check if it has content
|
||||
if retrofit_folder or ra_folder:
|
||||
if retrofit_folder:
|
||||
retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
|
||||
else:
|
||||
retrofit_folder_path = os.path.join(survey_folder_path, ra_folder)
|
||||
|
||||
# Check if everything inside is a sub-folder, there are exactly 2 of them, and one is "Property Pics"
|
||||
items = [item for item in os.listdir(retrofit_folder_path) if item != '.DS_Store']
|
||||
all_folders = [os.path.isdir(os.path.join(retrofit_folder_path, item)) for item in items]
|
||||
if all(all_folders) and len(all_folders) == 2 and "Property Pics" in items:
|
||||
# Get the folder that isn't Property Pics
|
||||
retrofit_folder_path = os.path.join(
|
||||
retrofit_folder_path, [item for item in items if item != "Property Pics"][0]
|
||||
)
|
||||
|
||||
if os.listdir(retrofit_folder_path): # If not empty
|
||||
summary_data = extract_retrofit_pdfs(data_folder_path=retrofit_folder_path)
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
**summary_data,
|
||||
}
|
||||
extracted_data.append(summary_data)
|
||||
continue
|
||||
else:
|
||||
# Then we have an empty Retrofit Assessment folder
|
||||
continue
|
||||
|
||||
# If no retrofit folder or it was empty, check files in survey_folder
|
||||
summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
|
||||
if not summary_data:
|
||||
if len(survey_subfolders) == 1:
|
||||
survey_folder_path = os.path.join(survey_folder_path, survey_subfolders[0])
|
||||
summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
|
||||
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
**summary_data,
|
||||
}
|
||||
extracted_data.append(summary_data)
|
||||
|
||||
retrofit_assessment_data = pd.DataFrame(extracted_data)
|
||||
# TODO - Save this data
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue