# Mirror of https://github.com/Hestia-Homes/Model.git
# Synced 2026-06-08 11:17:27 +00:00
# 155 lines, 6.8 KiB, Python
import os
|
|
import shutil
|
|
from tqdm import tqdm
|
|
from etl.access_reporting.app import SharePointClient
|
|
|
|
|
|
# Default local root of the survey tree. Its sub-folders each contain the
# per-property folders that get cleaned up.
_DEFAULT_SURVEYS_ROOT = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"

# Property-level folders that are removed wholesale. Matching is exact and
# case-sensitive, which is why spelling/numbering variants are listed separately.
_FOLDERS_TO_DELETE = (
    "1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
    "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
    "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
    "6. Trustmark Lodgement", "7. Post Inspection Photos",
)

# File suffixes removed from the coordinator folder (case-sensitive match).
_MEDIA_SUFFIXES = (".MOV", ".jpg")


def _purge_property_folder(property_path):
    """Delete the unwanted folders and media files inside one property folder."""
    property_contents = os.listdir(property_path)

    for folder_name in _FOLDERS_TO_DELETE:
        if folder_name not in property_contents:
            continue
        target_path = os.path.join(property_path, folder_name)
        if os.path.isdir(target_path):
            # Delete the folder, even if it's not empty
            shutil.rmtree(target_path)

    # The coordinator folder is numbered "2." in most properties and "1." otherwise.
    if "2. RA Coordinator Info" in property_contents:
        coordinator_folder = "2. RA Coordinator Info"
    else:
        coordinator_folder = "1. RA Coordinator Info"
    coordinator_info_path = os.path.join(property_path, coordinator_folder)

    # Some properties have neither variant; skip them rather than letting
    # os.listdir raise and abort the whole clean-up run.
    if not os.path.isdir(coordinator_info_path):
        return

    for file_name in os.listdir(coordinator_info_path):
        if file_name.endswith(_MEDIA_SUFFIXES):
            os.remove(os.path.join(coordinator_info_path, file_name))

    property_pics_path = os.path.join(coordinator_info_path, "Property Pics")
    if os.path.isdir(property_pics_path):
        # Delete folder and contents
        shutil.rmtree(property_pics_path)


def delete_large_files(folder_path=_DEFAULT_SURVEYS_ROOT):
    """
    Delete photos, designs and other files which we don't need.

    Walks ``folder_path/<sub-folder>/<property folder>`` and, for every
    property folder, removes the folders named in ``_FOLDERS_TO_DELETE``,
    the ``.MOV`` / ``.jpg`` files in the RA coordinator folder, and that
    folder's ``Property Pics`` sub-folder. Entries that are not directories
    are ignored at both the sub-folder and property level.

    :param folder_path: root folder to clean; defaults to the Stonewater
        "Wave 2.1 Surveys" folder the function was originally written for.
    :raises OSError: if ``folder_path`` cannot be listed, or a deletion fails.
    :return: None
    """
    # Each sub-folder of the root holds the property folders
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for property_name in tqdm(os.listdir(subfolder_path)):
            property_path = os.path.join(subfolder_path, property_name)
            if not os.path.isdir(property_path):
                continue
            _purge_property_folder(property_path)
|
|
|
|
|
|
def download_data_from_sharepoint(
    download_root="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys - 2",
):
    """
    Download the retrofit assessment / mid-term folders for Stonewater properties.

    Lists the region folders under the Stonewater "Property ID Folders"
    location in SharePoint, keeps only the known regions, and for every
    property folder in those regions downloads each sub-folder whose name
    (case-insensitively) contains one of: "ra coordinator info",
    "retrofit assessment", "ra info", "mtp" or "mid-term".

    Credentials and the site id are read from the SHAREPOINT_CLIENT_ID,
    SHAREPOINT_CLIENT_SECRET, SHAREPOINT_TENANT_ID and
    OSMOSIS_SHAREPOINT_SITE_ID environment variables; unset variables are
    passed to ``SharePointClient`` as ``None``.

    :param download_root: local folder under which the
        ``<region>/<property>/<sub-folder>`` tree is written; defaults to the
        location the function was originally written for.
    :return: None
    """
    # Remote root of the per-region folders. Remote paths are always joined
    # with "/" so they do not depend on the host OS path separator.
    remote_root = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"

    folders_to_keep = {
        "1. Herefordshire", "2. Bedfordshire", "3. Wiltshire", "4. Bournemouth",
        "5. Coventry", "6. West Sussex", "7. Dorset", "8. Cambridgeshire",
        "9. Guildford", "10. Little Island", "11. CCS Dorset",
    }

    # Lower-cased name fragments identifying the retrofit assessment or mtp folders
    wanted_name_fragments = (
        "ra coordinator info",
        "retrofit assessment",
        "ra info",
        "mtp",
        "mid-term",
    )

    sharepoint_client = SharePointClient(
        tenant_id=os.getenv("SHAREPOINT_TENANT_ID", None),
        client_id=os.getenv("SHAREPOINT_CLIENT_ID", None),
        client_secret=os.getenv("SHAREPOINT_CLIENT_SECRET", None),
        site_id=os.getenv("OSMOSIS_SHAREPOINT_SITE_ID", None),
    )
    # NOTE(review): assumes the document drive id is stable for the lifetime
    # of the client, so it is looked up once instead of on every call.
    drive_id = sharepoint_client.document_drive["id"]

    # Retrieve the region folders from SharePoint
    contents = sharepoint_client.list_folder_contents(
        drive_id=drive_id,
        folder_path=remote_root,
    )
    folders_to_pull = [
        folder for folder in contents["value"] if folder["name"] in folders_to_keep
    ]

    for folder_to_pull in folders_to_pull:
        region_name = folder_to_pull["name"]

        # NOTE(review): only this listing passes page_size; confirm the client
        # follows pagination, otherwise regions with more property folders
        # than one page could be truncated.
        folder_contents = sharepoint_client.list_folder_contents(
            drive_id=drive_id,
            folder_path="/".join((remote_root, region_name)),
            page_size=100,
        )

        for property_folder in folder_contents["value"]:
            property_name = property_folder["name"]

            # We go into each property folder and get the contents
            property_folder_contents = sharepoint_client.list_folder_contents(
                drive_id=drive_id,
                folder_path="/".join((remote_root, region_name, property_name)),
            )
            if not property_folder_contents.get("value"):
                continue

            # We look for the retrofit assessment folder or mtp folders
            property_sub_folders = [
                item for item in property_folder_contents["value"]
                if any(fragment in item["name"].lower() for fragment in wanted_name_fragments)
            ]

            for property_sub_folder in property_sub_folders:
                sub_folder_name = property_sub_folder["name"]

                # Mirror the remote <region>/<property>/<sub-folder> layout locally
                download_dir = os.path.join(
                    download_root, region_name, property_name, sub_folder_name
                )

                # We download the folder
                sharepoint_client.download_sharepoint_folder(
                    drive_id=drive_id,
                    folder_path="/".join(
                        (remote_root, region_name, property_name, sub_folder_name)
                    ),
                    download_dir=download_dir,
                    excluded_file_types=["MOV", "jpg"],
                )
|