import os
import shutil

from tqdm import tqdm

from etl.access_reporting.app import SharePointClient

# Local folder whose survey data delete_large_files() prunes by default.
_DEFAULT_SURVEYS_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"

# Local folder download_data_from_sharepoint() writes into by default.
_DEFAULT_DOWNLOAD_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys - 2"

# Remote folder (inside the document drive) that holds one sub-folder per region.
_SHAREPOINT_ROOT = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"

# Property sub-folders that are removed wholesale. Several spellings of the
# same folder appear because the names are matched exactly.
_FOLDERS_TO_DELETE = (
    "1. RA Property Pics",
    "4. Air Tightness Tests",
    "5. RD Design Info",
    "1. RA Property PIcs",
    "Post EPC Photos",
    "4. RD Design Info",
    "5. Installer Info",
    "6. Trustmark lodgement",
    "7.Post Install Inspection Photos",
    "6. Trustmark Lodgement",
    "7. Post Inspection Photos",
)

# Names the coordinator folder may have, in order of preference.
_COORDINATOR_FOLDERS = ("2. RA Coordinator Info", "1. RA Coordinator Info")

# File suffixes (case-sensitive) removed from the coordinator folder.
_UNWANTED_SUFFIXES = (".MOV", ".jpg")

# Region folders under _SHAREPOINT_ROOT that are downloaded; others are ignored.
_REGION_FOLDERS = (
    "1. Herefordshire",
    "2. Bedfordshire",
    "3. Wiltshire",
    "4. Bournemouth",
    "5. Coventry",
    "6. West Sussex",
    "7. Dorset",
    "8. Cambridgeshire",
    "9. Guildford",
    "10. Little Island",
    "11. CCS Dorset",
)

# A property sub-folder is downloaded when its lower-cased name contains any
# of these fragments (retrofit assessment or mid-term/MTP folders).
_SUB_FOLDER_KEYWORDS = (
    "ra coordinator info",
    "retrofit assessment",
    "ra info",
    "mtp",
    "mid-term",
)


def _delete_unwanted_folders(property_path):
    """Remove every directory named in ``_FOLDERS_TO_DELETE`` from one property folder."""
    entries = os.listdir(property_path)
    for folder_name in _FOLDERS_TO_DELETE:
        if folder_name not in entries:
            continue
        target = os.path.join(property_path, folder_name)
        if os.path.isdir(target):
            # rmtree removes the folder even when it is not empty.
            shutil.rmtree(target)


def _clean_coordinator_folder(property_path):
    """Remove .MOV/.jpg files and the "Property Pics" folder from the coordinator folder.

    Does nothing when the property has no coordinator folder under either of
    the known names, so one incomplete property cannot abort the whole run.
    """
    for candidate in _COORDINATOR_FOLDERS:
        coordinator_path = os.path.join(property_path, candidate)
        if os.path.isdir(coordinator_path):
            break
    else:
        return

    entries = os.listdir(coordinator_path)
    for entry in entries:
        # str.endswith accepts a tuple, so one call covers both suffixes.
        if entry.endswith(_UNWANTED_SUFFIXES):
            os.remove(os.path.join(coordinator_path, entry))

    pics_path = os.path.join(coordinator_path, "Property Pics")
    if "Property Pics" in entries and os.path.isdir(pics_path):
        shutil.rmtree(pics_path)


def delete_large_files(folder_path=_DEFAULT_SURVEYS_DIR):
    """
    Delete photos, designs and other files which we don't need.

    The layout walked is ``folder_path/<sub-folder>/<property folder>/...``.
    For every property folder, the directories listed in
    ``_FOLDERS_TO_DELETE`` are removed, then the coordinator folder is
    stripped of ``.MOV``/``.jpg`` files and of its "Property Pics" folder.
    Entries that are not directories are skipped at both levels.

    :param folder_path: root folder to clean; defaults to the original
        hard-coded surveys directory.
    :return: None
    """
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        # Each sub-folder holds the individual property folders.
        for property_name in tqdm(os.listdir(subfolder_path)):
            property_path = os.path.join(subfolder_path, property_name)
            if not os.path.isdir(property_path):
                continue

            _delete_unwanted_folders(property_path)
            _clean_coordinator_folder(property_path)


def _is_wanted_sub_folder(name):
    """Return True when ``name`` contains one of ``_SUB_FOLDER_KEYWORDS`` (case-insensitive)."""
    lowered = name.lower()
    return any(keyword in lowered for keyword in _SUB_FOLDER_KEYWORDS)


def download_data_from_sharepoint(download_root=_DEFAULT_DOWNLOAD_DIR, remote_root=_SHAREPOINT_ROOT):
    """
    Download the retrofit assessment / MTP folders of every property from SharePoint.

    Credentials and the site id are read from the environment variables
    ``SHAREPOINT_TENANT_ID``, ``SHAREPOINT_CLIENT_ID``,
    ``SHAREPOINT_CLIENT_SECRET`` and ``OSMOSIS_SHAREPOINT_SITE_ID``. An unset
    variable is passed to ``SharePointClient`` as ``None``.

    The remote tree walked is
    ``remote_root/<region>/<property>/<sub-folder>``. Only regions listed in
    ``_REGION_FOLDERS`` are visited, and only sub-folders whose name matches
    ``_SUB_FOLDER_KEYWORDS`` are downloaded. Each one is written to
    ``download_root/<region>/<property>/<sub-folder>`` with the file types
    "MOV" and "jpg" excluded.

    :param download_root: local folder to download into; defaults to the
        original hard-coded directory.
    :param remote_root: SharePoint folder holding the region folders;
        defaults to the original hard-coded path.
    :return: None
    """
    sharepoint_client = SharePointClient(
        tenant_id=os.getenv("SHAREPOINT_TENANT_ID"),
        client_id=os.getenv("SHAREPOINT_CLIENT_ID"),
        client_secret=os.getenv("SHAREPOINT_CLIENT_SECRET"),
        site_id=os.getenv("OSMOSIS_SHAREPOINT_SITE_ID"),
    )
    drive_id = sharepoint_client.document_drive["id"]

    contents = sharepoint_client.list_folder_contents(
        drive_id=drive_id,
        folder_path=remote_root,
    )
    regions = [item for item in contents["value"] if item["name"] in _REGION_FOLDERS]

    for region in regions:
        # Remote paths are always joined with "/", never with os.path.join,
        # which would use the local OS separator.
        region_path = "/".join((remote_root, region["name"]))
        region_contents = sharepoint_client.list_folder_contents(
            drive_id=drive_id,
            folder_path=region_path,
            page_size=100,
        )

        # A listing without "value" is treated as empty, the same way the
        # property-level listing below is treated.
        for property_folder in region_contents.get("value") or []:
            property_remote_path = "/".join((region_path, property_folder["name"]))
            property_contents = sharepoint_client.list_folder_contents(
                drive_id=drive_id,
                folder_path=property_remote_path,
            )

            for sub_folder in property_contents.get("value") or []:
                if not _is_wanted_sub_folder(sub_folder["name"]):
                    continue

                sharepoint_client.download_sharepoint_folder(
                    drive_id=drive_id,
                    folder_path="/".join((property_remote_path, sub_folder["name"])),
                    download_dir=os.path.join(
                        download_root,
                        region["name"],
                        property_folder["name"],
                        sub_folder["name"],
                    ),
                    excluded_file_types=["MOV", "jpg"],
                )