From d26383ee259a3053fd307347b24bb467cbf39dc6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 7 Apr 2025 09:55:37 +0000 Subject: [PATCH] proof of concept for script to work --- .db-env | 1 - .devcontainer/docker-compose.yml | 8 ++-- etl/jjc_invoice.py | 42 ++++++++----------- ...osmosis_monday_to_sharepoint_automation.py | 20 ++++----- etl/pdfReader/sitenotes.py | 2 +- etl/scraper/scraper.py | 24 ++++++++++- etl/tests/test_csr_class.py | 2 +- etl/utils/sharepoint/sharepoint.py | 27 +++++++++++- 8 files changed, 81 insertions(+), 45 deletions(-) diff --git a/.db-env b/.db-env index 04f24d5..9174c1b 100644 --- a/.db-env +++ b/.db-env @@ -6,6 +6,5 @@ POSTGRES_PORT=5432 PGADMIN_DEFAULT_EMAIL=junte@domna.homes PGADMIN_DEFAULT_PASSWORD=makingwarmhomes DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres - # Prod(dev-aws) Database Don't use!!!! #DATABASE_URL=postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB \ No newline at end of file diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 1b5b5f0..df4eccc 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -1,9 +1,5 @@ version: '3.8' -networks: - survey-net: - driver: bridge - services: survey-extractor: user: "${UID}:${GID}" @@ -45,5 +41,9 @@ services: networks: - survey-net +networks: + survey-net: + driver: bridge + volumes: postgres-data: diff --git a/etl/jjc_invoice.py b/etl/jjc_invoice.py index 98200e0..9c14720 100644 --- a/etl/jjc_invoice.py +++ b/etl/jjc_invoice.py @@ -1,3 +1,8 @@ +import os +os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" +os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" +os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING from pprint import pformat from etl.pdfReader.pdfReaderToText import pdfReaderToText @@ -8,11 +13,9 @@ from etl.temp.submissionReader import SubmissionReader import os from fuzzywuzzy import fuzz import os -os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" -os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" -os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" -os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3" -os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" + + + def get_band(score): bands = [ @@ -154,12 +157,6 @@ def get_no_of_wet_rooms(str): count += 1 return count -def get_insulation_type(str): - if "foam" in str.lower(): - return "FOAM" - else: - return "GENERAL" - def get_jjc_price_matrix(file="empty.csv"): df = pd.read_csv(os.path.join(os.path.dirname(os.path.abspath(__file__)), "ratecard", "jjcRateCards", file)) @@ -238,7 +235,7 @@ def main(): merged_df = pd.DataFrame() if survey.csr: if survey.csr.insulation_info: - insultation = get_insulation_type(survey.csr.insulation_info.type) + insultation = survey.csr.insulation_info.type.upper() if funding_type == "GBIS": data.update({"insulation": insultation}) @@ -247,18 +244,14 @@ def main(): data.update({"funding": funding_type.upper()}) df = pd.DataFrame([data]) merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - elif "GENERAL" in insultation.upper(): - data.update({"funding": funding_type.upper() + " Remedial"}) - df = pd.DataFrame([data]) - if survey.pre_site_note.survey_information.address == "1 Havergal Walk, STOKE-ON-TRENT, ST3 5ES": - print(f"Insultation is {insultation}" ) - merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') elif "FOAM" in insultation.upper(): data.update({"funding": funding_type.upper() + " Remedial"}) df = pd.DataFrame([data]) merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') else: - raise RuntimeError(f"unknonw insulation in gbis: {insultation}") + data.update({"funding": funding_type.upper() + " Remedial"}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') elif funding_type == "ECO4": data.update({"insulation": insultation}) if insultation is None: @@ -266,18 +259,17 @@ def main(): data.update({"funding": formatted_funding_type}) df = pd.DataFrame([data]) merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - elif "GENERAL" in insultation.upper(): - formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}" - data.update({"funding": formatted_funding_type}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + elif "FOAM" in insultation.upper(): formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}" data.update({"funding": formatted_funding_type}) df = pd.DataFrame([data]) merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') else: - raise RuntimeError(f"unknonw insultation in eco4: {insultation}") + formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}" + data.update({"funding": formatted_funding_type}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') else: raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}") diff --git a/etl/osmosis_monday_to_sharepoint_automation.py b/etl/osmosis_monday_to_sharepoint_automation.py index 2e7a423..70294d7 100644 --- a/etl/osmosis_monday_to_sharepoint_automation.py +++ b/etl/osmosis_monday_to_sharepoint_automation.py @@ -1,18 +1,16 @@ -# Make a a folder in a new directory from a string -# Great time to work out asynchrnous functionality if i have time +import os +os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" +os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" +os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" from etl.scraper.scraper import SharePointInstaller from etl.scraper.scraper import SharePointScraper +jjc = SharePointScraper(SharePointInstaller.JJC) -a = SharePointScraper(SharePointInstaller.JJC) +print(jjc.does_folder_exists_at("junte was here", "Tom Miller (JJ CRUMP)")) +data = jjc.create_file("junte was here", "Tom Miller (JJ CRUMP)") - - - -from etl.pdfReader.pdfReaderToText import pdfReaderToText -a = pdfReaderToText("/tmp/sharepoint/Horace Maxwell (JJ CRUMP)/W.C. 17.03.2025/Housing association missing/1 Havergal Walk/1 Havergal.pdf") -b = a.get_reader() -b.insulation_info -b. +data["webUrl"] \ No newline at end of file diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 9f7b384..ce9ed0f 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -51,7 +51,7 @@ class CSR(SiteNotesExtractor): def get_materials(self): lst = self.get_data_between("Detailed description of existing Cavity Wall Insulation ", "Detailed description of Defects in existing Cavity Wall Insulation") if len(lst) > 2: - self.insulation_info = lst[-1] + self.insulation_info = Insulation(type=lst[-1]) else: dict_ = self.two_columns_processor(lst, ["Detailed description of existing Cavity Wall Insulation "], ["Detailed description of Defects in existing Cavity Wall Insulation"]) diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py index 3c2b020..8651042 100644 --- a/etl/scraper/scraper.py +++ b/etl/scraper/scraper.py @@ -42,7 +42,7 @@ class SharePointScraper(): assert self.sharepoint_client_id is not None, "Please assign SHAREPOINT_CLIENT_ID env variable" assert self.sharepoint_client_secret is not None, "Please assign SHAREPOINT_CLIENT_SECRET env variable" assert self.sharepoint_tenant_id is not None, "Please assign SHAREPOINT_TENANT_ID env variable" - assert self.sharepoint_drive is not None, "Please set sharepoint driver id env variable. See SharePointInstaller for more information" + assert self.sharepoint_drive.value is not None, "Please set sharepoint driver id env variable. See SharePointInstaller for more information" self.surveyor_names = [] @@ -105,6 +105,28 @@ class SharePointScraper(): self.surveyor_names = new_list + + def does_folder_exists_at(self, file_name, file_path): + for folders in self.get_folders_in_path(file_path)['value']: + if file_name.upper() in folders["name"].upper(): + return True + + return False + + + def create_file(self, file_name, at_path="/"): + + sharepoint_client = SharePointClient( + tenant_id=self.sharepoint_tenant_id, + client_id=self.sharepoint_client_id, + client_secret=self.sharepoint_client_secret, + site_id=self.sharepoint_drive.value, + ) + if self.does_folder_exists_at(file_name, at_path) is False: + return sharepoint_client.create_folder(file_name, at_path) + + + @ensure_surveyor_names_loaded def get_surveryor_names(self): return self.surveyor_names diff --git a/etl/tests/test_csr_class.py b/etl/tests/test_csr_class.py index ddd1a73..db1524a 100644 --- a/etl/tests/test_csr_class.py +++ b/etl/tests/test_csr_class.py @@ -17,4 +17,4 @@ def local_survey(pre_site_note_path): return surveyedDataProcessor("122 Fake Street", [pre_site_note_path]) def test_material(local_survey): - assert local_survey.csr.insulation_info == "Foam" + assert local_survey.csr.insulation_info.type == "Foam" diff --git a/etl/utils/sharepoint/sharepoint.py b/etl/utils/sharepoint/sharepoint.py index 1d1de17..a366161 100644 --- a/etl/utils/sharepoint/sharepoint.py +++ b/etl/utils/sharepoint/sharepoint.py @@ -15,6 +15,7 @@ from etl.utils.logger import Logger import tempfile import os +# Api Documentation: https://learn.microsoft.com/en-us/graph/api/drive-get?view=graph-rest-1.0&tabs=http logger = Logger(name="SharePoint.py", level=logging.DEBUG).get_logger() @@ -88,7 +89,7 @@ def api_call_decorator(func): response = requests.request(http_method, url, headers=self.headers, json=data) # Handle the response - if response.status_code == 200: + if response.status_code == 200 or response.status_code == 201: response_json = response.json() # Store the response JSON if page_size: results.extend(response_json.get('value', [])) @@ -240,6 +241,8 @@ class SharePointClient: @api_call_decorator def list_folder_contents(self, folder_path: str, page_size: int = 100): """ + GET drive/root/children + This function will list the contents of a folder in SharePoint. :param drive_id: The ID of the drive. :param folder_path: The path of the folder. @@ -250,6 +253,28 @@ class SharePointClient: logger.debug(f"Listing folder contents from URL: {url}") return 'GET', url, None + + @api_call_decorator + def create_folder(self, file_name, folder_path): + """ + POST https://graph.microsoft.com/v1.0/me/drive/root/children + Content-Type: application/json + { + "name": "New Folder", + "folder": { }, + "@microsoft.graph.conflictBehavior": "rename" + } + + """ + data = { + "name": file_name, + "folder": { }, + "@microsoft.graph.conflictBehavior": "rename" + } + url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{folder_path}:/children" + + return 'POST', url, data + @staticmethod def download_sharepoint_file(download_url): """