proof of concept for script to work

This commit is contained in:
Jun-te Kim 2025-04-07 09:55:37 +00:00
parent f25d174289
commit d26383ee25
8 changed files with 81 additions and 45 deletions

View file

@ -6,6 +6,5 @@ POSTGRES_PORT=5432
PGADMIN_DEFAULT_EMAIL=junte@domna.homes
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres
# Prod(dev-aws) Database Don't use!!!!
#DATABASE_URL=postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB

View file

@ -1,9 +1,5 @@
version: '3.8'
networks:
survey-net:
driver: bridge
services:
survey-extractor:
user: "${UID}:${GID}"
@ -45,5 +41,9 @@ services:
networks:
- survey-net
networks:
survey-net:
driver: bridge
volumes:
postgres-data:

View file

@ -1,3 +1,8 @@
import os
# SECURITY: the SharePoint app credentials assigned in this run of statements are
# hard-coded in source control. They should be supplied by the runtime
# environment / a secret store instead, and the exposed client secret rotated.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
from pprint import pformat
from etl.pdfReader.pdfReaderToText import pdfReaderToText
@ -8,11 +13,9 @@ from etl.temp.submissionReader import SubmissionReader
import os
from fuzzywuzzy import fuzz
import os
# SECURITY: the SharePoint app credentials assigned in this run of statements are
# hard-coded in source control. They should be supplied by the runtime
# environment / a secret store instead, and the exposed client secret rotated.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
def get_band(score):
bands = [
@ -154,12 +157,6 @@ def get_no_of_wet_rooms(str):
count += 1
return count
def get_insulation_type(str):
    """Classify an insulation description as ``"FOAM"`` or ``"GENERAL"``.

    The description is lower-cased and searched for the substring "foam";
    any description without it is reported as "GENERAL".

    NOTE: the parameter name shadows the builtin ``str``; it is kept so
    existing keyword callers continue to work.
    """
    description = str.lower()
    return "FOAM" if "foam" in description else "GENERAL"
def get_jjc_price_matrix(file="empty.csv"):
df = pd.read_csv(os.path.join(os.path.dirname(os.path.abspath(__file__)), "ratecard", "jjcRateCards", file))
@ -238,7 +235,7 @@ def main():
merged_df = pd.DataFrame()
if survey.csr:
if survey.csr.insulation_info:
insultation = get_insulation_type(survey.csr.insulation_info.type)
insultation = survey.csr.insulation_info.type.upper()
if funding_type == "GBIS":
data.update({"insulation": insultation})
@ -247,18 +244,14 @@ def main():
data.update({"funding": funding_type.upper()})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
elif "GENERAL" in insultation.upper():
data.update({"funding": funding_type.upper() + " Remedial"})
df = pd.DataFrame([data])
if survey.pre_site_note.survey_information.address == "1 Havergal Walk, STOKE-ON-TRENT, ST3 5ES":
print(f"Insultation is {insultation}" )
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
elif "FOAM" in insultation.upper():
data.update({"funding": funding_type.upper() + " Remedial"})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
else:
raise RuntimeError(f"unknonw insulation in gbis: {insultation}")
data.update({"funding": funding_type.upper() + " Remedial"})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
elif funding_type == "ECO4":
data.update({"insulation": insultation})
if insultation is None:
@ -266,18 +259,17 @@ def main():
data.update({"funding": formatted_funding_type})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
elif "GENERAL" in insultation.upper():
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}"
data.update({"funding": formatted_funding_type})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
elif "FOAM" in insultation.upper():
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}"
data.update({"funding": formatted_funding_type})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
else:
raise RuntimeError(f"unknonw insultation in eco4: {insultation}")
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}"
data.update({"funding": formatted_funding_type})
df = pd.DataFrame([data])
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
else:
raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}")

View file

@ -1,18 +1,16 @@
# Make a folder in a new directory from a string
# Great time to work out asynchronous functionality if I have time
import os
# SECURITY: the SharePoint app credentials assigned in this run of statements are
# hard-coded in source control. They should be supplied by the runtime
# environment / a secret store instead, and the exposed client secret rotated.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
from etl.scraper.scraper import SharePointInstaller
from etl.scraper.scraper import SharePointScraper
jjc = SharePointScraper(SharePointInstaller.JJC)
a = SharePointScraper(SharePointInstaller.JJC)
print(jjc.does_folder_exists_at("junte was here", "Tom Miller (JJ CRUMP)"))
data = jjc.create_file("junte was here", "Tom Miller (JJ CRUMP)")
from etl.pdfReader.pdfReaderToText import pdfReaderToText
a = pdfReaderToText("/tmp/sharepoint/Horace Maxwell (JJ CRUMP)/W.C. 17.03.2025/Housing association missing/1 Havergal Walk/1 Havergal.pdf")
b = a.get_reader()
b.insulation_info
b.
data["webUrl"]

View file

@ -51,7 +51,7 @@ class CSR(SiteNotesExtractor):
def get_materials(self):
lst = self.get_data_between("Detailed description of existing Cavity Wall Insulation ", "Detailed description of Defects in existing Cavity Wall Insulation")
if len(lst) > 2:
self.insulation_info = lst[-1]
self.insulation_info = Insulation(type=lst[-1])
else:
dict_ = self.two_columns_processor(lst, ["Detailed description of existing Cavity Wall Insulation "], ["Detailed description of Defects in existing Cavity Wall Insulation"])

View file

@ -42,7 +42,7 @@ class SharePointScraper():
assert self.sharepoint_client_id is not None, "Please assign SHAREPOINT_CLIENT_ID env variable"
assert self.sharepoint_client_secret is not None, "Please assign SHAREPOINT_CLIENT_SECRET env variable"
assert self.sharepoint_tenant_id is not None, "Please assign SHAREPOINT_TENANT_ID env variable"
assert self.sharepoint_drive is not None, "Please set sharepoint driver id env variable. See SharePointInstaller for more information"
assert self.sharepoint_drive.value is not None, "Please set sharepoint driver id env variable. See SharePointInstaller for more information"
self.surveyor_names = []
@ -105,6 +105,28 @@ class SharePointScraper():
self.surveyor_names = new_list
def does_folder_exists_at(self, file_name, file_path):
    """Report whether an entry matching ``file_name`` is listed under ``file_path``.

    The listing comes from ``self.get_folders_in_path(file_path)['value']``.
    An entry matches when its ``"name"`` contains ``file_name``, compared
    case-insensitively.

    NOTE(review): this is a substring match, not an equality check, so
    "Smith" matches an entry named "John Smith" - confirm that is intended.

    :param file_name: Name (or name fragment) to look for.
    :param file_path: Path whose entries are listed.
    :return: ``True`` if at least one entry matches, otherwise ``False``.
    """
    entries = self.get_folders_in_path(file_path)['value']
    return any(file_name.upper() in entry["name"].upper() for entry in entries)
def create_file(self, file_name, at_path="/"):
    """Create a folder named ``file_name`` under ``at_path`` unless one already matches.

    Despite its name this method creates a *folder*: it delegates to
    ``SharePointClient.create_folder``.

    :param file_name: Name of the folder to create.
    :param at_path: Path of the parent folder in which to create it.
    :return: The result of ``SharePointClient.create_folder`` when a folder
        is created, or ``None`` when ``does_folder_exists_at`` reports a
        match and nothing is created.
    """
    # Check before building a client so that nothing is constructed when
    # there is no folder to create.
    if self.does_folder_exists_at(file_name, at_path):
        return None

    sharepoint_client = SharePointClient(
        tenant_id=self.sharepoint_tenant_id,
        client_id=self.sharepoint_client_id,
        client_secret=self.sharepoint_client_secret,
        site_id=self.sharepoint_drive.value,
    )
    return sharepoint_client.create_folder(file_name, at_path)
# Return the surveyor names stored on this scraper (``self.surveyor_names``).
# NOTE(review): ``ensure_surveyor_names_loaded`` is defined outside this view;
# presumably it populates ``self.surveyor_names`` before the call - confirm.
@ensure_surveyor_names_loaded
def get_surveryor_names(self):
return self.surveyor_names

View file

@ -17,4 +17,4 @@ def local_survey(pre_site_note_path):
return surveyedDataProcessor("122 Fake Street", [pre_site_note_path])
def test_material(local_survey):
assert local_survey.csr.insulation_info == "Foam"
assert local_survey.csr.insulation_info.type == "Foam"

View file

@ -15,6 +15,7 @@ from etl.utils.logger import Logger
import tempfile
import os
# Api Documentation: https://learn.microsoft.com/en-us/graph/api/drive-get?view=graph-rest-1.0&tabs=http
logger = Logger(name="SharePoint.py", level=logging.DEBUG).get_logger()
@ -88,7 +89,7 @@ def api_call_decorator(func):
response = requests.request(http_method, url, headers=self.headers, json=data)
# Handle the response
if response.status_code == 200:
if response.status_code == 200 or response.status_code == 201:
response_json = response.json() # Store the response JSON
if page_size:
results.extend(response_json.get('value', []))
@ -240,6 +241,8 @@ class SharePointClient:
@api_call_decorator
def list_folder_contents(self, folder_path: str, page_size: int = 100):
"""
GET drive/root/children
This function will list the contents of a folder in SharePoint.
:param drive_id: The ID of the drive.
:param folder_path: The path of the folder.
@ -250,6 +253,28 @@ class SharePointClient:
logger.debug(f"Listing folder contents from URL: {url}")
return 'GET', url, None
@api_call_decorator
def create_folder(self, file_name, folder_path):
    """
    Build the Microsoft Graph request that creates a folder inside ``folder_path``.

    Request shape:
        POST /drives/{drive-id}/root:/{folder-path}:/children   (nested parent)
        POST /drives/{drive-id}/root/children                   (drive root)
        Content-Type: application/json
        {
            "name": "New Folder",
            "folder": { },
            "@microsoft.graph.conflictBehavior": "rename"
        }

    :param file_name: Name of the folder to create.
    :param folder_path: Un-encoded path of the parent folder, relative to the
        drive root. ``""``, ``"/"`` or ``None`` address the drive root itself;
        leading and trailing slashes are ignored.
    :return: ``(http_method, url, json_body)``; the tuple is handed to
        ``api_call_decorator``, which is expected to perform the request.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote

    data = {
        "name": file_name,
        "folder": {},
        # "rename" makes Graph pick a fresh name instead of failing on a clash.
        "@microsoft.graph.conflictBehavior": "rename",
    }

    drive_url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}"
    parent_path = (folder_path or "").strip("/")
    if parent_path:
        # Path-based addressing; percent-encode so characters such as '#',
        # '?' or '%' in folder names cannot corrupt the URL ('/' is kept).
        url = f"{drive_url}/root:/{quote(parent_path)}:/children"
    else:
        # The drive root has no path segment: "root:/:/children" is not a
        # valid address, so use the plain root collection.
        url = f"{drive_url}/root/children"
    return 'POST', url, data
@staticmethod
def download_sharepoint_file(download_url):
"""