mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
proof of concept for script to work
This commit is contained in:
parent
f25d174289
commit
d26383ee25
8 changed files with 81 additions and 45 deletions
1
.db-env
1
.db-env
|
|
@ -6,6 +6,5 @@ POSTGRES_PORT=5432
|
|||
PGADMIN_DEFAULT_EMAIL=junte@domna.homes
|
||||
PGADMIN_DEFAULT_PASSWORD=makingwarmhomes
|
||||
DATABASE_URL=postgresql://postgres:makingwarmhomes@db:5432/postgres
|
||||
|
||||
# Prod (dev-aws) database - do not use!
|
||||
#DATABASE_URL=postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB
|
||||
|
|
@ -1,9 +1,5 @@
|
|||
version: '3.8'
|
||||
|
||||
networks:
|
||||
survey-net:
|
||||
driver: bridge
|
||||
|
||||
services:
|
||||
survey-extractor:
|
||||
user: "${UID}:${GID}"
|
||||
|
|
@ -45,5 +41,9 @@ services:
|
|||
networks:
|
||||
- survey-net
|
||||
|
||||
networks:
|
||||
survey-net:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,8 @@
|
|||
import os
|
||||
# SECURITY(review): the SharePoint client ID, client secret, tenant ID and
# drive ID below are hard-coded string literals in source. Rotate the client
# secret and supply all of these through the process environment (for example
# an untracked env file) instead of assigning them here.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
|
||||
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
|
||||
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
|
||||
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
|
||||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
|
||||
from pprint import pformat
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
|
|
@ -8,11 +13,9 @@ from etl.temp.submissionReader import SubmissionReader
|
|||
import os
|
||||
from fuzzywuzzy import fuzz
|
||||
import os
|
||||
# SECURITY(review): the SharePoint client ID, client secret, tenant ID and
# drive IDs below are hard-coded string literals in source. Rotate the client
# secret and supply all of these through the process environment (for example
# an untracked env file) instead of assigning them here.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
|
||||
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
|
||||
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
|
||||
os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
|
||||
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
|
||||
|
||||
|
||||
|
||||
|
||||
def get_band(score):
|
||||
bands = [
|
||||
|
|
@ -154,12 +157,6 @@ def get_no_of_wet_rooms(str):
|
|||
count += 1
|
||||
return count
|
||||
|
||||
def get_insulation_type(str):
    """
    Classify a free-text insulation description.

    :param str: Description text; matched case-insensitively.
    :return: "FOAM" when the text contains "foam", otherwise "GENERAL".
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept because
    # renaming it would change the signature seen by keyword callers.
    lowered = str.lower()
    return "FOAM" if "foam" in lowered else "GENERAL"
|
||||
|
||||
|
||||
def get_jjc_price_matrix(file="empty.csv"):
|
||||
df = pd.read_csv(os.path.join(os.path.dirname(os.path.abspath(__file__)), "ratecard", "jjcRateCards", file))
|
||||
|
|
@ -238,7 +235,7 @@ def main():
|
|||
merged_df = pd.DataFrame()
|
||||
if survey.csr:
|
||||
if survey.csr.insulation_info:
|
||||
insultation = get_insulation_type(survey.csr.insulation_info.type)
|
||||
insultation = survey.csr.insulation_info.type.upper()
|
||||
|
||||
if funding_type == "GBIS":
|
||||
data.update({"insulation": insultation})
|
||||
|
|
@ -247,18 +244,14 @@ def main():
|
|||
data.update({"funding": funding_type.upper()})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif "GENERAL" in insultation.upper():
|
||||
data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
df = pd.DataFrame([data])
|
||||
if survey.pre_site_note.survey_information.address == "1 Havergal Walk, STOKE-ON-TRENT, ST3 5ES":
|
||||
print(f"Insultation is {insultation}" )
|
||||
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif "FOAM" in insultation.upper():
|
||||
data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
else:
|
||||
raise RuntimeError(f"unknonw insulation in gbis: {insultation}")
|
||||
data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif funding_type == "ECO4":
|
||||
data.update({"insulation": insultation})
|
||||
if insultation is None:
|
||||
|
|
@ -266,18 +259,17 @@ def main():
|
|||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif "GENERAL" in insultation.upper():
|
||||
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}"
|
||||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
|
||||
elif "FOAM" in insultation.upper():
|
||||
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}"
|
||||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
else:
|
||||
raise RuntimeError(f"unknonw insultation in eco4: {insultation}")
|
||||
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(get_sap_number(filtered_df["POST INSTALL SAP SCORE"].values[0]))}"
|
||||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
else:
|
||||
raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +1,16 @@
|
|||
# Make a folder in a new directory from a string
|
||||
# Great time to work out asynchronous functionality if I have time
|
||||
import os
|
||||
# SECURITY(review): the SharePoint client ID, client secret, tenant ID and
# drive ID below are hard-coded string literals in source. Rotate the client
# secret and supply all of these through the process environment (for example
# an untracked env file) instead of assigning them here.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
|
||||
os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
|
||||
os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
|
||||
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
|
||||
from etl.scraper.scraper import SharePointInstaller
|
||||
from etl.scraper.scraper import SharePointScraper
|
||||
|
||||
|
||||
jjc = SharePointScraper(SharePointInstaller.JJC)
|
||||
|
||||
|
||||
a = SharePointScraper(SharePointInstaller.JJC)
|
||||
print(jjc.does_folder_exists_at("junte was here", "Tom Miller (JJ CRUMP)"))
|
||||
data = jjc.create_file("junte was here", "Tom Miller (JJ CRUMP)")
|
||||
|
||||
|
||||
|
||||
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
a = pdfReaderToText("/tmp/sharepoint/Horace Maxwell (JJ CRUMP)/W.C. 17.03.2025/Housing association missing/1 Havergal Walk/1 Havergal.pdf")
|
||||
b = a.get_reader()
|
||||
b.insulation_info
|
||||
b.
|
||||
data["webUrl"]
|
||||
|
|
@ -51,7 +51,7 @@ class CSR(SiteNotesExtractor):
|
|||
def get_materials(self):
|
||||
lst = self.get_data_between("Detailed description of existing Cavity Wall Insulation ", "Detailed description of Defects in existing Cavity Wall Insulation")
|
||||
if len(lst) > 2:
|
||||
self.insulation_info = lst[-1]
|
||||
self.insulation_info = Insulation(type=lst[-1])
|
||||
else:
|
||||
dict_ = self.two_columns_processor(lst, ["Detailed description of existing Cavity Wall Insulation "], ["Detailed description of Defects in existing Cavity Wall Insulation"])
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ class SharePointScraper():
|
|||
assert self.sharepoint_client_id is not None, "Please assign SHAREPOINT_CLIENT_ID env variable"
|
||||
assert self.sharepoint_client_secret is not None, "Please assign SHAREPOINT_CLIENT_SECRET env variable"
|
||||
assert self.sharepoint_tenant_id is not None, "Please assign SHAREPOINT_TENANT_ID env variable"
|
||||
assert self.sharepoint_drive is not None, "Please set sharepoint driver id env variable. See SharePointInstaller for more information"
|
||||
assert self.sharepoint_drive.value is not None, "Please set sharepoint driver id env variable. See SharePointInstaller for more information"
|
||||
|
||||
self.surveyor_names = []
|
||||
|
||||
|
|
@ -105,6 +105,28 @@ class SharePointScraper():
|
|||
|
||||
self.surveyor_names = new_list
|
||||
|
||||
|
||||
def does_folder_exists_at(self, file_name, file_path):
    """
    Report whether any entry listed under ``file_path`` matches ``file_name``.

    The comparison is case-insensitive and is a substring test: an entry
    matches when its "name" contains ``file_name``.

    :param file_name: Name (or name fragment) to look for.
    :param file_path: Path whose entries are listed via ``get_folders_in_path``.
    :return: True if at least one listed entry matches, otherwise False.
    """
    listing = self.get_folders_in_path(file_path)['value']
    return any(
        file_name.upper() in entry["name"].upper()
        for entry in listing
    )
|
||||
|
||||
|
||||
def create_file(self, file_name, at_path="/"):
    """
    Create a folder named ``file_name`` under ``at_path`` if none matches yet.

    Despite the method name, the request issued is ``create_folder``.

    :param file_name: Name of the folder to create.
    :param at_path: Parent path in which to create it; defaults to "/".
    :return: Whatever ``SharePointClient.create_folder`` returns when the
        folder is created; None when ``does_folder_exists_at`` does not
        report False (nothing is created in that case).
    """
    client_kwargs = {
        "tenant_id": self.sharepoint_tenant_id,
        "client_id": self.sharepoint_client_id,
        "client_secret": self.sharepoint_client_secret,
        "site_id": self.sharepoint_drive.value,
    }
    client = SharePointClient(**client_kwargs)

    # Guard clause: only a literal False from the existence check triggers creation.
    already_present = self.does_folder_exists_at(file_name, at_path)
    if already_present is not False:
        return None
    return client.create_folder(file_name, at_path)
|
||||
|
||||
|
||||
|
||||
@ensure_surveyor_names_loaded
def get_surveryor_names(self):
    """
    Return the scraper's current ``surveyor_names`` list.

    NOTE(review): the ``ensure_surveyor_names_loaded`` decorator presumably
    populates ``self.surveyor_names`` before this body runs - confirm against
    the decorator's definition.
    """
    names = self.surveyor_names
    return names
|
||||
|
|
|
|||
|
|
@ -17,4 +17,4 @@ def local_survey(pre_site_note_path):
|
|||
return surveyedDataProcessor("122 Fake Street", [pre_site_note_path])
|
||||
|
||||
def test_material(local_survey):
|
||||
assert local_survey.csr.insulation_info == "Foam"
|
||||
assert local_survey.csr.insulation_info.type == "Foam"
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from etl.utils.logger import Logger
|
|||
import tempfile
|
||||
import os
|
||||
|
||||
# Api Documentation: https://learn.microsoft.com/en-us/graph/api/drive-get?view=graph-rest-1.0&tabs=http
|
||||
|
||||
logger = Logger(name="SharePoint.py", level=logging.DEBUG).get_logger()
|
||||
|
||||
|
|
@ -88,7 +89,7 @@ def api_call_decorator(func):
|
|||
response = requests.request(http_method, url, headers=self.headers, json=data)
|
||||
|
||||
# Handle the response
|
||||
if response.status_code == 200:
|
||||
if response.status_code == 200 or response.status_code == 201:
|
||||
response_json = response.json() # Store the response JSON
|
||||
if page_size:
|
||||
results.extend(response_json.get('value', []))
|
||||
|
|
@ -240,6 +241,8 @@ class SharePointClient:
|
|||
@api_call_decorator
|
||||
def list_folder_contents(self, folder_path: str, page_size: int = 100):
|
||||
"""
|
||||
GET drive/root/children
|
||||
|
||||
This function will list the contents of a folder in SharePoint.
|
||||
:param drive_id: The ID of the drive.
|
||||
:param folder_path: The path of the folder.
|
||||
|
|
@ -250,6 +253,28 @@ class SharePointClient:
|
|||
logger.debug(f"Listing folder contents from URL: {url}")
|
||||
return 'GET', url, None
|
||||
|
||||
|
||||
@api_call_decorator
def create_folder(self, file_name, folder_path):
    """
    Build the Microsoft Graph request that creates a folder under ``folder_path``.

    POST https://graph.microsoft.com/v1.0/drives/{drive-id}/root:/{folder_path}:/children
    (or .../drives/{drive-id}/root/children when ``folder_path`` is the drive root)
    Content-Type: application/json
    {
        "name": "New Folder",
        "folder": { },
        "@microsoft.graph.conflictBehavior": "rename"
    }

    :param file_name: Name of the folder to create.
    :param folder_path: Parent folder path relative to the drive root.
        Leading/trailing slashes are ignored; "" or "/" means the drive root.
    :return: ``('POST', url, data)`` - the HTTP method, request URL and JSON
        body. NOTE(review): presumably consumed by ``api_call_decorator``,
        which performs the actual request - confirm against the decorator.
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import quote

    data = {
        "name": file_name,
        "folder": {},
        # Ask Graph to pick a new name instead of failing on a name clash.
        "@microsoft.graph.conflictBehavior": "rename",
    }

    drive_root = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root"
    parent = folder_path.strip("/")
    if parent:
        # Path-based addressing: percent-encode the path so characters such as
        # '#', '?' or '%' in folder names cannot corrupt the URL.
        url = f"{drive_root}:/{quote(parent)}:/children"
    else:
        # The drive root has no path segment; "root:/:/children" (what plain
        # interpolation of "/" would give as "root://:/children") is not valid.
        url = f"{drive_root}/children"

    return 'POST', url, data
|
||||
|
||||
@staticmethod
|
||||
def download_sharepoint_file(download_url):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue