mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
changes made
This commit is contained in:
parent
a2c4cfedbe
commit
7b07edf8c4
4 changed files with 167 additions and 86 deletions
80
etl/filechecker.py
Normal file
80
etl/filechecker.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
import os
|
||||
# SharePoint / Microsoft Graph settings for this script.
# Presumably these are read by etl.scraper.scraper, which is imported after
# this point -- confirm against that module.
os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"

# SECURITY: the client secret used to be hard-coded on this line and was
# committed to the repository. Treat that value as leaked and rotate it.
# The secret must now be supplied by the environment (shell export, .env
# loader, CI secret store, ...) and is never written in source.
if not os.environ.get("SHAREPOINT_CLIENT_SECRET"):
    raise RuntimeError(
        "SHAREPOINT_CLIENT_SECRET is not set; export it before running this script"
    )

os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
|
||||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
|
||||
import pandas as pd
|
||||
import hashlib
|
||||
|
||||
def calculate_sha256(bytes_io, chunk_size=1024 * 1024):
    """Return the hex-encoded SHA-256 digest of a seekable binary stream.

    The stream is rewound to its start before hashing, so the result does
    not depend on how much of it was consumed beforehand. After the call the
    stream position is at end-of-stream.

    Args:
        bytes_io: Seekable binary file-like object (e.g. ``io.BytesIO``).
        chunk_size: Number of bytes requested per ``read`` call.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    bytes_io.seek(0)  # Make sure we're at the start
    digest = hashlib.sha256()
    # Feed the hash in bounded chunks instead of one read() of the whole
    # stream, so a large file is not duplicated in memory while hashing.
    for chunk in iter(lambda: bytes_io.read(chunk_size), b""):
        digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
# Folders directly under the drive root whose names contain any of these
# substrings are left out of the scan.
EXCLUDED_ROOT_FOLDER_MARKERS = ("Khalim", ".Training")


def _is_folder(entry):
    """Return True when a listing entry carries no 'file' key."""
    # Presumably these are Microsoft Graph driveItems, where only files
    # carry a "file" facet -- confirm against SharePointScraper.
    return "file" not in entry


def _child_folder_paths(scraper, parent_paths):
    """Return the path of every sub-folder one level below each parent path."""
    children = []
    for parent in parent_paths:
        listing = scraper.get_folders_in_path(parent)
        children.extend(
            parent + "/" + entry["name"]
            for entry in listing["value"]
            if _is_folder(entry)
        )
    return children


def _hash_files_in(scraper, folder_path):
    """Return one CSV row (folder, file name, sha256) per file in folder_path."""
    rows = []
    for entry in scraper.get_folders_in_path(folder_path)["value"]:
        if _is_folder(entry):
            continue
        # assumes get_file_content returns a seekable binary stream, as
        # required by calculate_sha256 -- TODO confirm
        content = scraper.get_file_content(entry["@microsoft.graph.downloadUrl"])
        rows.append({
            "Directories": folder_path,
            "Photo Name": entry["name"],
            "sha256": calculate_sha256(content),
        })
    return rows


# NOTE: this variable used to be called `south_coast_scraper` although it has
# always been constructed for the JJC installer.
jjc_scraper = SharePointScraper(SharePointInstaller.JJC)

# Level 0: folders at the drive root, minus the excluded ones.
root_listing = jjc_scraper.get_folders_in_path('/')
list_of_file_names = [
    "/" + entry["name"]
    for entry in root_listing['value']
    if not any(marker in entry["name"] for marker in EXCLUDED_ROOT_FOLDER_MARKERS)
    and _is_folder(entry)
]

# Levels 1-4 are named after the variables of the original script; the
# actual meaning of each level depends on the SharePoint layout.
list_of_dates = _child_folder_paths(jjc_scraper, list_of_file_names)
print(list_of_dates)

list_of_housing_associations = _child_folder_paths(jjc_scraper, list_of_dates)
list_of_address = _child_folder_paths(jjc_scraper, list_of_housing_associations)
list_of_pictures = _child_folder_paths(jjc_scraper, list_of_address)
print(list_of_pictures)

# Hash every file that sits directly inside a deepest-level folder.
final_list = []
for picture_folder in list_of_pictures:
    final_list.extend(_hash_files_in(jjc_scraper, picture_folder))

final_df = pd.DataFrame(final_list)

# The DataFrame index is written as the first column, as before.
final_df.to_csv("jjc.csv")
|
||||
|
|
@ -21,7 +21,6 @@ class HubSpotClient():
|
|||
found_deals = []
|
||||
after = None
|
||||
while True:
|
||||
print("hello world")
|
||||
search_request = PublicObjectSearchRequest(
|
||||
filter_groups=[{
|
||||
"filters": [{
|
||||
|
|
@ -36,7 +35,8 @@ class HubSpotClient():
|
|||
"work_type",
|
||||
"property_needs_trickle_vents",
|
||||
"domna_survey_post_sap",
|
||||
"existing_wall_insulation"
|
||||
"existing_wall_insulation",
|
||||
"installer",
|
||||
],
|
||||
limit=200,
|
||||
after=after,
|
||||
|
|
@ -46,7 +46,6 @@ class HubSpotClient():
|
|||
if not response.paging or not response.paging.next:
|
||||
break
|
||||
after = response.paging.next.after
|
||||
return found_deals
|
||||
|
||||
all_deals = []
|
||||
if hasattr(found_deals, "results"):
|
||||
|
|
@ -58,7 +57,8 @@ class HubSpotClient():
|
|||
needs_trickle_ventilation=True if deal.properties["property_needs_trickle_vents"].upper() == "YES" else False,
|
||||
post_sap_score=int(deal.properties["domna_survey_post_sap"]),
|
||||
existing_wall_insulation=deal.properties["existing_wall_insulation"],
|
||||
no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"])
|
||||
no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]),
|
||||
installer=deal.properties["Installer"],
|
||||
))
|
||||
return all_deals
|
||||
else:
|
||||
|
|
@ -70,4 +70,4 @@ class HubSpotClient():
|
|||
print(f"Pipeline: {pipeline.label}")
|
||||
for stage in pipeline.stages:
|
||||
print(f" - Label: {stage.label}")
|
||||
print(f" ID: {stage.id}") #
|
||||
print(f" ID: {stage.id}") #
|
||||
|
|
|
|||
|
|
@ -17,4 +17,5 @@ class SubmissionInfoFromDeal(BaseModel):
|
|||
needs_trickle_ventilation: bool
|
||||
post_sap_score: int
|
||||
existing_wall_insulation: str
|
||||
no_of_wet_rooms: int
|
||||
no_of_wet_rooms: int
|
||||
installer: str
|
||||
|
|
@ -13,7 +13,7 @@ os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
|
|||
hubSpotClient = HubSpotClient()
|
||||
|
||||
deals = hubSpotClient.get_deals_from_deal_stage(DealStage.CUSTOMER_CONTACTED)
|
||||
|
||||
print(deals)
|
||||
hubSpotClient.print_all_pipeline_ids()
|
||||
|
||||
csv_list = []
|
||||
|
|
@ -26,99 +26,99 @@ for deal in deals:
|
|||
"wetrooms": deal.no_of_wet_rooms,
|
||||
"hubspot_wall_insulation": deal.existing_wall_insulation,
|
||||
"POST INSTALL SAP SCORE": deal.post_sap_score,
|
||||
"Installer": deal.installer,
|
||||
})
|
||||
|
||||
|
||||
hubspot_submissions = pd.DataFrame(csv_list)
|
||||
price_empty = get_jjc_price_matrix()
|
||||
price_foam = get_jjc_price_matrix("foam.csv")
|
||||
price_general = get_jjc_price_matrix("general.csv")
|
||||
total_price = []
|
||||
# hubspot_submissions = pd.DataFrame(csv_list)
|
||||
# price_empty = get_jjc_price_matrix()
|
||||
# price_foam = get_jjc_price_matrix("foam.csv")
|
||||
# price_general = get_jjc_price_matrix("general.csv")
|
||||
# total_price = []
|
||||
|
||||
|
||||
jjc = SharePointScraper(SharePointInstaller.JJC, development=True)
|
||||
file_paths = jjc.download_file_for_each_address()
|
||||
list_of_surveys = []
|
||||
# jjc = SharePointScraper(SharePointInstaller.JJC, development=True)
|
||||
# file_paths = jjc.download_file_for_each_address()
|
||||
# list_of_surveys = []
|
||||
|
||||
for eachAddress in file_paths:
|
||||
for address, files in eachAddress.items():
|
||||
list_of_surveys.append(surveyedDataProcessor(address, files))
|
||||
for survey in list_of_surveys:
|
||||
if survey.pre_site_note:
|
||||
floor_banding, total_floor_area = work_out_total_floor_area(survey.pre_site_note)
|
||||
letter, number = survey.pre_site_note.survey_information.current_sap.split(" ")
|
||||
pre_sap_score = number+letter
|
||||
# for eachAddress in file_paths:
|
||||
# for address, files in eachAddress.items():
|
||||
# list_of_surveys.append(surveyedDataProcessor(address, files))
|
||||
# for survey in list_of_surveys:
|
||||
# if survey.pre_site_note:
|
||||
# floor_banding, total_floor_area = work_out_total_floor_area(survey.pre_site_note)
|
||||
# letter, number = survey.pre_site_note.survey_information.current_sap.split(" ")
|
||||
# pre_sap_score = number+letter
|
||||
|
||||
address = survey.pre_site_note.survey_information.address.split(",")
|
||||
address = [item.strip() for item in address][0]
|
||||
filtered_df = hubspot_submissions[hubspot_submissions["Address"].apply(lambda x: address.upper() == x.split(",")[0].upper())]
|
||||
if len(filtered_df) == 1:
|
||||
funding_type = type_of_work(letter.upper(), get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])[-1])
|
||||
data = {
|
||||
"Address": survey.pre_site_note.survey_information.address,
|
||||
"HubSpot Address": filtered_df["Address"].values[0],
|
||||
"Pre SAP from sharepoint": number,
|
||||
"Post SAP from surveyor": filtered_df["POST INSTALL SAP SCORE"].values[0],
|
||||
"Surveyor's Name": survey.pre_site_note.assessor_information.name,
|
||||
"floor_area_group" : floor_banding,
|
||||
"wetrooms" : filtered_df["wetrooms"].values[0],
|
||||
"Trickle Vent" : filtered_df["Trickle Vent"].values[0],
|
||||
"survey_stated_work_type": filtered_df["hubspot_work_type"].values[0],
|
||||
}
|
||||
# address = survey.pre_site_note.survey_information.address.split(",")
|
||||
# address = [item.strip() for item in address][0]
|
||||
# filtered_df = hubspot_submissions[hubspot_submissions["Address"].apply(lambda x: address.upper() == x.split(",")[0].upper())]
|
||||
# if len(filtered_df) == 1:
|
||||
# funding_type = type_of_work(letter.upper(), get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])[-1])
|
||||
# data = {
|
||||
# "Address": survey.pre_site_note.survey_information.address,
|
||||
# "HubSpot Address": filtered_df["Address"].values[0],
|
||||
# "Pre SAP from sharepoint": number,
|
||||
# "Post SAP from surveyor": filtered_df["POST INSTALL SAP SCORE"].values[0],
|
||||
# "Surveyor's Name": survey.pre_site_note.assessor_information.name,
|
||||
# "floor_area_group" : floor_banding,
|
||||
# "wetrooms" : filtered_df["wetrooms"].values[0],
|
||||
# "Trickle Vent" : filtered_df["Trickle Vent"].values[0],
|
||||
# "survey_stated_work_type": filtered_df["hubspot_work_type"].values[0],
|
||||
# }
|
||||
|
||||
csr_insulation = None
|
||||
merged_df = pd.DataFrame()
|
||||
if survey.csr:
|
||||
if survey.csr.insulation_info:
|
||||
csr_insulation = survey.csr.insulation_info.type.upper()
|
||||
# csr_insulation = None
|
||||
# merged_df = pd.DataFrame()
|
||||
# if survey.csr:
|
||||
# if survey.csr.insulation_info:
|
||||
# csr_insulation = survey.csr.insulation_info.type.upper()
|
||||
|
||||
hubspot_wall_insulation = None
|
||||
hubspot_wall_insulation = filtered_df["hubspot_wall_insulation"].values[0]
|
||||
data.update({"csr_insulation": csr_insulation})
|
||||
data.update({"hubspot_wall_insulation": hubspot_wall_insulation})
|
||||
# hubspot_wall_insulation = None
|
||||
# hubspot_wall_insulation = filtered_df["hubspot_wall_insulation"].values[0]
|
||||
# data.update({"csr_insulation": csr_insulation})
|
||||
# data.update({"hubspot_wall_insulation": hubspot_wall_insulation})
|
||||
|
||||
if funding_type == "GBIS":
|
||||
if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY":
|
||||
data.update({"funding": funding_type.upper()})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper():
|
||||
data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
else:
|
||||
data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif funding_type == "ECO4":
|
||||
if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY":
|
||||
formatted_funding_type = f"{funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}"
|
||||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper():
|
||||
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}"
|
||||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
else:
|
||||
formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}"
|
||||
data.update({"funding": formatted_funding_type})
|
||||
df = pd.DataFrame([data])
|
||||
merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
else:
|
||||
raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}")
|
||||
# if funding_type == "GBIS":
|
||||
# if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY":
|
||||
# data.update({"funding": funding_type.upper()})
|
||||
# df = pd.DataFrame([data])
|
||||
# merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
# elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper():
|
||||
# data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
# df = pd.DataFrame([data])
|
||||
# merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
# else:
|
||||
# data.update({"funding": funding_type.upper() + " Remedial"})
|
||||
# df = pd.DataFrame([data])
|
||||
# merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
# elif funding_type == "ECO4":
|
||||
# if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY":
|
||||
# formatted_funding_type = f"{funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}"
|
||||
# data.update({"funding": formatted_funding_type})
|
||||
# df = pd.DataFrame([data])
|
||||
# merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
# elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper():
|
||||
# formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}"
|
||||
# data.update({"funding": formatted_funding_type})
|
||||
# df = pd.DataFrame([data])
|
||||
# merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
# else:
|
||||
# formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}"
|
||||
# data.update({"funding": formatted_funding_type})
|
||||
# df = pd.DataFrame([data])
|
||||
# merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left')
|
||||
# else:
|
||||
# raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}")
|
||||
|
||||
if not merged_df.empty:
|
||||
total_price.append(merged_df)
|
||||
# if not merged_df.empty:
|
||||
# total_price.append(merged_df)
|
||||
|
||||
final_df = pd.concat(total_price, ignore_index=True)
|
||||
# final_df = pd.concat(total_price, ignore_index=True)
|
||||
|
||||
final_df.to_csv("survery_data.csv", index=False)
|
||||
# final_df.to_csv("survery_data.csv", index=False)
|
||||
|
||||
|
||||
print(f"WEEK COMMENCING {WEEK_COMMENCING}")
|
||||
print("Excel file 'survey_data.xlsx' created successfully!")
|
||||
# print(f"WEEK COMMENCING {WEEK_COMMENCING}")
|
||||
# print("Excel file 'survey_data.xlsx' created successfully!")
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue