deem score improved

This commit is contained in:
Jun-te Kim 2025-04-22 16:16:39 +00:00
parent aca37ea10d
commit f9633618b1
5 changed files with 92 additions and 22 deletions

View file

@ -0,0 +1,59 @@
import os
# SharePoint configuration, kept ahead of the ``etl`` imports below exactly as
# before (presumably the scraper module reads these at import time -- TODO confirm).
# The identifiers fall back to the values this script has always used, but a
# value already exported in the environment now takes precedence.
os.environ.setdefault("SHAREPOINT_CLIENT_ID", "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf")
os.environ.setdefault("SHAREPOINT_TENANT_ID", "c3f7519c-2719-4547-af04-6da6cbfd8f8f")
os.environ.setdefault(
    "SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID",
    "b5a51507-9427-4ee0-b03e-90ec7681e2d3",
)
os.environ.setdefault("JJC_SERVICE_SHAREPOINT_ID", "7fdd0485-bbf3-4b29-b30f-98c81c2a6284")

# The client secret is a credential and must not live in source control.
# Supply it through the environment (shell export, .env loader, CI secret store).
# NOTE(review): the value previously committed on this line should be rotated.
if not os.environ.get("SHAREPOINT_CLIENT_SECRET"):
    raise RuntimeError(
        "SHAREPOINT_CLIENT_SECRET is not set; export it before running this script"
    )
from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING
import pandas as pd
from etl.surveyedData.surveryedData import surveyedDataProcessor
import etl.scraper.scraper as scraper_module
def return_pandas_from_scraping(week_commencing, installer):
    """Scrape one installer's SharePoint files for a week and tabulate age bands.

    Args:
        week_commencing: Week label assigned to ``scraper_module.WEEK_COMMENCING``
            before scraping, e.g. ``"W.C. 14.04.2025"``.
        installer: Value handed straight to ``SharePointScraper`` (this script
            passes ``SharePointInstaller`` members).

    Returns:
        A ``pandas.DataFrame`` with ``address`` and ``age_band`` columns, one
        row per survey that has a pre-site note, or ``None`` when no survey
        has one.
    """
    # Overrides the scraper module's global week label; presumably the scraper
    # consults it while downloading -- confirm. The override is not restored.
    scraper_module.WEEK_COMMENCING = week_commencing
    downloads = SharePointScraper(installer).download_file_for_each_address()

    rows = []
    for files_by_address in downloads:
        for address, files in files_by_address.items():
            survey = surveyedDataProcessor(address, files)
            # Only surveys with a pre-site note carry an age band; the rest
            # contribute no row at all (never an empty one).
            if not survey.pre_site_note:
                continue
            main_property = survey.pre_site_note.property_description.main_property
            rows.append(
                {"address": survey.address, "age_band": main_property.age_band}
            )

    return pd.DataFrame(rows) if rows else None
# Installers whose SharePoint folders are scraped.
installers = [SharePointInstaller.JJC, SharePointInstaller.SOUTH_COAST_INSULATION]

# Week-commencing labels to scrape for each installer.
# NOTE(review): "W.C. 07.04.2025" is absent from this list -- confirm that is intentional.
dates = [
    "W.C. 14.04.2025",
    "W.C. 31.03.2025",
    "W.C. 24.03.2025",
    "W.C. 17.03.2025",
    "W.C. 10.03.2025",
    "W.C. 03.03.2025",
    "W.C. 24.02.2025",
]

# One frame per (installer, week) that yielded at least one age band.
all_dfs = []
for installer in installers:
    for date in dates:
        df = return_pandas_from_scraping(date, installer)
        if df is None:
            continue
        # Tag every row with where it came from before the frames are stacked.
        df["installer"] = installer.name
        df["week_commencing"] = date
        all_dfs.append(df)

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list, so fail with a message that says what actually went wrong.
if not all_dfs:
    raise RuntimeError(
        "no pre site notes found for any installer/week; age_band.csv not written"
    )

giant_df = pd.concat(all_dfs, ignore_index=True)
giant_df.to_csv("age_band.csv")

View file

@ -21,7 +21,7 @@ output_path = os.path.abspath(verbose_file)
sp.upload_to_sharepoint(output_path, verbose_file)
lewis_view = "FOR_LEWIS.xlsx"
selected_columns = ["INSTALLER", "HUBSPOT_DEAL_ADDRESS", "PRICE"]
selected_columns = ["HUBSPOT_INSTALLER", "HUBSPOT_DEAL_ADDRESS", "PRICE"]
minimal_df = df[selected_columns]
minimal_df.to_excel(lewis_view, index=False)
output_path = os.path.abspath(lewis_view)
@ -58,14 +58,14 @@ sp.move_deals_to_completed(deal_ids)
# SCIS
# 3 examples of Solar
# ( not in hubspot ) 12 short hedges - Solar 1608
# ( not in hubspot ) 18 short hedge - Solar 1608
# ( not in hubspot) 6 forety road -Solar 1608
# ( in hubspot ) 12 short hedges - Solar 1608
# ( in hubspot ) 18 short hedge - Solar 1608
# ( in hubspot) 6 forety road -Solar 1608
# 3 examples Cavity Wall, FOAM, Empty and General ideally
# ( not in hubspot ) 319 Muirfield Road, (Empty Cavity) - 1000
# ( not in hubspot ) 2 queensway, (Fibre) - 500
# ( not in hubspot )56 Aughton Crescent -(foam) - To be worked out by Lewis but lets use this as an oppurtunity -
# ( in hubspot ) 319 Muirfield Road, (Empty Cavity) - 1000
# ( hubspot ) 2 queensway, (Fibre) - 500
# ( in hubspot ) 56 Aughton Crescent - (foam) - To be worked out by Lewis, but let's use this as an opportunity -
# Compare value with what I should get and in the deem score. Keep tabs below so I can check easily

View file

@ -69,11 +69,9 @@ for files in list_of_pictures:
if 'file' in file:
url = file['@microsoft.graph.downloadUrl']
print(f"Downloading {files}/{file['name']}")
sha256 = calculate_sha256(south_coast_scraper.get_file_content(url))
final_list.append({
"Directories": files,
"Photo Name": file['name'],
"sha256": sha256,
})
final_df = pd.DataFrame(final_list)

View file

@ -14,7 +14,7 @@ from datetime import datetime, timedelta
def previous_monday():
    """Return the label for last week's Monday as ``"W.C. DD.MM.YYYY"``.

    The Monday returned is the one that started the week before the current
    one, so it lies between 7 and 13 days before today.

    Returns:
        The week-commencing label, e.g. ``"W.C. 31.03.2025"``.
    """
    today = datetime.today()
    # weekday() is 0 on Monday: subtracting it lands on this week's Monday,
    # and the extra 7 days steps back to the Monday of the previous week.
    last_monday = today - timedelta(days=today.weekday() + 7)
    return f"W.C. {last_monday.strftime('%d.%m.%Y')}"
WEEK_COMMENCING = os.getenv("WEEK_COMMENCING", previous_monday())

View file

@ -151,6 +151,7 @@ class SurveyPrice():
def sharepoint_data_for_installer(self, installer):
sp = SharePointScraper(installer)
file_paths = sp.download_file_for_each_address()
surveys = []
@ -173,7 +174,7 @@ class SurveyPrice():
"SHAREPOINT FLOOR_AREA_BANDING": "NO PRE SITE NOTES FOUND",
"SHAREPOINT PRE_INSTALL_SAP_SCORE": "NO PRE SITE NOTES FOUND",
"SHAREPOINT INSULATION MATERIAL": None,
"SHAREPOINT ADDRESS": address
"SHAREPOINT ADDRESS": surveyInfo.address
}
if surveyInfo.pre_site_note:
@ -231,14 +232,24 @@ class SurveyPrice():
raise RuntimeError("No information found from Hubspot")
# Standardise address
self.all_survey_info_from_sharepoint['clean_address'] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
lambda x: x.lower().replace(',', '').strip()
)
def extract_start_and_postcode(addr):
    """Reduce an address to its first two words and its last two words.

    The address is lower-cased, commas are removed, and it is split on
    whitespace. The first two words are treated as "number + street" and the
    last two as the postcode. With fewer than four words the two halves overlap.

    Args:
        addr: Raw address; anything that is not a non-blank string yields
            two empty strings.

    Returns:
        A ``(start, postcode)`` tuple of space-joined words.
    """
    if not isinstance(addr, str):
        return "", ""
    if addr.strip() == "":
        return "", ""

    tokens = addr.lower().replace(",", "").strip().split()
    leading, trailing = tokens[:2], tokens[-2:]
    return " ".join(leading), " ".join(trailing)
self.all_hubspot_submissions['clean_address'] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
lambda x: x.lower().replace(',', '').strip()
# Extract start + postcode from both datasets
self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
lambda x: pd.Series(extract_start_and_postcode(x))
)
self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
lambda x: pd.Series(extract_start_and_postcode(x))
)
# re-name to installer
self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
columns={
@ -254,14 +265,14 @@ class SurveyPrice():
)
merged_df = pd.merge(
self.all_survey_info_from_sharepoint,
self.all_hubspot_submissions,
left_on=['clean_address'],
right_on=['clean_address'],
self.all_survey_info_from_sharepoint,
self.all_hubspot_submissions,
on=['address_start', 'postcode'],
how='inner'
)
merged_df.drop(columns=['clean_address'], inplace=True)
merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
def compute_energy_grant(row):
pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
@ -274,12 +285,14 @@ class SurveyPrice():
def work_type(row):
    """Describe the funded work for one merged row.

    Args:
        row: Mapping-like row providing ``ENERGY_GRANT`` and, for non-GBIS
            rows, ``SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING`` and
            ``POST_INSTALL_SAP_SCORE_BANDING``.

    Returns:
        ``"GBIS"`` for GBIS rows, otherwise
        ``"<grant> - SAP <pre banding> to <post banding>"``.
    """
    grant = row["ENERGY_GRANT"]
    if grant == "GBIS":
        return "GBIS"
    # Values are bound to names first so the f-string needs no nested quotes
    # and parses on interpreters older than Python 3.12.
    pre_banding = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"]
    post_banding = row["POST_INSTALL_SAP_SCORE_BANDING"]
    return f"{grant} - SAP {pre_banding} to {post_banding}"
# Add missing variables
if merged_df.size == 0:
raise RuntimeError("no matched addresses with hubspot and sharepoint pre site notes")
merged_df["ENERGY_GRANT"] = merged_df.apply(compute_energy_grant, axis=1)
merged_df["POST_INSTALL_SAP_SCORE_BANDING"] = merged_df.apply(compute_banding_for_post_sap, axis=1)
merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)