completed matching

This commit is contained in:
Khalim Conn-Kowlessar 2025-01-30 17:30:17 +00:00
parent daabf2a586
commit f6d8688698
4 changed files with 248 additions and 10 deletions

View file

@ -3078,6 +3078,13 @@ def revised_model():
retrofit_assessment_data = pd.DataFrame(extracted_data)
# retrofit_assessment_data.to_csv(
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"), index=False
# )
retrofit_assessment_data = pd.read_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"),
)
# Remove some definite duplicates
dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"]
dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)]
@ -3097,10 +3104,6 @@ def revised_model():
# Replace \n with ""
retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
# retrofit_assessment_data.to_csv(
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet.csv"), index=False
# )
# We can read in the data as needed
# Next Step: Read in the coordinated measures and match to the extracted data
@ -3108,24 +3111,59 @@ def revised_model():
# CCS
#############################################################
ccs_coordination_sheet = pd.read_excel(
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"),
os.path.join(
CUSTOMER_FOLDER_PATH,
"Jan 2025 Project",
"CCS_Installation_Compliance_-_Stonewater_SHDF_2_1_1738228227.xlsx"
),
header=4
)
ccs_postcodes = pd.read_excel(
os.path.join(
CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"
),
header=4
)
ccs_coordination_sheet = ccs_postcodes[['Name', 'Post Code', 'Asset ID', 'Asset ID.1']].merge(
ccs_coordination_sheet, how="left", on="Name"
)
ccs_coordination_sheet = ccs_coordination_sheet[~pd.isnull(ccs_coordination_sheet["Name"])]
ccs_coordination_sheet["contractor"] = "CCS"
# We split CCS into two sections: the last 21 rows (removed from programme) and the first 87 rows, which are kept as the coordination sheet
ccs_coordination_removed_from_programme = ccs_coordination_sheet.tail(21)
ccs_coordination_sheet = ccs_coordination_sheet.head(87)
ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet])
from urllib import parse


def extract_sharepoint_url(x):
    """
    Extract the last two folder names from a coordination-sheet SharePoint cell.

    The cell is expected to look like "<label> - http(s)://<host>/<path>". The
    text after the first " - http" is parsed as a URL and the final two path
    segments are returned joined by "/", e.g. "3. Wiltshire/14 Edencroft".

    Args:
    - x: The cell value. May be null/NaN or a value without a link.

    Returns:
    - str: "<parent folder>/<folder>" with percent-escapes decoded, or "" when
      the cell is null or contains no " - http" separator.
    """
    if pd.isnull(x):
        return ""

    pieces = str(x).split(" - http")
    if len(pieces) < 2:
        # No link in the cell - treat it the same way as a missing value
        # rather than failing the whole apply() with an IndexError
        return ""

    # The split consumed the "http" prefix, so restore it before parsing to
    # make sure the scheme/host are recognised and only the path is returned
    url_path = parse.urlparse("http" + pieces[1]).path

    # Decode every percent-escape (not only %20) so that names containing
    # characters such as brackets or ampersands match the downloaded folders.
    # Decoding happens per segment so an escaped "/" cannot create new segments
    return "/".join(parse.unquote(segment) for segment in url_path.split("/")[-2:])
ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x))
############################################################
# WATES
#############################################################
wates_coordination_sheet = pd.read_excel(
os.path.join(
CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_1738229226.xlsx"
),
header=4
)
wates_postcodes = pd.read_excel(
os.path.join(
CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_Vinci-Wates.xlsx"
),
header=4
)
wates_postcodes = wates_postcodes[~pd.isnull(wates_postcodes["Post Code"])]
wates_coordination_sheet = wates_coordination_sheet.merge(
wates_postcodes[['Name', 'Post Code', 'Asset ID']].drop_duplicates(), how="left", on="Name"
)
wates_coordination_sheet["contractor"] = "Wates"
# Break into the different sites:
# Wiltshire
@ -3136,7 +3174,7 @@ def revised_model():
wates_coordination_sheet_bournemouth = wates_coordination_sheet.iloc[524:567, :]
wates_coordination_sheet_cambridgeshire = wates_coordination_sheet.iloc[571:581, :]
wates_coordination_sheet_removed_from_programme = wates_coordination_sheet.iloc[586:926, :]
wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[928:972, :]
wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[930:972, :]
wates_coordination = pd.concat(
[
@ -3151,12 +3189,15 @@ def revised_model():
]
)
wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
lambda x: extract_sharepoint_url(x)
)
# Combine the data back
############################################################
# NEW 450 COORDINATED RETROFIT ASSESSMENTS
#############################################################
retrofit_packages_board = pd.read_excel(
os.path.join(
CUSTOMER_FOLDER_PATH,
@ -3361,17 +3402,49 @@ def revised_model():
wates_coordination = wates_coordination[
wates_coordination["Retrofit Assessment"].isin(["Completed"])
]
wates_coordination = wates_coordination[
~pd.isnull(wates_coordination["Postcode"])
]
wates_manual_filters = {
"24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View"
"24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View",
"14 Edencroft": "Wave 2.1 Surveys/3. Wiltshire/14 Edencroft",
"Flat 31 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/Flat 31 Rabley Wood View",
'Flat 13, Manor Fields': 'Wave 2.1 Surveys/1. Herefordshire/(038) Manor Fields Flat 13',
"4 Kittys Lane": "Wave 2.1 Surveys/1. Herefordshire/(005) Kittys Lane 4",
'1 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 1',
'2 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 2',
}
wates_matching_lookup = []
# Examples to skip when we cannot get the data
wates_to_skip = [
"66 Abbatt Close", # File type is unusual, couldn't extract the data
"Flat 69 Goddard Road", # Doesn't exist
"19 Garth House", # # File type is unusual, couldn't extract the data
'5 Gilpin Close', # No properly formatted EPR
'49 The Hide, Netherfield', # TODO: TEMP HERE
'19 Chanders Rd',
'5 Chanders Rd',
'23 Chanders Rd',
'3 Chanders Rd',
'1 Orchard Close',
]
for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)):
# Search the folder
filtered = retrofit_assessment_data[
retrofit_assessment_data["survey_folder"].str.contains(home["folder_path"], regex=False)
]
if len(filtered) == 1:
wates_matching_lookup.append(
{
"survey_folder": filtered["survey_folder"].values[0],
"Asset ID": home["Asset ID"],
"Name": home["Name"]
}
)
continue
if home["Name"] in wates_to_skip:
continue

View file

@ -86,7 +86,6 @@ def download_data_from_sharepoint():
folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
)
len(contents["value"])
folders_to_pull = [
folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"]
]
@ -108,6 +107,8 @@ def download_data_from_sharepoint():
folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + "/" +
folder_to_pull["name"] + "/" + property_folder["name"]
)
if not property_folder_contents.get("value"):
continue
# We look for the retrofit assessment folder:
property_sub_folders = [
f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
@ -138,5 +139,5 @@ def download_data_from_sharepoint():
drive_id=sharepoint_client.document_drive["id"],
folder_path=property_folder_path,
download_dir=download_dir,
excluded_file_types=["MOV"]
excluded_file_types=["MOV", "jpg"]
)

View file

@ -1,9 +1,33 @@
import os
import PyPDF2
from string import Template
from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
def generate_html_report(template_path, output_path, data, escape_values=True):
    """
    Reads an HTML template file, injects dynamic values, and generates a final HTML report.

    Placeholders in the template use the string.Template syntax (e.g. ${address}). Placeholders
    with no matching key in `data` are left untouched in the output.

    Args:
    - template_path (str): Path to the HTML template file.
    - output_path (str): Path to save the generated HTML file. The parent folder is created
      if it does not exist yet.
    - data (dict): Dictionary containing dynamic values for the report.
    - escape_values (bool): When True (default) every value is converted to a string and
      HTML-escaped before substitution. Pass False only if the values are already safe HTML.
    """
    import html  # Local import so this function does not depend on the module's import block

    # Read the template file
    with open(template_path, "r", encoding="utf-8") as f:
        html_template = Template(f.read())  # Use Template from string module

    if escape_values:
        # Values such as addresses come from extracted documents and may contain &, < or quotes,
        # which would otherwise break the markup or the src="..." attribute
        data = {key: html.escape(str(value)) for key, value in data.items()}

    # Replace placeholders with actual data
    final_html = html_template.safe_substitute(data)  # Use safe_substitute to prevent missing key errors

    # Make sure the destination folder exists before writing
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the generated HTML file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_html)

    print(f"HTML report generated successfully: {output_path}")
def handle():
"""
Performs the data extraction process for the survey report
@ -48,3 +72,20 @@ def handle():
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
del data_sheet["Total Building Dimensions"]
# Generate the HTML report
# Placeholder locations
template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html"
output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html"
logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png"
generate_html_report(
template_path, output_path,
data={
"address": data_sheet["Address"],
"logo_path": logo_path,
"current_epc": data_sheet["Current EPC Band"],
"current_sap": data_sheet["Current SAP Rating"],
"potential_epc": "A", # TODO PLACEHOLDER
"potential_sap": 91, # TODO PLACEHOLDER
}
)

123
survey_report/template.html Normal file
View file

@ -0,0 +1,123 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Domna Energy Report</title>
<style>
body {
font-family: Arial, sans-serif;
background-color: #ffffff;
color: #333;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
}
.container {
width: 100%;
max-width: 1300px;
margin: 20px auto;
}
.header {
background-color: #1B1F3B;
color: white;
padding: 30px;
display: flex;
justify-content: space-between;
align-items: center;
border-radius: 12px;
}
/* Report title inside the header banner */
.header h1 {
    margin: 5px; /* A non-zero length needs a unit; the bare "5" was an invalid declaration and was ignored */
    font-size: 24px;
}
.header p {
margin: 5px 0 0;
font-size: 16px;
color: #d1d5db;
}
.logo img {
height: 60px;
}
/* EPC Rating Cards */
.epc-container {
display: flex;
justify-content: space-between;
gap: 20px;
margin-top: 30px;
}
.epc-card {
background-color: white;
border: 2px solid #ccc;
border-radius: 10px;
padding: 20px;
flex: 1;
display: flex;
flex-direction: column;
justify-content: space-between; /* Pushes SAP to bottom */
align-items: center;
text-align: center;
box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
position: relative;
height: 160px;
}
.epc-title {
font-size: 18px;
font-weight: bold;
color: #666;
}
.epc-rating {
font-size: 50px;
font-weight: bold;
}
.sap-rating {
font-size: 18px;
color: #555;
position: absolute;
bottom: 10px;
right: 20px;
}
.before .epc-rating {
color: #1B1F3B; /* Dark navy - same colour as the header background */
}
.after .epc-rating {
color: #D4AF37; /* Gold */
}
</style>
</head>
<body>
<div class="container">
<!-- Header Section -->
<div class="header">
<div>
<h1>Domna Energy Report</h1>
<p>${address}</p> <!-- Address Placeholder -->
</div>
<div class="logo">
<img src="${logo_path}" alt="Domna Logo">
</div>
</div>
<!-- EPC Rating Cards -->
<div class="epc-container">
<div class="epc-card before">
<div class="epc-title">Current EPC Rating</div>
<div class="epc-rating">${current_epc}</div>
<div class="sap-rating">SAP ${current_sap}</div>
</div>
<div class="epc-card after">
<div class="epc-title">Potential EPC Rating</div>
<div class="epc-rating">${potential_epc}</div>
<div class="sap-rating">SAP ${potential_sap}</div>
</div>
</div>
</div>
</body>
</html>