mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
putting together outputs
This commit is contained in:
parent
10bc433283
commit
139db23592
3 changed files with 360 additions and 81 deletions
|
|
@ -1,5 +1,6 @@
|
|||
import os
|
||||
from pyexpat import features
|
||||
from urllib import parse
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
import PyPDF2
|
||||
import re
|
||||
|
|
@ -2936,6 +2937,14 @@ def identify_incorrect_packages():
|
|||
)
|
||||
|
||||
|
||||
def extract_sharepoint_url(x):
    """
    Extract the trailing folder path from a "<label> - <url>" link cell.

    :param x: Cell value expected to look like "<label> - http(s)://host/.../parent/folder". May be null.
    :return: The last two path segments of the URL joined with "/", with "%20" turned back into spaces.
        Returns "" when the value is null or contains no " - http" separator.
    """
    if pd.isnull(x):
        return ""

    pieces = x.split(" - http")
    if len(pieces) < 2:
        # No embedded link in this cell, so there is no folder path to extract
        return ""

    # The split consumes the "http" prefix. We restore it so that urlparse sees a well-formed URL and
    # "http://" and "https://" links are treated the same way (host kept out of the path)
    url_path = parse.urlparse("http" + pieces[1]).path

    # Only the final two path segments (parent folder / survey folder) are kept
    return "/".join(url_path.replace("%20", " ").split("/")[-2:])
|
||||
|
||||
|
||||
def revised_model():
|
||||
"""
|
||||
This function implements the revised model for Stonewater, where we are looking at new priority postcodes
|
||||
|
|
@ -2956,6 +2965,7 @@ def revised_model():
|
|||
original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
|
||||
original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
|
||||
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
|
||||
original_archetypes["UPRN"] = original_archetypes["UPRN"].astype("Int64").astype(str)
|
||||
|
||||
# Check if we have all of the addresses
|
||||
missed = original_archetypes[
|
||||
|
|
@ -2965,7 +2975,7 @@ def revised_model():
|
|||
assert set(missed) == {'NOT PRIORITY POSTCODE', 'IN WAVE 2.1', 'EPC C OR ABOVE'}
|
||||
|
||||
original_archetypes = original_archetypes[
|
||||
["Address ID", "Archetype ID", "Archetype Group Rank"]
|
||||
["Address ID", "Archetype ID", "Archetype Group Rank", "UPRN"]
|
||||
]
|
||||
|
||||
# Merge these archetypes on to the new priority postcodes
|
||||
|
|
@ -3104,6 +3114,42 @@ def revised_model():
|
|||
# Replace \n with ""
|
||||
retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
|
||||
|
||||
retrofit_assessments_data_columns = [
|
||||
'Current SAP Rating', 'Current EPC Band', 'Primary Energy Use (kWh/yr)',
|
||||
'Primary Energy Use Intensity (kWh/m2/yr)', 'Number of Storeys',
|
||||
'Fuel Bill', 'Window Age Description',
|
||||
'Window Age Description Proportion (%)',
|
||||
'Secondary Window Age Description',
|
||||
'Secondary Window Age Description Proportion (%)', 'Number of Windows',
|
||||
'Total Number of Doors', 'Number of Insulated Doors',
|
||||
'Existing Primary Heating System',
|
||||
'Existing Primary Heating PCDF Reference',
|
||||
'Existing Primary Heating Controls',
|
||||
'Existing Primary Heating % of Heat',
|
||||
'Existing Secondary Heating System',
|
||||
'Existing Secondary Heating PCDF Reference',
|
||||
'Existing Secondary Heating Controls',
|
||||
'Existing Secondary Heating % of Heat', 'Secondary Heating Code',
|
||||
'Water Heating Code', 'Total Floor Area (m2)',
|
||||
'Total Ground Floor Area (m2)', 'RIR Floor Area',
|
||||
'Main Building Wall Area (m2)', 'First Extension Wall Area (m2)',
|
||||
'Number of Light Fittings', 'Number of LEL Fittings',
|
||||
'Number of fittings needing LEL', 'Main Roof Type',
|
||||
'Main Roof Insulation', 'Main Roof Insulation Thickness',
|
||||
'Main Wall Type', 'Main Wall Insulation', 'Main Wall Dry-lining',
|
||||
'Main Wall Thickness', 'Main Building Alternative Wall Type',
|
||||
'Main Building Alternative Wall Insulation',
|
||||
'Main Building Alternative Wall Dry-lining',
|
||||
'Main Building Alternative Wall Thickness', 'Main Fuel'
|
||||
]
|
||||
# For the columns in retrofit_assessments_data_columns, prefix all of them with Survey:
|
||||
retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns]
|
||||
rename_dict = dict(zip(retrofit_assessments_data_columns, retrofit_assessments_data_columns_prefixed))
|
||||
retrofit_assessment_data = retrofit_assessment_data.rename(columns=rename_dict)
|
||||
retrofit_assessment_data["Survey: Current EPC Band"] = (
|
||||
retrofit_assessment_data["Survey: Current SAP Rating"].apply(lambda x: sap_to_epc(x))
|
||||
)
|
||||
|
||||
# We can read in the data as needed
|
||||
|
||||
# Next Step: Read in the coordinated measures and match to the extracted data
|
||||
|
|
@ -3134,14 +3180,6 @@ def revised_model():
|
|||
ccs_coordination_sheet = ccs_coordination_sheet.head(87)
|
||||
ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet])
|
||||
|
||||
from urllib import parse
|
||||
def extract_sharepoint_url(x):
    """
    Extract the trailing folder path from a "<label> - <url>" link cell.

    :param x: Cell value expected to look like "<label> - http(s)://host/.../parent/folder". May be null.
    :return: The last two path segments of the URL joined with "/", with "%20" turned back into spaces.
        Returns "" when the value is null or contains no " - http" separator.
    """
    if pd.isnull(x):
        return ""

    pieces = x.split(" - http")
    if len(pieces) < 2:
        # No embedded link in this cell, so there is no folder path to extract
        return ""

    # The split consumes the "http" prefix. We restore it so that urlparse sees a well-formed URL and
    # "http://" and "https://" links are treated the same way (host kept out of the path)
    url_path = parse.urlparse("http" + pieces[1]).path

    # Only the final two path segments (parent folder / survey folder) are kept
    return "/".join(url_path.replace("%20", " ").split("/")[-2:])
|
||||
|
||||
ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x))
|
||||
|
||||
############################################################
|
||||
|
|
@ -3224,8 +3262,6 @@ def revised_model():
|
|||
lambda x: extract_sharepoint_url(x)
|
||||
)
|
||||
|
||||
# Combine the data back
|
||||
|
||||
############################################################
|
||||
# NEW 450 COORDINATED RETROFIT ASSESSMENTS
|
||||
#############################################################
|
||||
|
|
@ -3352,7 +3388,6 @@ def revised_model():
|
|||
)
|
||||
ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])]
|
||||
ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"]
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
ccs_manual_filters = {
|
||||
"35 Kittiwake Close": "Wave 2.1 Surveys/11. CCS Dorset/Kittiwake Close 35"
|
||||
|
|
@ -3596,6 +3631,17 @@ def revised_model():
|
|||
matching_lookup, how="left", on="Name"
|
||||
)
|
||||
|
||||
# We now map the retrofit assessment data to the coordinated packages
|
||||
wates_coordination = wates_coordination.merge(
|
||||
retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
|
||||
)
|
||||
ccs_coordination = ccs_coordination.merge(
|
||||
retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
|
||||
)
|
||||
retrofit_packages_board = retrofit_packages_board.merge(
|
||||
retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
|
||||
)
|
||||
|
||||
# We have 4 properties in the Wates coordination board, that we want to remove from the retrofit packages board
|
||||
to_remove = wates_coordination[
|
||||
wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
|
||||
|
|
@ -3617,8 +3663,8 @@ def revised_model():
|
|||
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
|
||||
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
|
||||
'Solar PV', 'Other measures', 'Organisation Reference',
|
||||
]
|
||||
],
|
||||
] + retrofit_assessments_data_columns_prefixed
|
||||
],
|
||||
ccs_coordination[
|
||||
[
|
||||
# We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
|
||||
|
|
@ -3627,8 +3673,8 @@ def revised_model():
|
|||
'SAP Band Install Package', 'Package Approved (Client)',
|
||||
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
|
||||
'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
|
||||
]
|
||||
].rename(
|
||||
] + retrofit_assessments_data_columns_prefixed
|
||||
].rename(
|
||||
columns={
|
||||
"SAP Band Pre": "Actual SAP Band",
|
||||
"SAP Rating Pre": "Actual SAP Rating",
|
||||
|
|
@ -3651,8 +3697,8 @@ def revised_model():
|
|||
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
|
||||
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
|
||||
|
||||
]
|
||||
].rename(
|
||||
] + retrofit_assessments_data_columns_prefixed
|
||||
].rename(
|
||||
columns={
|
||||
"SAP Band Pre": "Actual SAP Band",
|
||||
"SAP Rating Pre": "Actual SAP Rating",
|
||||
|
|
@ -3681,24 +3727,8 @@ def revised_model():
|
|||
on="Organisation Reference"
|
||||
)
|
||||
|
||||
# We match the properties to their closest match
|
||||
# We clean up the SAP ratings in the coordinated packages
|
||||
def sap_to_number(x):
    """
    Convert a SAP rating that may carry an EPC band letter into an integer.

    Accepts plain numbers (69, "69") as well as values with a band letter at either end ("69C", "C69").

    :param x: The raw SAP rating value.
    :return: The rating as an int, or None when no integer can be recovered from the value.
    """
    try:
        return int(x)
    except (TypeError, ValueError):
        # Not directly convertible, so we fall through and look for a band letter to strip
        pass

    if not isinstance(x, str):
        return None

    text = x.strip().upper()
    if not text:
        return None

    # EPC bands run from A to G
    bands = "ABCDEFG"

    # The band letter can be a suffix ("69C") or a prefix ("C69"); we try the suffix form first
    candidates = []
    if text[-1] in bands:
        candidates.append(text[:-1])
    if text[0] in bands:
        candidates.append(text[1:])

    for candidate in candidates:
        try:
            return int(candidate)
        except ValueError:
            continue

    return None
|
||||
|
||||
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Band"])]
|
||||
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Rating"])]
|
||||
|
||||
coordinated_packages["Actual SAP Rating"] = coordinated_packages["Actual SAP Rating"].apply(
|
||||
lambda x: sap_to_number(x)
|
||||
)
|
||||
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current EPC Band"])]
|
||||
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current SAP Rating"])]
|
||||
|
||||
# We need the features pertaining to these priority postcodes
|
||||
|
||||
|
|
@ -3721,6 +3751,11 @@ def revised_model():
|
|||
if not match.empty:
|
||||
return match
|
||||
|
||||
# Finally, we search for a property in the same Archetype
|
||||
match = coordinated_packages[coordinated_packages["Archetype ID"] == home["Archetype ID"]]
|
||||
if not match.empty:
|
||||
return match
|
||||
|
||||
return None # No match found
|
||||
|
||||
coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
|
||||
|
|
@ -3732,6 +3767,12 @@ def revised_model():
|
|||
coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
|
||||
new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
|
||||
|
||||
coordinated_packages = coordinated_packages.merge(
|
||||
new_priority_postcodes[["Organisation Reference", "Archetype ID"]],
|
||||
how="left",
|
||||
on="Organisation Reference"
|
||||
)
|
||||
|
||||
# For every property in the priority postcodes data, we look for a most appropriate matching property
|
||||
no_match = []
|
||||
matches = []
|
||||
|
|
@ -3759,16 +3800,17 @@ def revised_model():
|
|||
no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
|
||||
|
||||
# len(no_match)
|
||||
# 8764, 5607, 5646
|
||||
# 8764, 5607, 5646, 5071
|
||||
# no_match_summary.shape
|
||||
# (3953, 6), (2948, 6), (2969, 7)
|
||||
# (3953, 6), (2948, 6), (2969, 7), (2575, 7)
|
||||
|
||||
matches_df = pd.DataFrame(matches)
|
||||
matches_df = matches_df.merge(
|
||||
coordinated_packages[["Organisation Reference", "Actual SAP Band", "Actual SAP Rating"]],
|
||||
coordinated_packages[["Organisation Reference", "Survey: Current EPC Band", "Survey: Current SAP Rating"]],
|
||||
left_on="Best Match Organisation Reference", right_on="Organisation Reference",
|
||||
suffixes=("", " - Closest Match")
|
||||
)
|
||||
|
||||
# We want to aggregate the matches, when we have multiple
|
||||
aggregated_matches_df = []
|
||||
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
|
||||
|
|
@ -3778,19 +3820,21 @@ def revised_model():
|
|||
"Organisation Reference": org_ref,
|
||||
"Number of matches": 1,
|
||||
"Proportion": 100,
|
||||
"Estimated SAP Rating": mapped_matches["Actual SAP Rating"].values[0],
|
||||
"Estimated EPC Rating": sap_to_epc(mapped_matches["Actual SAP Rating"].values[0])
|
||||
"Estimated SAP Rating": mapped_matches["Survey: Current SAP Rating"].values[0],
|
||||
"Estimated EPC Rating": mapped_matches["Survey: Current EPC Band"].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
# We need to aggregate the matches, since we have multiple
|
||||
average_rating = mapped_matches["Actual SAP Rating"].mean()
|
||||
average_rating = mapped_matches["Survey: Current SAP Rating"].mean()
|
||||
number_of_matches = mapped_matches.shape[0]
|
||||
average_epc_rating = sap_to_epc(average_rating)
|
||||
# proportion is the number of properties that have this EPC rating
|
||||
proportion_with_this_epc = int(
|
||||
mapped_matches[mapped_matches["Actual SAP Band"] == average_epc_rating].shape[0] / number_of_matches * 100)
|
||||
mapped_matches[mapped_matches["Survey: Current EPC Band"] == average_epc_rating].shape[
|
||||
0] / number_of_matches * 100
|
||||
)
|
||||
aggregated_matches_df.append(
|
||||
{
|
||||
"Organisation Reference": org_ref,
|
||||
|
|
@ -3804,12 +3848,220 @@ def revised_model():
|
|||
aggregated_matches_df = pd.DataFrame(aggregated_matches_df)
|
||||
|
||||
mapped_priority_list = new_priority_postcodes.merge(
|
||||
matches_df, on="Organisation Reference",
|
||||
aggregated_matches_df, on="Organisation Reference", how="left"
|
||||
)
|
||||
# We merge on the EPC ratings for the matched properties
|
||||
mapped_priority_list = mapped_priority_list.merge(
|
||||
|
||||
mapped_priority_list["address1"] = mapped_priority_list["Address"].str.split(",").str[0]
|
||||
|
||||
# If we have a leading number like 01, 02, 03, 04, 05, 06, 07, 08, 09, we remove the leading 0
|
||||
|
||||
def remove_leading_zero(address):
    """
    Strip the leading zero from a single-digit house number at the start of an address.

    For example "01 High Street" becomes "1 High Street". Only the pattern "0<1-9><space>" at the very start
    of the string is changed; anything else is returned as-is.

    :param address: The first line of the address. Missing values (None/NaN) are passed through untouched.
    :return: The address with the leading zero removed, or the input unchanged when it is not a string.
    """
    if not isinstance(address, str):
        # A missing address would make re.sub raise a TypeError, so we leave it alone
        return address

    return re.sub(r"^0([1-9]) ", r"\1 ", address)
|
||||
|
||||
# Example usage
|
||||
mapped_priority_list["address1"] = mapped_priority_list["address1"].apply(remove_leading_zero)
|
||||
mapped_priority_list["address1"] = np.where(
|
||||
mapped_priority_list["Organisation Reference"] == 37004,
|
||||
"8 Mason Road",
|
||||
mapped_priority_list["address1"]
|
||||
)
|
||||
mapped_priority_list["address1"] = np.where(
|
||||
mapped_priority_list["Organisation Reference"] == 37003,
|
||||
"9 Mason Road",
|
||||
mapped_priority_list["address1"]
|
||||
)
|
||||
|
||||
mapped_priority_list = mapped_priority_list.rename(
|
||||
columns={"UPRN": "uprn"}
|
||||
)
|
||||
mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"]
|
||||
|
||||
# Let's get the newest EPC data for these properties
|
||||
# We merge on UPRN, when we have it
|
||||
# from etl.route_march_data_pull.app import get_data
|
||||
# epc_data, errors, nodata = get_data(
|
||||
# asset_list=mapped_priority_list,
|
||||
# fulladdress_column="Address",
|
||||
# address1_column="address1",
|
||||
# postcode_column="Postcode",
|
||||
# manual_uprn_map={},
|
||||
# epc_api_only=True
|
||||
# )
|
||||
#
|
||||
# epc_df = pd.DataFrame(epc_data)
|
||||
# epc_df.to_csv(
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"), index=False
|
||||
# )
|
||||
epc_df = pd.read_csv(os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"))
|
||||
epc_df = epc_df.rename(columns={"row_id": "Organisation Reference"})
|
||||
|
||||
# We now package up the data
|
||||
|
||||
# Sheet 1 is the base coordination data
|
||||
output_coordination_sheet = coordinated_packages[
|
||||
[
|
||||
"Name", "Postcode", 'Organisation Reference', 'Package Ref',
|
||||
'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
|
||||
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
|
||||
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
|
||||
'Solar PV', 'Other measures', 'Survey: Current SAP Rating', 'Survey: Current EPC Band',
|
||||
'Survey: Primary Energy Use (kWh/yr)',
|
||||
'Survey: Primary Energy Use Intensity (kWh/m2/yr)',
|
||||
'Survey: Number of Storeys', 'Survey: Fuel Bill',
|
||||
'Survey: Window Age Description',
|
||||
'Survey: Window Age Description Proportion (%)',
|
||||
'Survey: Secondary Window Age Description',
|
||||
'Survey: Secondary Window Age Description Proportion (%)',
|
||||
'Survey: Number of Windows', 'Survey: Total Number of Doors',
|
||||
'Survey: Number of Insulated Doors',
|
||||
'Survey: Existing Primary Heating System',
|
||||
'Survey: Existing Primary Heating PCDF Reference',
|
||||
'Survey: Existing Primary Heating Controls',
|
||||
'Survey: Existing Primary Heating % of Heat',
|
||||
'Survey: Existing Secondary Heating System',
|
||||
'Survey: Existing Secondary Heating PCDF Reference',
|
||||
'Survey: Existing Secondary Heating Controls',
|
||||
'Survey: Existing Secondary Heating % of Heat',
|
||||
'Survey: Secondary Heating Code', 'Survey: Water Heating Code',
|
||||
'Survey: Total Floor Area (m2)', 'Survey: Total Ground Floor Area (m2)',
|
||||
'Survey: RIR Floor Area', 'Survey: Main Building Wall Area (m2)',
|
||||
'Survey: First Extension Wall Area (m2)',
|
||||
'Survey: Number of Light Fittings', 'Survey: Number of LEL Fittings',
|
||||
'Survey: Number of fittings needing LEL', 'Survey: Main Roof Type',
|
||||
'Survey: Main Roof Insulation',
|
||||
'Survey: Main Roof Insulation Thickness', 'Survey: Main Wall Type',
|
||||
'Survey: Main Wall Insulation', 'Survey: Main Wall Dry-lining',
|
||||
'Survey: Main Wall Thickness',
|
||||
'Survey: Main Building Alternative Wall Type',
|
||||
'Survey: Main Building Alternative Wall Insulation',
|
||||
'Survey: Main Building Alternative Wall Dry-lining',
|
||||
'Survey: Main Building Alternative Wall Thickness',
|
||||
'Survey: Main Fuel',
|
||||
'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type'
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
'Walls': "Parity - Walls",
|
||||
'Roofs': "Parity - Roof",
|
||||
'Heating': "Parity - Heating",
|
||||
'Main Fuel': "Parity - Fuel",
|
||||
'Age': "Parity - Age Band",
|
||||
'Property Type': "Parity - Property Type"
|
||||
}
|
||||
)
|
||||
|
||||
# Sheet 2 is the lookup table which maps the properties to their closest match
|
||||
# We need to bring in the parity attributes between the mapped properties so we can see side-by-side
|
||||
mapped_lookup = matches_df[
|
||||
[
|
||||
'Organisation Reference',
|
||||
'Best Match Organisation Reference',
|
||||
'Survey: Current EPC Band',
|
||||
'Survey: Current SAP Rating'
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
'Best Match Organisation Reference': "Best Match - Organisation Reference",
|
||||
"Survey: Current EPC Band": "Best Match - Survey: Current EPC Band",
|
||||
'Survey: Current SAP Rating': "Best Match - Survey: Current SAp Rating"
|
||||
}
|
||||
).merge(
|
||||
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
|
||||
how="left",
|
||||
on="Organisation Reference"
|
||||
).merge(
|
||||
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]].rename(
|
||||
columns={
|
||||
"Organisation Reference": "Best Match - Organisation Reference",
|
||||
"Walls": "Best Match - Walls",
|
||||
"Roofs": "Best Match - Roof",
|
||||
"Heating": "Best Match - Heating",
|
||||
"Main Fuel": "Best Match - Main Fuel",
|
||||
"Age": "Best Match - Age",
|
||||
"Property Type": "Best Match - Property Type"
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="Best Match - Organisation Reference"
|
||||
).merge(
|
||||
coordinated_packages[
|
||||
[
|
||||
"Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation',
|
||||
'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness',
|
||||
'Survey: Existing Primary Heating System',
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"Organisation Reference": "Best Match - Organisation Reference",
|
||||
'Survey: Main Wall Type': 'Best Match - Survey: Main Wall Type',
|
||||
'Survey: Main Wall Insulation': 'Best Match - Survey: Main Wall Insulation',
|
||||
'Survey: Main Roof Type': 'Best Match - Survey: Main Roof Type',
|
||||
'Survey: Main Roof Insulation': 'Best Match - Survey: Main Roof Insulation',
|
||||
'Survey: Main Roof Insulation Thickness': 'Best Match - Survey: Main Roof Insulation Thickness',
|
||||
'Survey: Existing Primary Heating System': 'Best Match - Survey: Existing Primary Heating System',
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="Best Match - Organisation Reference"
|
||||
)
|
||||
|
||||
# Finally, we have the property, against the mapped home with the estimated SAP scores and the EPC data
|
||||
worksheet = mapped_priority_list[
|
||||
[
|
||||
'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID',
|
||||
'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing',
|
||||
'Heating', 'Main Fuel', 'Hot Water', 'Estimated SAP Rating', 'Estimated EPC Rating'
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"SAP": "Parity - SAP Rating",
|
||||
"SAP Band": "Parity - EPC Rating",
|
||||
"Property Type": "Parity - Property Type",
|
||||
"Walls": "Parity - Walls",
|
||||
"Roofs": "Parity - Roofs",
|
||||
'Glazing': "Parity - Glazing",
|
||||
'Heating': 'Parity - Heating',
|
||||
'Main Fuel': 'Parity - Main Fuel',
|
||||
'Hot Water': 'Parity - Hot Water',
|
||||
}
|
||||
).merge(
|
||||
epc_df[
|
||||
[
|
||||
"Organisation Reference",
|
||||
"uprn",
|
||||
"current-energy-efficiency",
|
||||
"current-energy-rating",
|
||||
"lodgement-date",
|
||||
"construction-age-band",
|
||||
"walls-description",
|
||||
"roof-description",
|
||||
"mainheat-description",
|
||||
"windows-description",
|
||||
"hotwater-description",
|
||||
"main-fuel",
|
||||
"total-floor-area",
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"uprn": "Last EPC - uprn",
|
||||
"current-energy-efficiency": "Last EPC - SAP Score",
|
||||
"current-energy-rating": "Last EPC - EPC Rating",
|
||||
"lodgement-date": "Last EPC - Date Lodged",
|
||||
"construction-age-band": "Last EPC - Age Band",
|
||||
"walls-description": "Last EPC - Walls",
|
||||
"roof-description": "Last EPC - Roof",
|
||||
"mainheat-description": "Last EPC - Heating",
|
||||
"windows-description": "Last EPC - Windows",
|
||||
"hotwater-description": "Last EPC - Hot Water",
|
||||
"main-fuel": "Last EPC - Main Fuel",
|
||||
"total-floor-area": "Last EPC - Total Floor Area"
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on='Organisation Reference'
|
||||
)
|
||||
|
||||
worksheet["Years Since Last EPC"]
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ load_dotenv(dotenv_path="backend/.env")
|
|||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map):
|
||||
def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map, epc_api_only=True):
|
||||
epc_data = []
|
||||
errors = []
|
||||
no_epc = []
|
||||
|
|
@ -33,6 +33,11 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m
|
|||
if house_no is None:
|
||||
house_no = house_number
|
||||
uprn = manual_uprn_map.get(full_address, None)
|
||||
if uprn is None and home.get("uprn"):
|
||||
uprn = home["uprn"]
|
||||
|
||||
if pd.isnull(uprn):
|
||||
uprn = None
|
||||
|
||||
searcher = SearchEpc(
|
||||
address1=str(house_no),
|
||||
|
|
@ -88,6 +93,15 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m
|
|||
no_epc.append(home["row_id"])
|
||||
continue
|
||||
|
||||
if epc_api_only:
|
||||
epc = {
|
||||
"row_id": home["row_id"],
|
||||
**searcher.newest_epc.copy()
|
||||
}
|
||||
|
||||
epc_data.append(epc)
|
||||
continue
|
||||
|
||||
# Look for EPC recommendations
|
||||
try:
|
||||
property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
import os
|
||||
import PyPDF2
|
||||
from string import Template
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from survey_report.extraction.detect_report_type import detect_report_type
|
||||
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
|
||||
|
||||
|
|
@ -34,44 +37,54 @@ def handle():
|
|||
:return:
|
||||
"""
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2"
|
||||
folders = [
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 4",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 5",
|
||||
]
|
||||
data = []
|
||||
for data_folder in folders:
|
||||
|
||||
folder_contents = os.listdir(data_folder)
|
||||
# We look for the following files:
|
||||
# Site notes
|
||||
file_mapping = {}
|
||||
for file in folder_contents:
|
||||
# Check if it's a pdf file
|
||||
if not file.endswith(".pdf"):
|
||||
continue
|
||||
filepath = os.path.join(data_folder, file)
|
||||
with (open(filepath, "rb") as f):
|
||||
pdf = PyPDF2.PdfReader(f)
|
||||
first_page = pdf.pages[0].extract_text()
|
||||
text = ""
|
||||
for page in pdf.pages:
|
||||
text += page.extract_text()
|
||||
folder_contents = os.listdir(data_folder)
|
||||
# We look for the following files:
|
||||
# Site notes
|
||||
file_mapping = {}
|
||||
for file in folder_contents:
|
||||
# Check if it's a pdf file
|
||||
if not file.endswith(".pdf"):
|
||||
continue
|
||||
filepath = os.path.join(data_folder, file)
|
||||
with (open(filepath, "rb") as f):
|
||||
pdf = PyPDF2.PdfReader(f)
|
||||
first_page = pdf.pages[0].extract_text()
|
||||
text = ""
|
||||
for page in pdf.pages:
|
||||
text += page.extract_text()
|
||||
|
||||
# Check the report type
|
||||
report_type = detect_report_type(first_page)
|
||||
if report_type is not None:
|
||||
file_mapping[report_type] = text
|
||||
# Check the report type
|
||||
report_type = detect_report_type(first_page)
|
||||
if report_type is not None:
|
||||
file_mapping[report_type] = text
|
||||
|
||||
# This is only set up to work with quido site notes so we must have it
|
||||
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
|
||||
site_notes = site_notes_extractor.extract_all()
|
||||
# This is only set up to work with quido site notes so we must have it
|
||||
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
|
||||
site_notes = site_notes_extractor.extract_all()
|
||||
|
||||
# We also must have an EPR
|
||||
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
|
||||
epr = epr_extractor.extract_all()
|
||||
# We also must have an EPR
|
||||
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
|
||||
epr = epr_extractor.extract_all()
|
||||
|
||||
# We now produce the combined data sheet which is the starting figure:
|
||||
data_sheet = {**epr, **site_notes}
|
||||
del data_sheet['Building Dimensions']
|
||||
# We unnest the Total Building Dimensions
|
||||
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
|
||||
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
|
||||
del data_sheet["Total Building Dimensions"]
|
||||
# We now produce the combined data sheet which is the starting figure:
|
||||
data_sheet = {**epr, **site_notes}
|
||||
del data_sheet['Building Dimensions']
|
||||
# We unnest the Total Building Dimensions
|
||||
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
|
||||
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
|
||||
del data_sheet["Total Building Dimensions"]
|
||||
data.append(data_sheet)
|
||||
data = pd.DataFrame(data)
|
||||
|
||||
# Generate the HTML report
|
||||
# Placeholder locations
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue