mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Debugging extract_epr for the old Elmhurst EPR format
This commit is contained in:
parent
86deed8115
commit
ca7a0e9d10
2 changed files with 34 additions and 13 deletions
|
|
@ -747,12 +747,30 @@ def extract_epr(pdf_path):
|
|||
|
||||
# Extract Current and Potential SAP ratings
|
||||
sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text)
|
||||
current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
|
||||
data["Current SAP Rating"] = current_sap
|
||||
if sap_match is None:
|
||||
# Handles the older format of the Elmhurst EPR
|
||||
# The text will look something like this:
|
||||
# "Least energy efficient - higher running costsD 61" - we extract "D 61"
|
||||
sap_match = re.search(
|
||||
r"(?P<current_epc>[A-G])\s(?P<current_sap>\d{1,3})(?P<potential_epc>[A-G])\s(?P<potential_sap>\d{1,3})",
|
||||
text)
|
||||
data["Current EPC Band"] = sap_match.group("current_epc")
|
||||
data["Current SAP Rating"] = int(sap_match.group("current_sap"))
|
||||
else:
|
||||
current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
|
||||
data["Current SAP Rating"] = current_sap
|
||||
|
||||
# Extract the primary energy use intensity
|
||||
additional_rating_match = re.search(r"Additional ratings for your home\s*([\d.]+)", text)
|
||||
data["Primary Energy Use Intensity (kWh/m2/yr)"] = float(additional_rating_match.group(1))
|
||||
if additional_rating_match:
|
||||
data["Primary Energy Use Intensity (kWh/m2/yr)"] = float(additional_rating_match.group(1))
|
||||
else:
|
||||
# Handles the older format of the Elmhurst EPR
|
||||
primary_energy_match = re.search(r"actual consumption\.\n(?P<primary_energy>\d+)", text)
|
||||
data["Primary Energy Use (kWh/yr)"] = int(primary_energy_match.group("primary_energy"))
|
||||
# We calculate the primary energy use intensity by dividing by floor area
|
||||
floor_area = re.search(r"Total Floor Area\s(?P<floor_area>\d+)\s?m2", text).group("floor_area")
|
||||
data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area)
|
||||
|
||||
# Extract Number of Storeys
|
||||
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
|
||||
|
|
@ -2983,8 +3001,13 @@ def revised_model():
|
|||
# We now do a large pull of all of the data
|
||||
extracted_data = []
|
||||
for survey_folder in tqdm(survey_folders):
|
||||
|
||||
survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
|
||||
|
||||
# Check that the survey folder is actually a folder
|
||||
if not os.path.isdir(survey_folder_path):
|
||||
continue
|
||||
|
||||
# List the folders inside of the survey folder
|
||||
survey_subfolders = [
|
||||
name for name in os.listdir(survey_folder_path)
|
||||
|
|
|
|||
|
|
@ -162,19 +162,17 @@ def app():
|
|||
Property UPRN
|
||||
|
||||
"""
|
||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern"
|
||||
DATA_FILENAME = "January 2025 Additions Query.xlsx"
|
||||
SHEET_NAME = "Jan 2025 additions"
|
||||
DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing"
|
||||
DATA_FILENAME = "For Housing Data pull.xlsx"
|
||||
SHEET_NAME = "Sheet1"
|
||||
POSTCODE_COLUMN = "Post Code"
|
||||
FULLADDRESS_COLUMN = "Street / Block Name"
|
||||
ADDRESS1_COLUMN = None
|
||||
ADDRESS1_METHOD = "first_word"
|
||||
ADDRESS_COLS_TO_CONCAT = []
|
||||
FULLADDRESS_COLUMN = None
|
||||
ADDRESS1_COLUMN = "NO."
|
||||
ADDRESS1_METHOD = None
|
||||
ADDRESS_COLS_TO_CONCAT = ["NO.", "Street / Block Name"]
|
||||
|
||||
# Maps addresses to UPRNs in problematic cases
|
||||
MANUAL_UPRN_MAP = {
|
||||
"Ardelagh Ardelagh Faris Lane Woodham Addlestone KT15 3DJ": 100061484560
|
||||
}
|
||||
MANUAL_UPRN_MAP = {}
|
||||
|
||||
asset_list = pd.read_excel(os.path.join(DATA_FOLDER, DATA_FILENAME), header=0, sheet_name=SHEET_NAME)
|
||||
asset_list = asset_list[~pd.isnull(asset_list[POSTCODE_COLUMN])].reset_index()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue