mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding v1 extraction to stonewater
This commit is contained in:
parent
84d4070b49
commit
711db3f552
1 changed files with 48 additions and 5 deletions
|
|
@ -182,7 +182,10 @@ def extract_summary_report(pdf_path):
|
|||
data["Current SAP Rating"] = sap_match.group(1).split(" ")[1]
|
||||
|
||||
# Extract age
|
||||
age_band_match = re.search(r"3\.0 Date Built:\s*Main Property\s*[A-Z]?\s*(\d{4}-\d{4})", text)
|
||||
age_band_match = re.search(
|
||||
r"3\.0 Date Built:\s*Main Property\s*[A-Z]?\s*(\d{4}-\d{4}|before \d{4}|\d{4} onwards)",
|
||||
text
|
||||
)
|
||||
data["Main Building Age Band"] = age_band_match.group(1)
|
||||
|
||||
# Number of storeys
|
||||
|
|
@ -786,7 +789,11 @@ def extract_epr(pdf_path):
|
|||
data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area)
|
||||
|
||||
# Extract age band
|
||||
age_band_match = re.search(r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4})", text)
|
||||
age_band_match = re.search(
|
||||
r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4}|before \d{4}|\d{4} onwards)",
|
||||
text
|
||||
)
|
||||
|
||||
data["Main Building Age Band"] = age_band_match.group(1)
|
||||
|
||||
# Extract Number of Storeys
|
||||
|
|
@ -3065,8 +3072,21 @@ def revised_model():
|
|||
os.path.join(mtp_folder, file) for file in os.listdir(mtp_folder_path)
|
||||
if ".DS_Store" not in file and not os.path.isdir(os.path.join(mtp_folder_path, mtp_folder, file))
|
||||
]
|
||||
|
||||
has_v1 = [
|
||||
f for f in mtp_contents if "v1" in f.lower() or "/ss" in f.lower()
|
||||
]
|
||||
|
||||
if has_v1:
|
||||
# Then we go one level deeper
|
||||
mtp_contents = [
|
||||
os.path.join(has_v1[0], f) for f in
|
||||
os.listdir(os.path.join(survey_folder_path, has_v1[0]))
|
||||
]
|
||||
|
||||
# We check the IMA
|
||||
for file_name in mtp_contents:
|
||||
|
||||
filepath = os.path.join(survey_folder_path, file_name)
|
||||
# We expect a pdf so try and parse it
|
||||
try:
|
||||
|
|
@ -3092,6 +3112,12 @@ def revised_model():
|
|||
has_pv = bool(pv_search)
|
||||
pv_system = pv_search.group(0) if has_pv else None
|
||||
|
||||
# We perform a second search for PV:
|
||||
if pv_search is None:
|
||||
pv_search = re.search("solar pv", text.lower())
|
||||
has_pv = bool(pv_search)
|
||||
pv_system = "Solar PV" if has_pv else None
|
||||
|
||||
rir_search = re.search(r"RIR \(\d+(\.\d+)?\)", text)
|
||||
has_rir = bool(rir_search)
|
||||
rir_spec = rir_search.group(0) if has_rir else None
|
||||
|
|
@ -3149,12 +3175,20 @@ def revised_model():
|
|||
extracted_data.append(summary_data)
|
||||
|
||||
retrofit_assessment_data = pd.DataFrame(extracted_data)
|
||||
mtp_df = pd.DataFrame(mtp_extracted_data)
|
||||
|
||||
# Save
|
||||
# retrofit_assessment_data.to_csv(
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 3.csv"), index=False
|
||||
# )
|
||||
# mtp_df.to_csv(
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 3.csv"), index=False
|
||||
# )
|
||||
retrofit_assessment_data = pd.read_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 2.csv"),
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 3.csv"),
|
||||
)
|
||||
mtp_df = pd.read_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 3.csv"),
|
||||
)
|
||||
|
||||
# Remove some definite duplicates
|
||||
|
|
@ -3164,6 +3198,9 @@ def revised_model():
|
|||
# Get all of the folders that end with ROSS
|
||||
to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist()
|
||||
|
||||
# Replace \n with ""
|
||||
retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
|
||||
|
||||
retrofit_assessment_data = retrofit_assessment_data[
|
||||
~retrofit_assessment_data["survey_folder"].isin(
|
||||
[
|
||||
|
|
@ -3173,8 +3210,6 @@ def revised_model():
|
|||
] + to_drop
|
||||
)
|
||||
]
|
||||
# Replace \n with ""
|
||||
retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
|
||||
|
||||
retrofit_assessments_data_columns = [
|
||||
'Current SAP Rating', 'Current EPC Band', 'Primary Energy Use (kWh/yr)',
|
||||
|
|
@ -3685,9 +3720,17 @@ def revised_model():
|
|||
if not missed_asset_id.empty:
|
||||
raise Exception("Missing Asset ID")
|
||||
|
||||
# We merge the MTP data onto the wates coordination
|
||||
wates_coordination = wates_coordination.merge(
|
||||
mtp_df, how="left", on="survey_folder"
|
||||
)
|
||||
|
||||
ccs_coordination = ccs_coordination.merge(
|
||||
ccs_matching_lookup, how="left", on="Name"
|
||||
)
|
||||
ccs_coordination = ccs_coordination.merge(
|
||||
mtp_df, how="left", on="survey_folder"
|
||||
)
|
||||
|
||||
retrofit_packages_board = retrofit_packages_board.merge(
|
||||
matching_lookup, how="left", on="Name"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue