mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
additional data cleaning
This commit is contained in:
parent
33ea47e71d
commit
c68e4f017e
1 changed file with 4 additions and 9 deletions
|
|
@@ -86,12 +86,8 @@ def extract_epr(pdf_path):
|
|||
data["Address"] = address_match.group(1).strip()
|
||||
|
||||
# Extract Total Floor Area
|
||||
area_match = re.search(r"Total Floor Area\s*(\d+ m2)", text)
|
||||
data["Total Floor Area"] = area_match.group(1)
|
||||
|
||||
# Extract Estimated Annual Costs
|
||||
cost_match = re.search(r"TOTAL\s*£(\d+)", text)
|
||||
data["Estimated Annual Costs"] = f"£{cost_match.group(1)}"
|
||||
# area_match = re.search(r"Total Floor Area\s*(\d+ m2)", text)
|
||||
# data["Total Floor Area"] = area_match.group(1)
|
||||
|
||||
# Extract Current SAP rating
|
||||
# Updated Regular Expression to find "GG (1-20)" followed by two numbers
|
||||
|
|
@@ -216,6 +212,5 @@ def main():
|
|||
|
||||
extracted_data = pd.DataFrame(extracted_data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue