Stonewater: extract the main building age band in extract_epr

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-12 16:19:52 +00:00
parent 959d29b675
commit 6396f081c1
3 changed files with 8 additions and 3 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -125,6 +125,7 @@ def extract_summary_report(pdf_path):
- Address
"""
blah
data = {
"Address": None,
"Postcode": None,
@ -701,6 +702,7 @@ def extract_epr(pdf_path):
"Primary Energy Use (kWh/yr)": None,
"Primary Energy Use Intensity (kWh/m2/yr)": None,
"Number of Storeys": None,
"Main Building Age Band": None,
"Fuel Bill": None,
"Window Age Description": None,
"Window Age Description Proportion (%)": None,
@ -779,6 +781,10 @@ def extract_epr(pdf_path):
floor_area = re.search(r"Total Floor Area\s(?P<floor_area>\d+)\s?m2", text).group("floor_area")
data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area)
# Extract age band
age_band_match = re.search(r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4})", text)
data["Main Building Age Band"] = age_band_match.group(1)
# Extract Number of Storeys
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
data["Number of Storeys"] = int(storeys_match.group(1))
@ -3022,7 +3028,6 @@ def revised_model():
# We now do a large pull of all of the data
extracted_data = []
for survey_folder in tqdm(survey_folders):
survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
# Check that the survey folder is actually a folder