mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
chore(debug): summary_to_sap.py — Elmhurst Summary PDF -> our SAP + trail 🟪
Runs a Summary PDF through the chain-test path (pdftotext -> ElmhurstSiteNotesExtractor -> from_elmhurst_site_notes) into Sap10Calculator and dumps SAP + per-end-use kWh + the intermediate worksheet trail, for diffing our calc against the accompanying Elmhurst U985 worksheet PDF. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0851b48807
commit
02ef67fd8f
1 changed files with 94 additions and 0 deletions
94
scripts/summary_to_sap.py
Normal file
94
scripts/summary_to_sap.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
"""Elmhurst Summary PDF -> EpcPropertyData -> Sap10Calculator, with a dump of
|
||||
our SAP score + per-end-use kWh + the `intermediate` worksheet trail, for
|
||||
diffing against the accompanying Elmhurst worksheet PDF.
|
||||
|
||||
Usage:
|
||||
python -m scripts.summary_to_sap "<path to Summary_*.pdf>"
|
||||
|
||||
Reuses the exact preprocessing the Summary->EpcPropertyData chain test uses
|
||||
(`backend/documents_parser/tests/test_summary_pdf_mapper_chain.py`):
|
||||
`pdftotext -layout` -> Textract-style label/value stream -> extractor ->
|
||||
`from_elmhurst_site_notes` mapper.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.sap10_calculator.calculator import Sap10Calculator
|
||||
from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs
|
||||
|
||||
|
||||
def _layout_to_token_stream(layout: str) -> str:
    """Flatten one page of ``pdftotext -layout`` text into a token stream.

    Every non-blank line is stripped and split into cells on runs of two or
    more whitespace characters; a blank line contributes a single empty
    token.  The tokens are joined with newlines.
    """
    tokens: list[str] = []
    for raw_line in layout.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # Keep an empty token for blank lines so vertical gaps in the
            # page layout stay visible in the stream.
            tokens.append("")
            continue
        tokens.extend(cell for cell in re.split(r"\s{2,}", stripped) if cell)
    return "\n".join(tokens)


def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Render each page of *pdf_path* as a newline-separated token stream.

    The page count is read from ``pdfinfo``; each page is then extracted on
    its own with ``pdftotext -layout`` and flattened by
    ``_layout_to_token_stream``.

    Args:
        pdf_path: Path to the PDF to convert.

    Returns:
        One string per page, in page order.

    Raises:
        RuntimeError: If the ``pdfinfo`` output has no ``Pages:`` line.
        subprocess.CalledProcessError: If ``pdfinfo`` or ``pdftotext`` exits
            with a non-zero status (e.g. the file is missing or not a PDF).
        FileNotFoundError: If the ``pdfinfo`` / ``pdftotext`` executables
            are not installed.
    """
    # Decode explicitly as UTF-8 (the poppler tools' default output
    # encoding) instead of relying on the locale.  Only the ASCII "Pages:"
    # line matters here, so undecodable metadata bytes are replaced rather
    # than allowed to abort the run.
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True, text=True, encoding="utf-8", errors="replace",
        check=True,
    ).stdout
    # Anchor at the start of a line so free-text metadata printed before the
    # real field (e.g. a Title containing "Pages: 12") cannot be picked up.
    m = re.search(r"^Pages:\s+(\d+)", info, flags=re.MULTILINE)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    pages: list[str] = []
    for page_no in range(1, page_count + 1):
        # -f/-l restrict the extraction to a single page; "-" sends the text
        # to stdout.
        layout = subprocess.run(
            ["pdftotext", "-layout", "-f", str(page_no), "-l", str(page_no),
             str(pdf_path), "-"],
            capture_output=True, text=True, encoding="utf-8", check=True,
        ).stdout
        pages.append(_layout_to_token_stream(layout))
    return pages
|
||||
|
||||
|
||||
def main(pdf: str) -> None:
    """Run one Summary PDF through the extractor, mapper and calculator and
    print the resulting SAP figures and intermediate values to stdout.

    Args:
        pdf: Filesystem path of the Summary PDF to process.
    """
    pdf_path = Path(pdf)
    page_streams = _summary_pdf_to_textract_style_pages(pdf_path)
    site_notes = ElmhurstSiteNotesExtractor(page_streams).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
    inputs = cert_to_inputs(epc)
    result = Sap10Calculator().calculate(epc)

    # Only the first building part (if there is one) feeds the header line.
    first_part = None
    if epc.sap_building_parts:
        first_part = epc.sap_building_parts[0]

    missing = float("nan")

    def trail_value(key):
        # Look up a value in the calculator's `intermediate` mapping; absent
        # keys come back as NaN so the float format specs still render.
        return result.intermediate.get(key, missing)

    print(f"=== {pdf_path.name} ===")

    age_band = first_part.construction_age_band if first_part else None
    print(
        f"dwelling_type={epc.dwelling_type!r} property_type={epc.property_type!r} "
        f"age_band={age_band} TFA={epc.total_floor_area_m2}"
    )
    print(
        f"OUR SAP = {result.sap_score} ({result.sap_score_continuous:.4f}) "
        f"CO2={result.co2_kg_per_yr/1000:.3f} t/yr PEUI={result.primary_energy_kwh_per_m2:.1f}"
    )

    print("--- per end use (kWh/yr) ---")
    end_uses = (
        ("space_heating useful", "space_heating_kwh_per_yr"),
        ("main_heating fuel", "main_heating_fuel_kwh_per_yr"),
        ("secondary fuel", "secondary_heating_fuel_kwh_per_yr"),
        ("hot_water", "hot_water_kwh_per_yr"),
        ("lighting", "lighting_kwh_per_yr"),
        ("pumps_fans", "pumps_fans_kwh_per_yr"),
    )
    for label, attr_name in end_uses:
        # Attributes are read one at a time, immediately before printing.
        print(f" {label} = {getattr(result, attr_name):.1f}")
    print(f" delivered fuel total = {trail_value('delivered_fuel_kwh_per_yr'):.1f}")

    print("--- costs / rating ---")
    cost_keys = (
        "main_heating_cost_gbp",
        "secondary_heating_cost_gbp",
        "hot_water_cost_gbp",
        "pumps_fans_cost_gbp",
        "lighting_cost_gbp",
        "ecf",
    )
    for key in cost_keys:
        print(f" {key:28s} {trail_value(key):.4f}")
    print(
        f" is_off_peak={result.is_off_peak_meter} main_hrf={result.main_heating_high_rate_fraction} "
        f"hw_hrf={result.hot_water_high_rate_fraction:.4f} other_hrf={result.other_electricity_high_rate_fraction}"
    )
    print(
        f" space £/kWh={inputs.space_heating_fuel_cost_gbp_per_kwh} "
        f"hw £/kWh={inputs.hot_water_fuel_cost_gbp_per_kwh} other £/kWh={inputs.other_fuel_cost_gbp_per_kwh}"
    )

    print("--- heat balance (intermediate) ---")
    heat_balance_keys = (
        "heat_transfer_coefficient_w_per_k",
        "heat_loss_parameter_w_per_m2k",
        "walls_w_per_k",
        "roof_w_per_k",
        "floor_w_per_k",
        "party_walls_w_per_k",
        "windows_w_per_k",
        "doors_w_per_k",
        "thermal_bridging_w_per_k",
        "infiltration_w_per_k",
        "infiltration_ach",
        "internal_gains_annual_avg_w",
        "mean_internal_temp_annual_avg_c",
        "useful_space_heating_kwh_per_yr",
    )
    for key in heat_balance_keys:
        print(f" {key:38s} {trail_value(key):.4f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one positional argument (the Summary PDF path) is expected;
    # anything else prints the module docstring as usage and exits with 2.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(__doc__)
        sys.exit(2)
    main(cli_args[0])
|
||||
Loading…
Add table
Reference in a new issue