adding the assessment information to retrieve_find_my_epc_data

This commit is contained in:
Khalim Conn-Kowlessar 2024-09-30 11:53:04 +01:00
parent ceb003ec7a
commit 591b952225
3 changed files with 86 additions and 4 deletions

View file

@ -100,9 +100,44 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_e
bills = address_res.find('div', {'id': 'bills-affected'})
bills_list = bills.find_all('li')
if not bills_list:
return None
heating_text = bills_list[0].text
hot_water_text = bills_list[1].text
# If this is the case, it's usually because the EPC was very old. Early EPCs did not have this information
heating_text = None
hot_water_text = None
else:
heating_text = bills_list[0].text
hot_water_text = bills_list[1].text
# Search for the assessment information
assessment_information = address_res.find('div', {'id': 'information'})
# Parse this information
rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
# Create a dictionary to hold the parsed information
assessment_data = {}
for row in rows:
key = row.find('dt').text.strip()
if key == "Type of assessment":
# We don't reliably extract this
continue
value_tag = row.find('dd')
# Check if value contains a link (email)
if value_tag.find('a'):
value = value_tag.find('a').text.strip()
elif value_tag.find('summary'):
value = value_tag.find('span').text.strip()
else:
value = value_tag.text.strip()
assessment_data[key] = value
expected_keys = [
'Assessors name', 'Telephone', 'Email', 'Accreditation scheme', 'Assessors ID', 'Assessors declaration',
'Date of assessment', 'Date of certificate'
]
# Check we have all the expected keys
for key in expected_keys:
if key not in assessment_data:
raise ValueError(f"Missing key: {key}")
resulting_data = {
'extracted_uprn': uprn,
@ -114,6 +149,7 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_e
"potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
"heating_text": heating_text,
"hot_water_text": hot_water_text,
**assessment_data
}
return resulting_data

View file

@ -0,0 +1,41 @@
import pandas as pd
import numpy as np
import time
from tqdm import tqdm
from etl.bill_savings.data_collection import retrieve_find_my_epc_data, calculate_expiry_date
def main(
    epc_data_path: str = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/epc_data.csv",
) -> pd.DataFrame:
    """
    This script handles pulling the surveyor names and accreditation details for Surveyors who have completed
    the newest EPC for AIHA's properties

    :param epc_data_path: Path to the EPC export CSV. It must contain the columns
        "uprn", "address", "address1", "postcode" and "lodgement-date".
    :return: DataFrame with one row per property, built from the dictionaries returned by
        retrieve_find_my_epc_data.
    :raises RuntimeError: If no data could be retrieved for a property.
    :raises ValueError: If a retrieved record contains a None key.
    """
    epc_data = pd.read_csv(epc_data_path)
    epc_data = epc_data[["uprn", "address", "address1", "postcode", "lodgement-date"]]

    epc_collected_data = []
    for _, unit in tqdm(epc_data.iterrows(), total=len(epc_data)):
        # Random pause of 0.2-1.5 seconds before each lookup
        # (presumably to avoid hammering the remote service - confirm)
        time.sleep(np.random.uniform(0.2, 1.5))

        uprn = int(unit["uprn"])
        address = unit["address1"]
        postcode = unit["postcode"]
        expected_expiry_date = calculate_expiry_date(unit["lodgement-date"])

        response = retrieve_find_my_epc_data(
            uprn=uprn,
            postcode=postcode,
            address=address,
            expected_expiry_date=expected_expiry_date
        )
        if response is None:
            raise RuntimeError(
                f"No EPC data retrieved for uprn={uprn}, postcode={postcode!r}, address={address!r}"
            )

        # Check for None keys on the raw record. This has to happen before the records are turned into a
        # DataFrame, because iterating a DataFrame yields column labels rather than the per-property dicts.
        if any(key is None for key in response):
            raise ValueError(f"Record for uprn={uprn} contains a None key: {response}")

        epc_collected_data.append(response)

    return pd.DataFrame(epc_collected_data)

View file

@ -120,10 +120,15 @@ class MainheatControlAttributes(Definitions):
"rheoli'r t l llaw": "manual charge control",
}
NO_DATA_DESCRIPTIONS = [
"SAP05:Main-Heating-Controls",
"SAP:Main-Heating-Controls",
]
def __init__(self, description: str):
self.description: str = clean_description(description.lower()).strip()
self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
description == "SAP05:Main-Heating-Controls"
description in self.NO_DATA_DESCRIPTIONS
)
translation = self.WELSH_TEXT.get(self.description)