From 38d80351e2929cd78d005e6db11874a5779b792f Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 17 May 2024 06:54:48 +0100 Subject: [PATCH] add previous assessors --- .../find_my_epc_pipeline.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/etl/epc_recommendations/find_my_epc_pipeline.py b/etl/epc_recommendations/find_my_epc_pipeline.py index df85f60a..88ac3827 100644 --- a/etl/epc_recommendations/find_my_epc_pipeline.py +++ b/etl/epc_recommendations/find_my_epc_pipeline.py @@ -68,10 +68,35 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str): new_property_df['assessor_number'] = assessor_number new_property_df['assessor_email'] = assessor_email + new_property_df['previous_assessor_details'] = None + + other_certificates_section = address_res.find("div", {'id': 'other_certificates_and_reports'}) + other_certificates_lines = other_certificates_section.find('dl', {"class": "govuk-summary-list"}) + other_certificates_links = other_certificates_lines.find_all('a') + + if len(other_certificates_links) > 0: + + previous_assesors_details_list = [] + + # Iterate and download previous assessors + for link in other_certificates_links: + previous_url = BASE_ENERGY_URL + link['href'] + previous_url_response = requests.get(previous_url, headers=headers) + previous_url_res = BeautifulSoup(previous_url_response.text, features="html.parser") + + assessor_block = previous_url_res.find('div', {'class': 'epc-contact-assessor'}) + assessor_fields = assessor_block.find_all('dd', {"class": 'govuk-summary-list__value govuk-!-width-one-half'}) + assessor_name = assessor_fields[0].text.strip() + assessor_number = assessor_fields[1].text.strip() + assessor_email = assessor_fields[2].text.strip() + + previous_assesors_details_list.append([assessor_name, assessor_number, assessor_email]) + + new_property_df['previous_assessor_details'] = [previous_assesors_details_list] + new_property_df['improvements'] = None new_property_df['improvements_sap'] = None new_property_df['improvements_cost'] = None - # return new_property_df # print('### Changes that can be made:') @@ -128,7 +153,7 @@ def main(): addresses_df['uprn'] = addresses_df['uprn'].astype(int) find_my_epc_data_list = [] - for i, row in tqdm(addresses_df.iterrows()): + for i, row in tqdm(addresses_df.tail(3).iterrows()): address_data = retrieve_find_my_epc_data( uprn=row['uprn'],