From ce2a4665de675476216d1df6aaae9a362ef1d0e1 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 17 May 2024 06:13:28 +0100 Subject: [PATCH] add improvement details --- .../find_my_epc_pipeline.py | 69 +++++++++++-------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/etl/epc_recommendations/find_my_epc_pipeline.py b/etl/epc_recommendations/find_my_epc_pipeline.py index 2b4a762e..0915823d 100644 --- a/etl/epc_recommendations/find_my_epc_pipeline.py +++ b/etl/epc_recommendations/find_my_epc_pipeline.py @@ -51,13 +51,13 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str): 'address': [address], 'epc_certificate': [epc_certificate], 'current_epc_rating': [current_rating.split(' ')[-6]], - 'current_epc_efficiency': [current_rating.split(' ')[-1]], + 'current_epc_efficiency': [int(current_rating.split(' ')[-1])], 'potential_epc_rating': [potential_rating.split(' ')[-6]], - "potential_epc_efficiency": [potential_rating.split(' ')[-1]] + "potential_epc_efficiency": [int(potential_rating.split(' ')[-1])] } ) - print("Find assessor") + print("Find assessor details") assessor_block = address_res.find('div', {'class': 'epc-contact-assessor'}) assessor_fields = assessor_block.find_all('dd', {"class": 'govuk-summary-list__value govuk-!-width-one-half'}) assessor_name = assessor_fields[0].text.strip() @@ -68,43 +68,54 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str): new_property_df['assessor_number'] = assessor_number new_property_df['assessor_email'] = assessor_email - return new_property_df + new_property_df['improvements'] = None + new_property_df['improvements_sap'] = None + new_property_df['improvements_cost'] = None + + # return new_property_df # print('### Changes that can be made:') - # improvements = address_res.find('div', {"class": "govuk-body printable-area epb-recommended-improvements"}) + improvements = address_res.find('div', {"class": "govuk-body printable-area epb-recommended-improvements"}) - # 
if improvements is None: - # print("No changes suggested") - # else: - # changes = improvements.find_all('h3') - # changes_impact = improvements.find_all('dl', {"class": 'govuk-summary-list'}) + if improvements is None: + print("No changes suggested") + return new_property_df + else: + changes = improvements.find_all('h3') + changes_impact = improvements.find_all('dl', {"class": 'govuk-summary-list'}) - # for element in zip(changes, changes_impact): - # improvement_header = element[0].text - # print("#### " + improvement_header) + sap_running_efficiency = new_property_df['current_epc_efficiency'].values[0] + changes_list = [] + changes_impact_list = [] + changes_cost_list = [] + for i, element in enumerate(zip(changes, changes_impact)): - # improvement_text = element[1].text - # print(improvement_text) + improvement_header = element[0].text + # print("#### " + improvement_header) + + improvement_text = element[1].text + # print(improvement_text) - # col_name = improvement_header.split(":")[1] - # cost = element[1].find('dd', {"class": "govuk-summary-list__value"}).text.lstrip().rstrip() + col_name = improvement_header.split(":")[1].strip() + cost = element[1].find('dd', {"class": "govuk-summary-list__value"}).text.lstrip().rstrip() - # impact = element[1].find('text', {"class": "govuk-!-font-weight-bold"}).text.split(" ") - # impact_num = impact[0] - # impact_cat = impact[1] - # print(cost) - # new_property_df[col_name] = True - # # cost_column = col_name + '-cost' - # # new_property_df.assign(cost_column=cost) - # new_property_df[col_name + '-cost'] = cost - # new_property_df[col_name + '-impact_num'] = impact_num - # new_property_df[col_name + '-impact_cat'] = impact_cat + impact = element[1].find('text', {"class": "govuk-!-font-weight-bold"}).text.split(" ") + impact_num = int(impact[0]) + impact_cat = impact[1] + sap_difference = impact_num - sap_running_efficiency - # data = pd.concat([data, new_property_df]) - # data.to_csv('./portfolio.csv') + 
changes_list.append(col_name) + changes_impact_list.append(sap_difference) + changes_cost_list.append(cost) + sap_running_efficiency = impact_num + new_property_df['improvements'] = [changes_list] + new_property_df['improvements_sap'] = [changes_impact_list] + new_property_df['improvements_cost'] = [changes_cost_list] + + return new_property_df def main(): """