diff --git a/etl/epc_recommendations/find_my_epc_pipeline.py b/etl/epc_recommendations/find_my_epc_pipeline.py index 0915823d..df85f60a 100644 --- a/etl/epc_recommendations/find_my_epc_pipeline.py +++ b/etl/epc_recommendations/find_my_epc_pipeline.py @@ -20,7 +20,7 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str): postcode_search = SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) postcode_response = requests.get(postcode_search, headers=headers) - postcode_res = BeautifulSoup(postcode_response.text) + postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") address_links_full = postcode_res.findAll('a', {'class': 'govuk-link', 'rel': 'nofollow'}) address_links = {element.text.lstrip().rstrip(): BASE_ENERGY_URL + element['href'] for element in address_links_full} @@ -31,19 +31,19 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str): epc_certificate = chosen_epc.split('/')[-1] address_response = requests.get(chosen_epc, headers=headers) - address_res = BeautifulSoup(address_response.text) + address_res = BeautifulSoup(address_response.text, features="html.parser") - print("## Energy rating - current and potential") + # print("## Energy rating - current and potential") ratings = address_res.find('desc', {'id': 'svg-desc'}).text - print('### Current EPC rating') + # print('### Current EPC rating') current_rating = ratings.split(".")[0] - print("##### " + current_rating) + # print("##### " + current_rating) - print('### Potential EPC rating') + # print('### Potential EPC rating') potential_rating = ratings.split(".")[1] - print("##### " + potential_rating) + # print("##### " + potential_rating) new_property_df = pd.DataFrame( { @@ -91,10 +91,8 @@ def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str): for i, element in enumerate(zip(changes, changes_impact)): improvement_header = element[0].text - # print("#### " + improvement_header) improvement_text = element[1].text - # print(improvement_text) col_name = improvement_header.split(":")[1].strip() cost = element[1].find('dd', {"class": "govuk-summary-list__value"}).text.lstrip().rstrip()