Improve EPC address matching (match by address prefix) and process every row of the input spreadsheet

This commit is contained in:
Michael Duong 2024-05-16 18:48:56 +01:00
parent 856fd5d32c
commit 0da0d5480f

View file

@ -24,10 +24,8 @@ def retrieve_find_my_epc_data(postcode: str, address: str):
address_links_full = postcode_res.findAll('a', {'class': 'govuk-link', 'rel': 'nofollow'})
address_links = {element.text.lstrip().rstrip(): BASE_ENERGY_URL + element['href'] for element in address_links_full}
chosen_epc = address_links[address + ', ' + postcode]
# # TODO: get the address match working properly
# chosen_epc = address_links[list(address_links.keys())[2]]
index_of_address = [key.startswith(address) for key in list(address_links.keys())]
chosen_epc = address_links[list(address_links.keys())[np.where(index_of_address)[0][0]]]
epc_certificate = chosen_epc.split('/')[-1]
@ -110,21 +108,10 @@ def main():
"""
# Load in list of properties
# addresses = [
# {
# "postcode": "BB1 1XD",
# "address": "5 Wasdale Avenue, Blackburn"
# },
# {
# "postcode": "BB1 8ED",
# "address": "21 Carlton Road"
# }
# ]
addresses_df = pd.read_excel("places_for_people_EPC_data.xlsx")
find_my_epc_data_list = []
for i, row in addresses_df.head(2).iterrows():
for i, row in tqdm(addresses_df.iterrows()):
address_data = retrieve_find_my_epc_data(
postcode=row['POSTCODE'],
address=row['Matched EPC Address']
@ -132,15 +119,6 @@ def main():
find_my_epc_data_list.append(address_data)
# for address in tqdm(addresses):
# address_data = retrieve_find_my_epc_data(
# postcode=address['postcode'],
# address=address['address']
# )
# find_my_epc_data_list.append(address_data)
find_my_epc_data = pd.concat(find_my_epc_data_list)
find_my_epc_data.to_parquet('find_my_epc_data.parquet')