mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
add fix for missing uprns
This commit is contained in:
parent
c9891b8023
commit
982499962f
1 changed file with 22 additions and 9 deletions
|
|
@@ -10,7 +10,7 @@ from tqdm import tqdm
|
|||
SEARCH_POSTCODE_URL = "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
|
||||
BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
|
||||
|
||||
def retrieve_find_my_epc_data(postcode: str, address: str):
|
||||
def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str):
|
||||
"""
|
||||
For a post code and address, we pull out all the required data from the find my epc website
|
||||
"""
|
||||
|
|
@@ -46,12 +46,15 @@ def retrieve_find_my_epc_data(postcode: str, address: str):
|
|||
print("##### " + potential_rating)
|
||||
|
||||
new_property_df = pd.DataFrame(
|
||||
{'address': [address],
|
||||
'epc_certificate': [epc_certificate],
|
||||
'current_epc_rating': [current_rating.split(' ')[-6]],
|
||||
'current_epc_efficiency': [current_rating.split(' ')[-1]],
|
||||
'potential_epc_rating': [potential_rating.split(' ')[-6]],
|
||||
"potential_epc_efficiency": [potential_rating.split(' ')[-1]]}
|
||||
{
|
||||
'uprn': [uprn],
|
||||
'address': [address],
|
||||
'epc_certificate': [epc_certificate],
|
||||
'current_epc_rating': [current_rating.split(' ')[-6]],
|
||||
'current_epc_efficiency': [current_rating.split(' ')[-1]],
|
||||
'potential_epc_rating': [potential_rating.split(' ')[-6]],
|
||||
"potential_epc_efficiency": [potential_rating.split(' ')[-1]]
|
||||
}
|
||||
)
|
||||
|
||||
print("Find assessor")
|
||||
|
|
@@ -109,11 +112,17 @@ def main():
|
|||
"""
|
||||
|
||||
# Load in list of properties
|
||||
addresses_df = pd.read_excel("places_for_people_EPC_data.xlsx")
|
||||
base_addresses_df = pd.read_excel("places_for_people_EPC_data.xlsx")
|
||||
|
||||
addresses_df = base_addresses_df[~base_addresses_df['uprn'].isnull()].copy().reset_index(drop=True)
|
||||
|
||||
addresses_df['uprn'] = addresses_df['uprn'].astype(int)
|
||||
|
||||
find_my_epc_data_list = []
|
||||
for i, row in tqdm(addresses_df.iterrows()):
|
||||
for i, row in tqdm(addresses_df.tail(3).iterrows()):
|
||||
|
||||
address_data = retrieve_find_my_epc_data(
|
||||
uprn=row['uprn'],
|
||||
postcode=row['POSTCODE'],
|
||||
address=row['Matched EPC Address']
|
||||
)
|
||||
|
|
@@ -124,5 +133,9 @@ def main():
|
|||
|
||||
find_my_epc_data.to_parquet('find_my_epc_data.parquet')
|
||||
|
||||
final_df = pd.merge(left=base_addresses_df, right=find_my_epc_data, left_on='Matched EPC Address', right_on='address', how='left')
|
||||
|
||||
final_df.to_parquet('final_df.parquet')
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Reference in a new issue