From fa5148228bac3819429d39b10b7edbbe8e75abfe Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Dec 2023 16:33:22 +0000 Subject: [PATCH] completed merge of ha15 and ha32 --- backend/SearchEpc.py | 32 +++++++++++++ etl/eligibility/ha_15_32/app.py | 55 ++++------------------- etl/eligibility/ha_15_32/requirements.txt | 4 +- 3 files changed, 43 insertions(+), 48 deletions(-) create mode 100644 backend/SearchEpc.py diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py new file mode 100644 index 00000000..41fbbb0f --- /dev/null +++ b/backend/SearchEpc.py @@ -0,0 +1,32 @@ +from epc_api.client import EpcClient + +class SearchEpc: + """ + Given address information about a home, this class is responsible for retrieving the EPC data associated + with the property. + + For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode. + + Often, simply searching the EPC database with address line 1 and postcode will be enough to find + the property, but there are some cases where this is not true and we might need to utilise other + combinations of the home's address details to find the property. + """ + + def __init__(self, address1, postcode, address2=None, address3=None, address4=None): + """ + Address line 1 and postcode are mandatory fields. 
The other address lines are optional + but can be used to find the epc for the home, if address1 and postcode are insufficient + :param address1: string, property's address line 1 + :param postcode: string, property's postcode + :param address2: string, optional, property's address line 2 + :param address3: string, optional, property's address line 3 + :param address4: string, optional, property's address line 4 + """ + + self.address1 = address1 + self.postcode = postcode + self.address2 = address2 + self.address3 = address3 + self.address4 = address4 + + def search(self): diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index cf836439..90229801 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -155,7 +155,9 @@ def marge_ha_32(asset_list, identified_addresses): if missed.shape[0] != 36: raise ValueError("We have a problem here, investigate me, missings beyond the Hessle Road addresses") - # TODO: Finish me + # Finally, we return the data we need + + return merged_data, dropped_identified_merge_keys def merge_ha_15(asset_list, identified_addresses): @@ -303,55 +305,14 @@ def merge_ha_15(asset_list, identified_addresses): ~identified_addresses["merge_key"].isin(merged_data["merge_key4"]) ] - if ha == "ha32": - if missed.shape[0] != 36: - raise ValueError("We have a problem here, investigate me, missings beyond the Hessle Road addresses") + if missed.shape[0]: + raise ValueError("We have a problem here, investigate me, should not have any missings for ha15") - missed.shape - - missed["Address"].unique() - - len([m for m in missed["Address"].unique() if "Mary Mac" in m]) - - a = identified_addresses[ - identified_addresses["Address"].str.contains("103 Priory Crescent") - ] - b = asset_list[ - asset_list["Address Line 1"].str.contains("103 Priory Crescent") - ] - - a["merge_key"] - b["merge_key"] - b["merge_key2"] - b["merge_key3"] - - identified_addresses["merge_key"].isin(merged_data["merge_key"]) - - 
identified_addresses["merge_key"].isin(asset_list["merge_key"]).sum() - identified_addresses["merge_key2"].isin(asset_list["merge_key2"]).sum() - - # Find merge keys not in the identified addresses - - missed = identified_addresses[ - ~identified_addresses["merge_key"].isin(merged_data["merge_key"]) - ] - - identified_addresses[ - (identified_addresses["Address"].str.contains("Ashbury Court")) - ] - - asset_list[ - (asset_list["Street"].str.contains("Ashbury Court")) - ] - - # Drop the UPRN column from the merged data - merged_data = merged_data.drop(columns=["UPRN"]) - - return merged_data + return merged_data, dropped_identified_merge_keys def app(): ha32_asset_list, ha15_asset_list, ha32_identified_addresses, ha15_identified_addresses = load_data() - ha32 = marge_ha_32(asset_list=ha32_asset_list, identified_addresses=ha32_identified_addresses) - ha15 = merge_ha_15(asset_list=ha15_asset_list, identified_addresses=ha15_identified_addresses) + ha32, _ = marge_ha_32(asset_list=ha32_asset_list, identified_addresses=ha32_identified_addresses) + ha15, _ = merge_ha_15(asset_list=ha15_asset_list, identified_addresses=ha15_identified_addresses) diff --git a/etl/eligibility/ha_15_32/requirements.txt b/etl/eligibility/ha_15_32/requirements.txt index 1411a4a0..390d7de9 100644 --- a/etl/eligibility/ha_15_32/requirements.txt +++ b/etl/eligibility/ha_15_32/requirements.txt @@ -1 +1,3 @@ -pandas \ No newline at end of file +pandas +pydantic==1.10.11 +epc-api-python==1.0.2