diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index b153b624..8379cc2a 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -218,8 +218,9 @@ class AssetList: STANDARD_ADDRESS_1 = "domna_address_1" STANDARD_POSTCODE = "domna_postcode" STANDARD_FULL_ADDRESS = "domna_full_address" - STANDARD_YEAR_BUILT = "domna_year_built" + STANDARD_YEAR_BUILT = "landlord_year_built" STANDARD_UPRN = "ordnance_survey_uprn" + STANDARD_LANDLORD_PROPERTY_ID = "landlord_property_id" STANDARD_PROPERTY_TYPE = "landlord_property_type" STANDARD_WALL_CONSTRUCTION = "landlord_wall_construction" STANDARD_HEATING_SYSTEM = "landlord_heating_system" @@ -293,6 +294,8 @@ class AssetList: self.variable_mappings = {} + self.rename_map = {} + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -359,6 +362,25 @@ class AssetList: # We look for string in the form (x-y) return bool(cls.MULTI_UNIT_REGEX.search(address1_section)) + @staticmethod + def _convert_uprn(x): + """ + Used to convert UPRNS to integer strings + :param x: uprn to convert + :return: converted uprn + """ + + if pd.isnull(x): + return x + + # check if numeric + if np.isreal(x): + return str(int(x)) + + if str(x).isdigit(): + return str(int(x)) + return x + def init_standardise(self): """ This function is used to standardise the asset list @@ -411,6 +433,12 @@ class AssetList: # We create the domna property id self.create_property_id() + # Clean up the UPRN column, if the landlord has provided them + if self.landlord_uprn is not None: + self.standardised_asset_list[self.landlord_uprn] = ( + self.standardised_asset_list[self.landlord_uprn].apply(self._convert_uprn) + ) + # We keep just the columns we care about and will work through the various columns and standardise variables = [ self.landlord_property_id, @@ -425,7 +453,21 @@ class AssetList: self.landlord_heating_system, self.landlord_existing_pv ] - rename = {} + # Keep just non-null variables (e.g landlord may not provide uprn + variables = [v for v in variables if v is not None] + rename = { + self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID, + self.address1_colname: self.STANDARD_ADDRESS_1, + self.postcode_colname: self.STANDARD_POSTCODE, + self.full_address_colname: self.STANDARD_FULL_ADDRESS, + self.landlord_uprn: self.STANDARD_UPRN, + self.landlord_property_type: self.STANDARD_PROPERTY_TYPE, + self.landlord_year_built: self.STANDARD_YEAR_BUILT, + self.landlord_wall_construction: self.STANDARD_WALL_CONSTRUCTION, + self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, + self.landlord_existing_pv: self.STANDARD_EXISTING_PV + } + rename = {k: v for k, v in rename.items() if k is not None} if self.non_intrusives_present: variables += self.NON_INTRUSIVES_COLNAMES @@ -538,5 +580,10 @@ class AssetList: ~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() ] + # Apply renames to our standard names + self.standardised_asset_list = self.standardised_asset_list.rename( + columns=self.rename_map + ) + def create_lookup_mappings(self): pass