fixing dupes

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-19 16:54:19 +01:00
parent 34464267dc
commit 308e028605
2 changed files with 18 additions and 14 deletions

View file

@ -208,7 +208,7 @@ class PropertyTargetsModel(Base):
class PortfolioUsers(Base):
__table_args__ = "portfolioUsers"
__tablename__ = "portfolioUsers"
id = Column(Integer, primary_key=True, autoincrement=True)
userId = Column(Integer, ForeignKey('user.id'), nullable=False)
portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)

View file

@ -167,6 +167,9 @@ class Ownership:
file_key=self.combined_matching_lookup_pre_filter_filepath
)
# Prepare the final outputs:
self.create_final_matches()
def source_epc_properties(self, column_filters=None):
"""
This function will filter the epc data as specified by column filters, searching across all of the EPC tables
@ -556,22 +559,22 @@ class Ownership:
self.shared_freehold_match = shared_freehold_match
# finally, we create matched addresses
combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
self.combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
# Remove duplicates
combined_matching_lookup = self.remove_duplicate_matches(
matching_lookup=combined_matching_lookup,
self.combined_matching_lookup = self.remove_duplicate_matches(
matching_lookup=self.combined_matching_lookup,
properties=self.epc_data,
company_ownership=self.ownership_data
)
# We also have duplicates at a UPRN level
self.combined_matching_lookup = self.remove_duplicate_uprn_matches(
matching_lookup=combined_matching_lookup,
matching_lookup=self.combined_matching_lookup,
properties=self.epc_data,
company_ownership=self.ownership_data
)
self.matched_addresses = combined_matching_lookup.merge(
self.matched_addresses = self.combined_matching_lookup.merge(
self.epc_data[
[
"UPRN",
@ -859,7 +862,7 @@ class Ownership:
logger.info("Sucessfully completed land registry matching - merging onto matched_addresses")
# Merge onto the EPC - ownership matches
self.matched_addresses = self.matched_addresses.merge(
land_registry_matches,
self.land_registry_matches,
how="left",
left_on="UPRN",
right_on="uprn"
@ -944,21 +947,22 @@ class Ownership:
logger.info("Performing conservation area and listed/herigage building filtering")
portfolio_spatial_data = OpenUprnClient.get_spatial_data(
self.epc_data["UPRN"].tolist(), bucket_name="retrofit-data-dev"
matched_addresses_final["UPRN"].unique().tolist(), bucket_name="retrofit-data-dev"
)
portfolio_spatial_data = portfolio_spatial_data[
["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]
]
].copy()
portfolio_spatial_data["UPRN"] = portfolio_spatial_data["UPRN"].astype(str)
# Filter matched_addresses_final and filter combined_matching_lookup_final
matched_addresses_final = matched_addresses_final.merge(
portfolio_spatial_data, how="left", on="UPRN"
)
matched_addresses_final = matched_addresses_final[
~matched_addresses_final["conservation_status"] &
~matched_addresses_final["is_listed_building"] &
~matched_addresses_final["is_heritage_building"]
matched_addresses_final["conservation_status"].isin([None, False]) &
matched_addresses_final["is_listed_building"].isin([None, False]) &
matched_addresses_final["is_heritage_building"].isin([None, False])
]
# Filter combined_matching_lookup accordingly
@ -970,7 +974,7 @@ class Ownership:
combined_aggregate = self.aggregate_matches(
matching_lookup=combined_matching_lookup_final,
company_ownership=self.ownership_data,
properties=self.epc_paths
properties=self.epc_data
)
self.portfolio_owners = combined_aggregate[combined_aggregate["cumulative_value"] <= self.portfolio_value]
@ -983,7 +987,7 @@ class Ownership:
self.portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(self.portfolio_properties["UPRN"])]
logger.info("Storing final outptus")
logger.info("Storing final outpus")
# Store data
save_excel_to_s3(
df=self.portfolio_owners,