mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixing dupes
This commit is contained in:
parent
34464267dc
commit
308e028605
2 changed files with 18 additions and 14 deletions
|
|
@ -208,7 +208,7 @@ class PropertyTargetsModel(Base):
|
|||
|
||||
|
||||
class PortfolioUsers(Base):
|
||||
__table_args__ = "portfolioUsers"
|
||||
__tablename__ = "portfolioUsers"
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
userId = Column(Integer, ForeignKey('user.id'), nullable=False)
|
||||
portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
|
||||
|
|
|
|||
|
|
@ -167,6 +167,9 @@ class Ownership:
|
|||
file_key=self.combined_matching_lookup_pre_filter_filepath
|
||||
)
|
||||
|
||||
# Prepare the final outputs:
|
||||
self.create_final_matches()
|
||||
|
||||
def source_epc_properties(self, column_filters=None):
|
||||
"""
|
||||
This function will filter the epc data as specified by column filters, searching across all of the EPC tables
|
||||
|
|
@ -556,22 +559,22 @@ class Ownership:
|
|||
self.shared_freehold_match = shared_freehold_match
|
||||
|
||||
# finally, we create matched addresses
|
||||
combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
|
||||
self.combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
|
||||
|
||||
# Remove duplicates
|
||||
combined_matching_lookup = self.remove_duplicate_matches(
|
||||
matching_lookup=combined_matching_lookup,
|
||||
self.combined_matching_lookup = self.remove_duplicate_matches(
|
||||
matching_lookup=self.combined_matching_lookup,
|
||||
properties=self.epc_data,
|
||||
company_ownership=self.ownership_data
|
||||
)
|
||||
# We also have duplicates at a UPRN level
|
||||
self.combined_matching_lookup = self.remove_duplicate_uprn_matches(
|
||||
matching_lookup=combined_matching_lookup,
|
||||
matching_lookup=self.combined_matching_lookup,
|
||||
properties=self.epc_data,
|
||||
company_ownership=self.ownership_data
|
||||
)
|
||||
|
||||
self.matched_addresses = combined_matching_lookup.merge(
|
||||
self.matched_addresses = self.combined_matching_lookup.merge(
|
||||
self.epc_data[
|
||||
[
|
||||
"UPRN",
|
||||
|
|
@ -859,7 +862,7 @@ class Ownership:
|
|||
logger.info("Sucessfully completed land registry matching - merging onto matched_addresses")
|
||||
# Merge onto the EPC - ownership matches
|
||||
self.matched_addresses = self.matched_addresses.merge(
|
||||
land_registry_matches,
|
||||
self.land_registry_matches,
|
||||
how="left",
|
||||
left_on="UPRN",
|
||||
right_on="uprn"
|
||||
|
|
@ -944,21 +947,22 @@ class Ownership:
|
|||
logger.info("Performing conservation area and listed/herigage building filtering")
|
||||
|
||||
portfolio_spatial_data = OpenUprnClient.get_spatial_data(
|
||||
self.epc_data["UPRN"].tolist(), bucket_name="retrofit-data-dev"
|
||||
matched_addresses_final["UPRN"].unique().tolist(), bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
portfolio_spatial_data = portfolio_spatial_data[
|
||||
["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]
|
||||
]
|
||||
].copy()
|
||||
portfolio_spatial_data["UPRN"] = portfolio_spatial_data["UPRN"].astype(str)
|
||||
|
||||
# Filter matched_addresses_final and filter combined_matching_lookup_final
|
||||
matched_addresses_final = matched_addresses_final.merge(
|
||||
portfolio_spatial_data, how="left", on="UPRN"
|
||||
)
|
||||
matched_addresses_final = matched_addresses_final[
|
||||
~matched_addresses_final["conservation_status"] &
|
||||
~matched_addresses_final["is_listed_building"] &
|
||||
~matched_addresses_final["is_heritage_building"]
|
||||
matched_addresses_final["conservation_status"].isin([None, False]) &
|
||||
matched_addresses_final["is_listed_building"].isin([None, False]) &
|
||||
matched_addresses_final["is_heritage_building"].isin([None, False])
|
||||
]
|
||||
|
||||
# Filter combined_matching_lookup accordingly
|
||||
|
|
@ -970,7 +974,7 @@ class Ownership:
|
|||
combined_aggregate = self.aggregate_matches(
|
||||
matching_lookup=combined_matching_lookup_final,
|
||||
company_ownership=self.ownership_data,
|
||||
properties=self.epc_paths
|
||||
properties=self.epc_data
|
||||
)
|
||||
|
||||
self.portfolio_owners = combined_aggregate[combined_aggregate["cumulative_value"] <= self.portfolio_value]
|
||||
|
|
@ -983,7 +987,7 @@ class Ownership:
|
|||
|
||||
self.portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(self.portfolio_properties["UPRN"])]
|
||||
|
||||
logger.info("Storing final outptus")
|
||||
logger.info("Storing final outpus")
|
||||
# Store data
|
||||
save_excel_to_s3(
|
||||
df=self.portfolio_owners,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue