mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
matched submissions
This commit is contained in:
parent
3cfe938e27
commit
83a1ac8cf3
2 changed files with 23 additions and 9 deletions
|
|
@ -5,7 +5,6 @@ import tiktoken
|
|||
from pprint import pprint
|
||||
from datetime import datetime
|
||||
|
||||
from docutils.utils.math.tex2mathml_extern import blahtexml
|
||||
from openai import OpenAI
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
|
@ -379,9 +378,10 @@ class AssetList:
|
|||
self.contact_details = None
|
||||
self.contact_detail_fields = None
|
||||
self.outcomes = None
|
||||
self.outcomes_no_match = None
|
||||
self.outcomes_no_match = pd.DataFrame()
|
||||
self.outcomes_for_output = pd.DataFrame()
|
||||
self.master_surveyed = None
|
||||
self.unmatched_submissions = pd.DataFrame()
|
||||
|
||||
# When this is True, we intend to break the programme into multiple phases. We may need to review
|
||||
# how this is structured in the future, as depending on how we get future data, we may need to
|
||||
|
|
@ -2249,6 +2249,7 @@ class AssetList:
|
|||
|
||||
logger.info("Getting masters and merging onto asset list")
|
||||
master_surveyed = []
|
||||
unmatched_submissions = []
|
||||
for filepath in master_filepaths:
|
||||
master_data = pd.read_csv(filepath)
|
||||
# Strip columns
|
||||
|
|
@ -2293,14 +2294,17 @@ class AssetList:
|
|||
axis=1
|
||||
)
|
||||
|
||||
postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code"
|
||||
house_no_col = 'NO.' if 'NO.' in master_data.columns else "NO"
|
||||
|
||||
# Otherwise, we need to match algorithmically
|
||||
logger.info("Matching master data to asset list")
|
||||
matched = []
|
||||
unmatched = []
|
||||
for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
|
||||
if pd.isnull(row["POSTCODE"]):
|
||||
if pd.isnull(row[postcode_col]):
|
||||
continue
|
||||
postcode_no_space = row["POSTCODE"].strip().replace(" ", "").lower()
|
||||
postcode_no_space = row[postcode_col].strip().replace(" ", "").lower()
|
||||
|
||||
df = self.standardised_asset_list[
|
||||
(
|
||||
|
|
@ -2310,7 +2314,7 @@ class AssetList:
|
|||
)
|
||||
]
|
||||
|
||||
house_no = row["NO"]
|
||||
house_no = row[house_no_col]
|
||||
|
||||
if house_no in df["house_no"].values:
|
||||
df = df[df["house_no"] == house_no]
|
||||
|
|
@ -2326,7 +2330,7 @@ class AssetList:
|
|||
df = df[
|
||||
df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
|
||||
lambda x: process.extractOne(
|
||||
" ".join([row["NO"], row["Street / Block Name"], row["TOWN"]]).lower(),
|
||||
" ".join([row[house_no_col], row["Street / Block Name"], row["TOWN"]]).lower(),
|
||||
x
|
||||
)[1]
|
||||
) > 90
|
||||
|
|
@ -2337,11 +2341,11 @@ class AssetList:
|
|||
continue
|
||||
|
||||
if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
|
||||
" ".join([row["NO"], row["Street / Block Name"]]).lower()
|
||||
" ".join([row[house_no_col], row["Street / Block Name"]]).lower()
|
||||
)):
|
||||
df = df[
|
||||
df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
|
||||
" ".join([row["NO"], row["Street / Block Name"]]).lower()
|
||||
" ".join([row[house_no_col], row["Street / Block Name"]]).lower()
|
||||
)
|
||||
]
|
||||
|
||||
|
|
@ -2384,7 +2388,7 @@ class AssetList:
|
|||
unmatched_df = master_data[
|
||||
master_data["row_id"].isin(unmatched)
|
||||
]
|
||||
submissions_unmatched.append(unmatched_df)
|
||||
unmatched_submissions.append(unmatched_df)
|
||||
|
||||
master_surveyed = pd.concat(master_surveyed)
|
||||
master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])]
|
||||
|
|
@ -2404,3 +2408,7 @@ class AssetList:
|
|||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
|
||||
)
|
||||
|
||||
# Finally, we keep a record of the unmatched
|
||||
if unmatched_submissions:
|
||||
self.unmatched_submissions = pd.concat(unmatched_submissions)
|
||||
|
|
|
|||
|
|
@ -943,5 +943,11 @@ def app():
|
|||
if not asset_list.outcomes_for_output.empty:
|
||||
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
|
||||
|
||||
if not asset_list.unmatched_submissions.empty:
|
||||
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
|
||||
|
||||
if not asset_list.outcomes_no_match.empty:
|
||||
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
|
||||
|
||||
# Store the Hubspot export as a csv
|
||||
hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue