mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
updating stonewater
This commit is contained in:
parent
711db3f552
commit
b8a094106c
5 changed files with 89 additions and 58 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from dotenv import load_dotenv
|
|||
from utils.s3 import save_csv_to_s3
|
||||
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
|
||||
|
||||
PORTFOLIO_ID = 127
|
||||
PORTFOLIO_ID = 128
|
||||
USER_ID = 8
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
|
|
@ -19,9 +19,9 @@ def app():
|
|||
|
||||
asset_list = [
|
||||
{
|
||||
"address": "19 Hillcrest Court",
|
||||
"postcode": "IP21 4YJ",
|
||||
"uprn": 2630134524,
|
||||
"address": "46",
|
||||
"postcode": "BS6 7BD",
|
||||
"uprn": 61091,
|
||||
}
|
||||
]
|
||||
asset_list = pd.DataFrame(asset_list)
|
||||
|
|
@ -52,8 +52,8 @@ def app():
|
|||
|
||||
valuation_data = [
|
||||
{
|
||||
"uprn": 2630134524,
|
||||
"valuation": 96_000
|
||||
"uprn": 61091,
|
||||
"valuation": 897_000
|
||||
}
|
||||
]
|
||||
# Store valuation data to s3
|
||||
|
|
|
|||
|
|
@ -3028,11 +3028,12 @@ def revised_model():
|
|||
"10. Little Island",
|
||||
"11. CCS Dorset"
|
||||
]
|
||||
wave_21_folder_name = "Wave 2.1 Surveys - 2"
|
||||
|
||||
for wave_2_1_folder in wave_21_folders:
|
||||
folder_path = os.path.join(CUSTOMER_FOLDER_PATH, "Wave 2.1 Surveys", wave_2_1_folder)
|
||||
folder_path = os.path.join(CUSTOMER_FOLDER_PATH, wave_21_folder_name, wave_2_1_folder)
|
||||
if os.path.isdir(folder_path): # Check if folder exists
|
||||
folder_contents = [os.path.join("Wave 2.1 Surveys", wave_2_1_folder, file) for file in
|
||||
folder_contents = [os.path.join(wave_21_folder_name, wave_2_1_folder, file) for file in
|
||||
os.listdir(folder_path)]
|
||||
survey_folders.extend(folder_contents) # Append contents to the master list
|
||||
|
||||
|
|
@ -3179,18 +3180,32 @@ def revised_model():
|
|||
|
||||
# Save
|
||||
# retrofit_assessment_data.to_csv(
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 3.csv"), index=False
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 5.csv"), index=False
|
||||
# )
|
||||
# mtp_df.to_csv(
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 3.csv"), index=False
|
||||
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 5.csv"), index=False
|
||||
# )
|
||||
retrofit_assessment_data = pd.read_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 3.csv"),
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 5.csv"),
|
||||
)
|
||||
mtp_df = pd.read_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 3.csv"),
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 5.csv"),
|
||||
)
|
||||
|
||||
# There are a few duplicates we just manually drop
|
||||
mtp_df = mtp_df.drop_duplicates()
|
||||
mtp_df = mtp_df[
|
||||
~((
|
||||
mtp_df["survey_folder"] == "Wave 2.1 Surveys - 2/1. Herefordshire/(043) Manor Fields 27"
|
||||
) & (~mtp_df["has_pv"]))
|
||||
]
|
||||
|
||||
mtp_df = mtp_df[
|
||||
~((
|
||||
mtp_df["survey_folder"] == "Wave 2.1 Surveys - 2/2. Bedfordshire/(147) Gilpin Close 5"
|
||||
) & (~mtp_df["has_pv"]))
|
||||
]
|
||||
|
||||
# Remove some definite duplicates
|
||||
dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"]
|
||||
dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)]
|
||||
|
|
@ -3487,7 +3502,7 @@ def revised_model():
|
|||
ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"]
|
||||
|
||||
ccs_manual_filters = {
|
||||
"35 Kittiwake Close": "Wave 2.1 Surveys/11. CCS Dorset/Kittiwake Close 35"
|
||||
"35 Kittiwake Close": f"{wave_21_folder_name}/11. CCS Dorset/Kittiwake Close 35"
|
||||
}
|
||||
ccs_matching_lookup = []
|
||||
for _, home in tqdm(ccs_coordination.iterrows(), total=len(ccs_coordination)):
|
||||
|
|
@ -3583,13 +3598,13 @@ def revised_model():
|
|||
]
|
||||
|
||||
wates_manual_filters = {
|
||||
"24 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/24-25 Rabley Wood View",
|
||||
"14 Edencroft": "Wave 2.1 Surveys/3. Wiltshire/14 Edencroft",
|
||||
"Flat 31 Rabley Wood View": "Wave 2.1 Surveys/3. Wiltshire/Flat 31 Rabley Wood View",
|
||||
'Flat 13, Manor Fields': 'Wave 2.1 Surveys/1. Herefordshire/(038) Manor Fields Flat 13',
|
||||
"4 Kittys Lane": "Wave 2.1 Surveys/1. Herefordshire/(005) Kittys Lane 4",
|
||||
'1 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 1',
|
||||
'2 Jephson Court': 'Wave 2.1 Surveys/5. Coventry/Jesphson Court 2',
|
||||
"24 Rabley Wood View": f"{wave_21_folder_name}/3. Wiltshire/24-25 Rabley Wood View",
|
||||
"14 Edencroft": f"{wave_21_folder_name}/3. Wiltshire/14 Edencroft",
|
||||
"Flat 31 Rabley Wood View": f"{wave_21_folder_name}/3. Wiltshire/Flat 31 Rabley Wood View",
|
||||
'Flat 13, Manor Fields': f'{wave_21_folder_name}/1. Herefordshire/(038) Manor Fields Flat 13',
|
||||
"4 Kittys Lane": f"{wave_21_folder_name}/1. Herefordshire/(005) Kittys Lane 4",
|
||||
'1 Jephson Court': f'{wave_21_folder_name}/5. Coventry/Jesphson Court 1',
|
||||
'2 Jephson Court': f'{wave_21_folder_name}/5. Coventry/Jesphson Court 2',
|
||||
}
|
||||
wates_matching_lookup = []
|
||||
# Examples to skip when we cannot get the data
|
||||
|
|
@ -3720,6 +3735,9 @@ def revised_model():
|
|||
if not missed_asset_id.empty:
|
||||
raise Exception("Missing Asset ID")
|
||||
|
||||
if wates_coordination["Asset ID_x"].duplicated().sum():
|
||||
raise Exception("Duplicated IDs in wates")
|
||||
|
||||
# We merge the mpt data on to the wates coordination
|
||||
wates_coordination = wates_coordination.merge(
|
||||
mtp_df, how="left", on="survey_folder"
|
||||
|
|
@ -3839,29 +3857,31 @@ def revised_model():
|
|||
|
||||
def find_nearest_matching_property(coordinated_packages, home):
|
||||
filter_levels = [
|
||||
["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
|
||||
["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
|
||||
["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
|
||||
["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
|
||||
["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
|
||||
["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
|
||||
(["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 1),
|
||||
(["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
|
||||
(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
|
||||
(["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 4),
|
||||
(["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 5),
|
||||
(["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 6),
|
||||
]
|
||||
|
||||
for i, filters in enumerate(filter_levels):
|
||||
max_confidence = max([confidence for (_, confidence) in filter_levels])
|
||||
|
||||
for i, (filters, match_confidence) in enumerate(filter_levels):
|
||||
match = coordinated_packages.copy()
|
||||
|
||||
for col in filters:
|
||||
match = match[match[col] == home[col]]
|
||||
|
||||
if not match.empty:
|
||||
return match
|
||||
return match, match_confidence
|
||||
|
||||
# Finally, we search for a property in the same Archetype
|
||||
match = coordinated_packages[coordinated_packages["Archetype ID"] == home["Archetype ID"]]
|
||||
if not match.empty:
|
||||
return match
|
||||
return match, max_confidence + 1
|
||||
|
||||
return None # No match found
|
||||
return None, None # No match found
|
||||
|
||||
coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
|
||||
new_priority_postcodes["Postal Region"] = new_priority_postcodes["Postcode"].str.split(" ").str[0].str.strip()
|
||||
|
|
@ -3896,8 +3916,8 @@ def revised_model():
|
|||
]
|
||||
matches.extend(to_extend)
|
||||
continue
|
||||
|
||||
closest_match = find_nearest_matching_property(coordinated_packages, home)
|
||||
blah
|
||||
closest_match, match_confidence = find_nearest_matching_property(coordinated_packages, home)
|
||||
if closest_match is None:
|
||||
no_match.append(home["Organisation Reference"])
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -86,8 +86,14 @@ def download_data_from_sharepoint():
|
|||
folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
|
||||
)
|
||||
|
||||
folders_to_keep = [
|
||||
"1. Herefordshire", "2. Bedfordshire", "3. Wiltshire", "4. Bournemouth",
|
||||
"5. Coventry", "6. West Sussex", "7. Dorset", "8. Cambridgeshire",
|
||||
"9. Guildford", "10. Little Island", "11. CCS Dorset",
|
||||
]
|
||||
|
||||
folders_to_pull = [
|
||||
folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"]
|
||||
folder for folder in contents["value"] if folder["name"] in folders_to_keep
|
||||
]
|
||||
for folder_to_pull in folders_to_pull:
|
||||
# Get the contents
|
||||
|
|
@ -109,35 +115,40 @@ def download_data_from_sharepoint():
|
|||
)
|
||||
if not property_folder_contents.get("value"):
|
||||
continue
|
||||
# We look for the retrofit assessment folder:
|
||||
# We look for the retrofit assessment folder or mtp folders:
|
||||
property_sub_folders = [
|
||||
f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
|
||||
f for f in property_folder_contents["value"] if
|
||||
"ra coordinator info" in f["name"].lower() or
|
||||
"retrofit assessment" in f["name"].lower() or
|
||||
"ra info" in f["name"].lower() or
|
||||
"mtp" in f["name"].lower() or
|
||||
"mid-term" in f["name"].lower()
|
||||
]
|
||||
|
||||
if not property_sub_folders:
|
||||
continue
|
||||
|
||||
# if we have this, we download the folder and store it on my laptop!
|
||||
property_sub_folder = property_sub_folders[0]
|
||||
for property_sub_folder in property_sub_folders:
|
||||
# if we have this, we download the folder and store it on my laptop!
|
||||
|
||||
property_folder_path = os.path.join(
|
||||
"Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders",
|
||||
folder_to_pull["name"],
|
||||
property_folder["name"],
|
||||
property_sub_folder["name"]
|
||||
)
|
||||
property_folder_path = os.path.join(
|
||||
"Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders",
|
||||
folder_to_pull["name"],
|
||||
property_folder["name"],
|
||||
property_sub_folder["name"]
|
||||
)
|
||||
|
||||
download_dir = os.path.join(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys",
|
||||
folder_to_pull["name"],
|
||||
property_folder["name"],
|
||||
property_sub_folder["name"]
|
||||
)
|
||||
download_dir = os.path.join(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys - 2",
|
||||
folder_to_pull["name"],
|
||||
property_folder["name"],
|
||||
property_sub_folder["name"]
|
||||
)
|
||||
|
||||
# We download the folder
|
||||
sharepoint_client.download_sharepoint_folder(
|
||||
drive_id=sharepoint_client.document_drive["id"],
|
||||
folder_path=property_folder_path,
|
||||
download_dir=download_dir,
|
||||
excluded_file_types=["MOV", "jpg"]
|
||||
)
|
||||
# We download the folder
|
||||
sharepoint_client.download_sharepoint_folder(
|
||||
drive_id=sharepoint_client.document_drive["id"],
|
||||
folder_path=property_folder_path,
|
||||
download_dir=download_dir,
|
||||
excluded_file_types=["MOV", "jpg"]
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue