checking additional list

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-06 13:41:41 +00:00
parent ba3130b1c5
commit 7c4e32abc9
4 changed files with 280 additions and 2 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -1,4 +1,278 @@
import os
import time
import json
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from utils.s3 import read_from_s3, read_pickle_from_s3
# Load the backend's .env file first so the token below can be read from the process environment.
load_dotenv(dotenv_path="backend/.env")
# Token handed to SearchEpc as auth_token; None when the variable is not set.
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every home in an asset list.

    :param asset_list: DataFrame with one row per home. Each row is read for its "Postcode", "Number",
        "Full Address" and "row_id" values.
    :return: Tuple of (epc_data, errors).
        epc_data is a list of dicts, one per home for which an EPC was found, holding the home's "row_id",
        every field of the newest EPC and a "recommendations" list.
        errors is a list of the "row_id" values of homes whose lookup raised an exception.
    """
    epc_data = []
    errors = []

    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            house_number = home["Number"]
            full_address = home["Full Address"]

            searcher = SearchEpc(
                address1=str(house_number),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )

            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None

            searcher.find_property(skip_os=True)

            # Homes with no EPC are skipped silently: they are neither a result nor an error
            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations. This is best effort: a failed lookup falls back to an empty
            # list. Only Exception is caught so that Ctrl-C / SystemExit still stop a long-running batch.
            try:
                property_recommendations = searcher.client.domestic.recommendations(
                    searcher.newest_epc["lmk-key"]
                )
            except Exception:
                property_recommendations = {"rows": []}

            epc = {
                "row_id": home["row_id"],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"]
            }
            epc_data.append(epc)
        except Exception:
            # Record the failing home and carry on with the rest of the list
            errors.append(home["row_id"])
            # NOTE(review): presumably a back-off so a struggling API is not hit again straight away - confirm
            time.sleep(5)

    return epc_data, errors
def app():
    """
    This code creates a list of cavity properties, for review

    Steps, in order:
      1. Read the archetyped property list and flag rows whose "Wall Type" is an as built cavity description.
      2. Read the costed retrofit packages and flag every archetype that has at least one property whose
         "Main Wall Insulation" indicates CWI, or an extract and re-fill.
      3. Merge on the property features from the master sheet, keep only the flagged properties and record
         why each one was included in "Reason Included".
      4. Read the EPC data from S3, keep the newest EPC per UPRN and merge it onto the cavity list.
      5. Find cavity properties in the master sheet that are not in the archetyped list.

    All inputs are read from hard-coded local paths and S3 keys. Nothing is returned or written out by the
    code in this function as it stands.
    """
    archetyped_properties = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
        "Archetyped V3.1.xlsx",
        header=4
    )

    cavity_descriptions = [
        "Cavity: AsBuilt (1983-1995)",
        "Cavity: AsBuilt (Post 1995)",
        "Cavity: AsBuilt (Pre 1976)",
        "Cavity: AsBuilt (1976-1982)",
    ]

    archetyped_properties["Is Cavity Property"] = archetyped_properties["Wall Type"].isin(cavity_descriptions)

    # We also identify any properties where properties were found to need cavity wall insulation
    costed_packages = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Costed Retrofit Packages "
        "20241030 (WIP) Single Model V2.xlsx",
        sheet_name="Modelled Packages",
        header=13
    )

    needs_cwi = costed_packages[
        costed_packages["Main Wall Insulation"].isin(
            [
                "Poss Extract CWI & Refill (issues identified)",
                "CWI RdSAP Default"
            ]
        )
    ][["Address ID", "Address", "Current SAP Rating", "Current EPC Band", "Postcode", "Archetype ID",
       "Main Wall Insulation",
       "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]]

    # We flag these properties. The flag is set per archetype, so it covers every property that shares an
    # archetype with a property in needs_cwi
    archetyped_properties["Survey shows CWI needed for Archetype"] = archetyped_properties["Archetype ID"].isin(
        needs_cwi["Archetype ID"]
    )

    # Drop rows with no Address ID, and rows whose Address ID is the literal text "Address ID"
    # NOTE(review): the latter look like repeated header rows inside the sheet - confirm
    archetyped_properties = archetyped_properties[~pd.isnull(archetyped_properties["Address ID"])]
    archetyped_properties = archetyped_properties[archetyped_properties["Address ID"] != "Address ID"]

    # this is the big list!!!
    features = pd.read_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
        "master sheet.csv",
        encoding='latin1'
    )
    # NOTE(review): Address ID is a string here, so the merge and isin checks below only match if the
    # archetyped list also holds Address ID as strings - confirm
    features["Address ID"] = features["Address ID"].astype(str)

    features_to_merge = features[
        [
            "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
            "Renewables", "Total Floor Area"
        ]
    ]

    stonewater_cavity_properties = archetyped_properties[
        ["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
         "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
    ].merge(
        features_to_merge, how="left", on="Address ID"
    )

    # We filter this down to the properties that are cavity properties
    # The explicit copy makes the filtered frame independent, so the "Reason Included" assignments below
    # are not chained assignments on a slice (which triggers pandas' SettingWithCopyWarning)
    stonewater_cavity_properties = stonewater_cavity_properties[
        stonewater_cavity_properties["Is Cavity Property"] |
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"]
    ].copy()

    # The reason starts from a default and is then overwritten step by step, so the order of the
    # assignments below matters: a later match replaces an earlier one
    stonewater_cavity_properties["Reason Included"] = "As Built Cavity Property"

    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
        ~stonewater_cavity_properties["Is Cavity Property"],
        "Survey revealed potential need for CWI or extract and re-fill",
        stonewater_cavity_properties["Reason Included"]
    )

    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
        stonewater_cavity_properties["Is Cavity Property"],
        "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
        stonewater_cavity_properties["Reason Included"]
    )

    # We indicate the exact properties that need CWI, based on survey findings
    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Address ID"].isin(
            needs_cwi[needs_cwi["Main Wall Insulation"] == "CWI RdSAP Default"]["Address ID"].astype(int).astype(
                str).values
        ),
        "Survey showed this property needs CWI",
        stonewater_cavity_properties["Reason Included"]
    )

    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Address ID"].isin(
            needs_cwi[needs_cwi["Main Wall Insulation"] == "Poss Extract CWI & Refill (issues identified)"][
                "Address ID"].astype(int).astype(str).values
        ),
        "Survey showed this property could need extract and re-fill",
        stonewater_cavity_properties["Reason Included"]
    )

    # We get the EPC data
    epc_data = json.loads(
        read_from_s3(
            bucket_name="retrofit-data-dev",
            s3_file_name="customers/Stonewater/clustering/epc_data.json"
        )
    )
    epc_data = pd.DataFrame(epc_data)
    # Manual override: the record with internal_id 1091 is given UPRN 83143766
    epc_data["uprn"] = np.where(
        epc_data["internal_id"] == 1091,
        83143766,
        epc_data["uprn"]
    )

    epc_data_batch_2 = read_pickle_from_s3(
        s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
        bucket_name="retrofit-data-dev"
    )
    epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)

    complete_epcs = pd.concat([epc_data, epc_data_batch_2])

    # Keep only the EPC fields needed for the review list and give them readable column names
    epcs_to_merge = complete_epcs[
        [
            "uprn",
            "address",
            "postcode",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            "energy-consumption-current"
        ]
    ].rename(
        columns={
            "address": "Address",
            "postcode": "Postcode",
            "inspection-date": "Date of last EPC",
            "current-energy-efficiency": "SAP score on register",
            "current-energy-rating": "EPC rating on register",
            "property-type": "Property Type",
            "built-form": "Archetype",
            "total-floor-area": "Property Floor Area",
            "construction-age-band": "Property Age Band",
            "floor-height": "Property Floor Height",
            "number-habitable-rooms": "Number of Habitable Rooms",
            "walls-description": "Wall Construction",
            "roof-description": "Roof Construction",
            "mainheat-description": "Heating Type",
            "secondheat-description": "Secondary Heating",
            "transaction-type": "Reason for last EPC",
            "energy-consumption-current": "Heat Demand (kWh/m2)",
        }
    )

    # We de-dupe, keeping the EPC with the newest inspection date for each UPRN: after the descending
    # sort, drop_duplicates keeps the first (newest) row it sees per uprn
    epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
    epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
    epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")

    # Merge the EPCs on, with the data we need. The master sheet columns are prefixed with "Parity - "
    # first, so that they do not collide with the EPC columns (e.g. "Property Type")
    stonewater_cavity_properties = stonewater_cavity_properties.rename(
        columns={
            "Age": "Parity - Build Age",
            "Property Type": "Parity - Property Type",
            "Walls": "Parity - Wall Construction",
            "Roofs": "Parity - Roof Construction",
            "Glazing": "Parity - Glazing Type",
            "Heating": "Parity - Heating Type",
            "Main Fuel": "Parity - Main Fuel",
            "Hot Water": "Parity - Hot Water",
            "Renewables": "Parity - Renewables",
            "Total Floor Area": "Parity - Total Floor Area"
        }
    ).merge(
        epcs_to_merge,
        how="left",
        left_on="UPRN",
        right_on="uprn"
    )

    # We now pick out the additional properties: those in the master sheet that are not in the archetyped list
    additional_properties = features[
        ~features["Address ID"].isin(archetyped_properties["Address ID"].values)
    ]

    # Filter on cavity properties: the as built descriptions plus filled, externally and internally
    # insulated cavity walls
    additional_properties = additional_properties[
        additional_properties["Walls"].isin(
            cavity_descriptions +
            ["Cavity: FilledCavity", "Cavity: External", "Cavity: Internal"]
        )
    ]

    # Pull the EPCs for these properties
    # NOTE(review): this loop only derives the lookup inputs and does not use them yet - the EPC search
    # itself still needs to be added
    for _, home in tqdm(additional_properties.iterrows(), total=len(additional_properties)):
        full_address = home["Address"]
        postcode = home["Postcode"]
        # The first comma-separated part of the address is taken as address line 1
        address1 = full_address.split(",")[0]

View file

@ -2,3 +2,7 @@ PyPDF2
pandas
tqdm
openpyxl
boto3
epc-api-python==1.0.2
usaddress==0.5.11
fuzzywuzzy==0.18.0