diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..96ad7a95 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index af5a3faf..3b5535d5 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -382,6 +382,8 @@ class AssetList:
self.outcomes_for_output = pd.DataFrame()
self.master_surveyed = None
self.unmatched_submissions = pd.DataFrame()
+ self.ecosurv = None
+ self.ecosurv_no_match = pd.DataFrame()
# When this is True, we intend to break the programme into multiple phases. We may need to review
# how this is structured in the future, as depending on how we get future data, we may need to
@@ -1114,7 +1116,7 @@ class AssetList:
def identify_worktypes(self, cleaned):
- if self.STANDARD_SAP is not None:
+ if self.landlord_sap is not None:
# We add a SAP category for all work type identification
self.standardised_asset_list["SAP Category"] = np.where(
(
@@ -1135,16 +1137,22 @@ class AssetList:
)
else:
# We add a SAP category for all work type identification
+ # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more)
+
self.standardised_asset_list["SAP Category"] = np.where(
- self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68,
- "SAP Rating 68 or less",
+ (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54),
+ "SAP Rating 54 or less",
np.where(
- (
- self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
- self.EMPTY_CAVITY_SAP_THRESHOLD
+ (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68),
+ "SAP Rating 55-68",
+ np.where(
+ (
+ self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <=
+ self.EMPTY_CAVITY_SAP_THRESHOLD
+ ),
+ f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}",
+ f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more"
),
- f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}",
- f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more"
)
)
@@ -1406,7 +1414,12 @@ class AssetList:
elif self.old_format_non_intrusives_present:
self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = (
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
- ["retro drilled", "retro filled", "ewi", "retro drilled/ solid"]
+ [
+ "retro drilled", "retro filled", "ewi", "retro drilled/ solid", "retro drilled and filled",
+ ]
+ ) |
+ self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().str.contains(
+ "retro drilled"
)
)
else:
@@ -1565,13 +1578,6 @@ class AssetList:
solar_roof_meets_criteria
)
- # We shouldn't have an overlap
- if (
- self.standardised_asset_list["solar_eligible"] &
- self.standardised_asset_list["solar_eligible_needs_heating_upgrade"]
- ).sum():
- raise ValueError("Both heating upgrade and no heating upgrade are true - this should not be possible")
-
# We check for a specific sub-set of properties which are uninsulated solid wall properties that are EPC E
# or below (we'll use 57 as a threshold) - These are for a pilot with Net Zero Renewables
self.standardised_asset_list["solar_eligible_solid_wall_uninsulated"] = (
@@ -1617,27 +1623,58 @@ class AssetList:
)
# We break the cavity reason into a few different categories, when the EPC is different from inspections
- self.standardised_asset_list["cavity_reason"] = np.where(
- (
- self.standardised_asset_list["epc_indicates_empty_cavity"] &
- ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
- (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") &
- pd.isnull(self.standardised_asset_list["cavity_reason"])
- ),
- "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list["SAP Category"],
- self.standardised_asset_list["cavity_reason"]
- )
+ if self.old_format_non_intrusives_present:
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ (
+ self.standardised_asset_list["epc_indicates_empty_cavity"] &
+ ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
+ (self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin(
+ [
+ "retro drilled and filled", "retro drilled", "retro filled", "retro drilled & filled",
+ ]
+ )) &
+ pd.isnull(self.standardised_asset_list["cavity_reason"])
+ ),
+ "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[
+ "SAP Category"],
+ self.standardised_asset_list["cavity_reason"]
+ )
- self.standardised_asset_list["cavity_reason"] = np.where(
- (
- self.standardised_asset_list["epc_indicates_empty_cavity"] &
- ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
- (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") &
- pd.isnull(self.standardised_asset_list["cavity_reason"])
- ),
- "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list["SAP Category"],
- self.standardised_asset_list["cavity_reason"]
- )
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ (
+ self.standardised_asset_list["epc_indicates_empty_cavity"] &
+ ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
+ self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] &
+ pd.isnull(self.standardised_asset_list["cavity_reason"])
+ ),
+ "EPC Shows Empty Cavity, inspections show filled or other: " + self.standardised_asset_list[
+ "SAP Category"],
+ self.standardised_asset_list["cavity_reason"]
+ )
+ else:
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ (
+ self.standardised_asset_list["epc_indicates_empty_cavity"] &
+ ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
+ (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") &
+ pd.isnull(self.standardised_asset_list["cavity_reason"])
+ ),
+ "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list[
+ "SAP Category"],
+ self.standardised_asset_list["cavity_reason"]
+ )
+
+ self.standardised_asset_list["cavity_reason"] = np.where(
+ (
+ self.standardised_asset_list["epc_indicates_empty_cavity"] &
+ ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
+ (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") &
+ pd.isnull(self.standardised_asset_list["cavity_reason"])
+ ),
+ "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list[
+ "SAP Category"],
+ self.standardised_asset_list["cavity_reason"]
+ )
self.standardised_asset_list["cavity_reason"] = np.where(
(
@@ -1682,7 +1719,7 @@ class AssetList:
solar_reason_map = {
"solar_eligible": "Solar Eligible: ",
"solar_eligible_needs_heating_upgrade": (
- "Solar Eligible, Solid Floor, Needs Heating Upgrade: "
+ "Solar Eligible, Needs Heating Upgrade: "
),
"solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ",
}
@@ -1695,34 +1732,46 @@ class AssetList:
)
# Flag anything that has existing outcomes
- if (self.outcomes is not None) and ("Surveyed" in self.standardised_asset_list.columns):
+ if (self.outcomes is not None) and ("surveyed" in self.standardised_asset_list.columns):
- if "Installer Refusal" not in self.standardised_asset_list.columns:
+ if "installer refusal" not in self.standardised_asset_list.columns:
self.standardised_asset_list["cavity_reason"] = np.where(
(
- (self.standardised_asset_list["Surveyed"] > 0)
+ (self.standardised_asset_list["surveyed"] > 0)
),
None,
self.standardised_asset_list["cavity_reason"]
)
else:
- self.standardised_asset_list["cavity_reason"] = np.where(
- (
- (self.standardised_asset_list["Surveyed"] > 0) |
- (self.standardised_asset_list["Installer Refusal"] > 0)
- ),
- None,
- self.standardised_asset_list["cavity_reason"]
- )
+ for col in ["cavity_reason", "solar_reason"]:
+ self.standardised_asset_list[col] = np.where(
+ (
+ (self.standardised_asset_list["surveyed"] > 0) |
+ (self.standardised_asset_list["installer refusal"] > 0)
+ ),
+ None,
+ self.standardised_asset_list[col]
+ )
if self.master_surveyed is not None:
- self.standardised_asset_list["cavity_reason"] = np.where(
- (
- (~pd.isnull(self.standardised_asset_list["submission_date"]))
- ),
- None,
- self.standardised_asset_list["cavity_reason"]
- )
+ for col in ["cavity_reason", "solar_reason"]:
+ self.standardised_asset_list[col] = np.where(
+ (
+ (~pd.isnull(self.standardised_asset_list["submission_date"]))
+ ),
+ None,
+ self.standardised_asset_list[col]
+ )
+
+ if self.ecosurv is not None:
+ for col in ["cavity_reason", "solar_reason"]:
+ self.standardised_asset_list[col] = np.where(
+ (
+ (~pd.isnull(self.standardised_asset_list["ecosurv_reference"]))
+ ),
+ None,
+ self.standardised_asset_list[col]
+ )
blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
@@ -2081,6 +2130,120 @@ class AssetList:
self.hubspot_data = programme_data
+    def flag_ecosurv(self, ecosurv_landlords=None, ecosurv_filepath=None):
+        """
+        Match Ecosurv submissions to the standardised asset list.
+
+        Ecosurv rows whose landlord matches ``ecosurv_landlords`` are paired with asset list rows by
+        postcode, then narrowed by house number, address line 1 and property type until a single
+        candidate remains. Matched references are merged onto ``self.standardised_asset_list`` as
+        ``ecosurv_reference``, ``ecosurv_address1`` and ``ecosurv_postcode``; everything else is kept in
+        ``self.ecosurv_no_match``.
+
+        :param ecosurv_landlords: Lower-case regex (e.g. "a|b") matched against the Ecosurv "Landlord"
+            column. When None the method does nothing.
+        :param ecosurv_filepath: Optional path to the Ecosurv CSV export; defaults to the local export
+            used so far.
+        :return: None
+        """
+        if ecosurv_landlords is None:
+            return
+
+        # TODO: Fetch from Sharepoint
+        if ecosurv_filepath is None:
+            ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/15.04.csv"
+        logger.info("Getting Ecosurv data from %s", ecosurv_filepath)
+        self.ecosurv = pd.read_csv(
+            ecosurv_filepath,
+            encoding="cp437"
+        )
+
+        # Filter rows directly: na=False skips blank landlords, and this avoids relying on the column
+        # names produced by value_counts().reset_index(), which differ between pandas versions
+        landlord_ecosurv_data = self.ecosurv[
+            self.ecosurv["Landlord"].str.lower().str.contains(ecosurv_landlords, na=False)
+        ]
+
+        # Normalise the asset list postcodes once rather than on every iteration
+        asset_postcodes = (
+            self.standardised_asset_list[self.STANDARD_POSTCODE].str.replace(" ", "").str.lower()
+        )
+        ecosurv_columns = [
+            self.STANDARD_LANDLORD_PROPERTY_ID, "ecosurv_reference", "ecosurv_address1", "ecosurv_postcode",
+        ]
+
+        # Try and match to asset list
+        matched = []
+        unmatched = []
+        for _, row in tqdm(landlord_ecosurv_data.iterrows(), total=landlord_ecosurv_data.shape[0]):
+            if pd.isnull(row["Postcode"]):
+                unmatched.append(row["Reference"])
+                continue
+
+            # Spaces are stripped on both sides, otherwise "AB1 2CD" never equals "ab12cd"
+            postcode = str(row["Postcode"]).replace(" ", "").lower()
+            address_1 = "" if pd.isnull(row["Address Line 1"]) else str(row["Address Line 1"])
+            df = self.standardised_asset_list[asset_postcodes == postcode].copy()
+
+            if df.shape[0] > 1:
+                house_no = SearchEpc.get_house_number(address_1, row["Postcode"])
+                df["house_no"] = df.apply(
+                    lambda x: SearchEpc.get_house_number(
+                        str(x[self.STANDARD_ADDRESS_1]), x[self.STANDARD_POSTCODE]
+                    ),
+                    axis=1
+                )
+                df = df[df["house_no"] == house_no]
+
+            if df.shape[0] > 1 and address_1:
+                # We compare address line 1 to full address (literal match, the address is not a regex)
+                address_in_full = df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
+                    address_1.lower(), na=False, regex=False
+                )
+                if address_in_full.any():
+                    df = df[address_in_full]
+
+            if df.shape[0] > 1:
+                df = df[df[self.STANDARD_PROPERTY_TYPE] != "other"]
+
+            if df.shape[0] == 1:
+                matched.append(
+                    {
+                        self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+                        "ecosurv_reference": row["Reference"],
+                        "ecosurv_address1": row["Address Line 1"],
+                        "ecosurv_postcode": row["Postcode"],
+                    }
+                )
+            else:
+                # No candidate, or still ambiguous after every narrowing step (including the case where
+                # a filter removed every candidate, which used to be dropped without being recorded)
+                unmatched.append(row["Reference"])
+
+        logger.info("Matched %s properties to ecosurv data", len(matched))
+        logger.info("%s properties in Ecosurv remain unmatched", len(unmatched))
+
+        # We now match
+        if matched:
+            # One Ecosurv row per property, so the left merge cannot duplicate asset list rows
+            matched = pd.DataFrame(matched, columns=ecosurv_columns).drop_duplicates(
+                subset=self.STANDARD_LANDLORD_PROPERTY_ID, keep="first"
+            )
+            self.standardised_asset_list = self.standardised_asset_list.merge(
+                matched,
+                how="left",
+                on=self.STANDARD_LANDLORD_PROPERTY_ID,
+            )
+        else:
+            # Nothing matched: still create the columns so later checks on "ecosurv_reference" work
+            for col in ecosurv_columns[1:]:
+                self.standardised_asset_list[col] = None
+
+        # We keep a record of submissions that were NOT matches
+        self.ecosurv_no_match = self.ecosurv[
+            self.ecosurv["Reference"].isin(unmatched)
+        ].copy()
+
def flag_outcomes(
self,
outcomes_filepath,
diff --git a/asset_list/app.py b/asset_list/app.py
index a284371e..8e50c99f 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -124,6 +124,7 @@ def app():
]
master_to_asset_list_filepath = None
phase = False
+ ecosurv_landlords = "paul butler|bromford"
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
@@ -608,6 +609,8 @@ def app():
master_to_asset_list_filepath=master_to_asset_list_filepath
)
+ asset_list.flag_ecosurv(ecosurv_landlords)
+
### We retrieve the EPC data
# We chunk up this data into 5000 rows at a time
@@ -949,5 +952,8 @@ def app():
if not asset_list.outcomes_no_match.empty:
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
+ if not asset_list.ecosurv_no_match.empty:
+ asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+
# Store the Hubspot export as a csv
hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 57349b8a..bc482263 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1,4 +1,5 @@
import ast
+import asyncio
import json
from datetime import datetime
@@ -420,9 +421,32 @@ router = APIRouter(
)
-@router.post("/trigger")
-async def trigger_plan(body: PlanTriggerRequest):
+# Strong references to in-flight plan jobs. The event loop only keeps weak references to tasks, so a
+# task that nobody else references can be garbage collected before it finishes.
+_background_tasks = set()
+
+
+@router.post("/trigger", status_code=202)
+async def trigger_plan_entrypoint(body: PlanTriggerRequest):
+    """
+    Entrypoint for the plan trigger API. It schedules model_engine as a background task and returns
+    straight away, so the 202 response only means the job was accepted, not that it has finished.
+    :param body: The request body
+    :return: A confirmation message that the job was accepted
+    """
logger.info("API triggered with body: %s", body)
+    # NOTE(review): serverless.yml suggests this runs on AWS Lambda, where work left running after
+    # the response is sent may be frozen with the execution environment - confirm the job completes
+    # Kick off the async background task, holding a reference until it is done
+    task = asyncio.create_task(model_engine(body))
+    _background_tasks.add(task)
+    task.add_done_callback(_background_tasks.discard)
+
+    return {"message": "Plan job accepted"}
+
+
+async def model_engine(body: PlanTriggerRequest):
+ logger.info("Model Engine triggered with body: %s", body)
logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)()
@@ -1056,4 +1071,6 @@ async def trigger_plan(body: PlanTriggerRequest):
finally:
session.close()
+ logger.info("Model Engine completed successfully")
+
return Response(status_code=200)
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 6d4852b2..8c57900f 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -1,4 +1,5 @@
import numpy as np
+import pandas as pd
class PropertyValuation:
diff --git a/serverless.yml b/serverless.yml
index 893588c6..abca5ade 100644
--- a/serverless.yml
+++ b/serverless.yml
@@ -81,4 +81,4 @@ functions:
- http:
path: /{proxy+}
method: ANY
- timeout: 120
\ No newline at end of file
+ timeout: 900 # Max timeout to 15 mins for engine runs
\ No newline at end of file