mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #512 from Hestia-Homes/eco-eligiblity-bug
Allow no valuation data
This commit is contained in:
commit
d7ca1ea2bb
12 changed files with 431 additions and 54 deletions
|
|
@ -309,6 +309,17 @@ class AssetList:
|
|||
'NAME OF SURVEYOR'
|
||||
]
|
||||
|
||||
# Solar non-intrusive fields
|
||||
NON_INTRUSIVES_SOLAR_COLNAMES = [
|
||||
'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION',
|
||||
'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING',
|
||||
'Roof Tiles - CONCRETE/SLATE/ROSEMARY',
|
||||
'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)',
|
||||
'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE',
|
||||
'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW',
|
||||
'DATE', 'NAME OF SURVEYOR'
|
||||
]
|
||||
|
||||
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
|
||||
|
||||
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
|
||||
|
|
@ -461,6 +472,8 @@ class AssetList:
|
|||
|
||||
self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
|
||||
|
||||
self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns
|
||||
|
||||
# Names of columns
|
||||
self.landlord_property_id = landlord_property_id
|
||||
self.address1_colname = address1_colname
|
||||
|
|
@ -774,6 +787,9 @@ class AssetList:
|
|||
if self.new_format_non_insturives_present_v2:
|
||||
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
|
||||
|
||||
if self.solar_non_intrusives_present:
|
||||
non_intrusive_columns += self.NON_INTRUSIVES_SOLAR_COLNAMES
|
||||
|
||||
if self.old_format_non_intrusives_present:
|
||||
# We check if we have the ECO Eligibility column, which we might not have
|
||||
non_intrusive_columns = [
|
||||
|
|
@ -946,7 +962,7 @@ class AssetList:
|
|||
|
||||
if self.phase:
|
||||
# We filter on just the properties that have had an inspection
|
||||
if self.new_format_non_insturives_present_v2:
|
||||
if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present:
|
||||
self.standardised_asset_list = self.standardised_asset_list[
|
||||
~self.standardised_asset_list['NAME OF SURVEYOR'].isin(
|
||||
["YET TO BE SURVEYED", "", None]
|
||||
|
|
@ -1341,10 +1357,10 @@ class AssetList:
|
|||
# for identifying cavity jobs
|
||||
if self.non_intrusives_present and not self.old_format_non_intrusives_present:
|
||||
|
||||
if self.new_format_non_insturives_present_v2:
|
||||
if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present:
|
||||
existing_solar_non_intrusives_check = (
|
||||
self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin(
|
||||
["ALREADY HAS SOLAR PV"]
|
||||
["ALREADY HAS SOLAR PV", "ALREADY HAS PV"]
|
||||
)
|
||||
)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -59,6 +59,176 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
#
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/"
|
||||
data_filename = "22.10_Cambridge_west addresses.xlsx"
|
||||
sheet_name = "Asset List"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Full Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "id"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Property Box
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box"
|
||||
data_filename = "Property Box Finance Portfolio.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Address 1"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "row_id"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = "block_id"
|
||||
|
||||
# CDS - able-to-pay
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay"
|
||||
data_filename = "CDS_ASSET LIST_(2314).xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Property Address - Postcode'
|
||||
address1_column = "Property Address - Line 1"
|
||||
address1_method = None
|
||||
fulladdress_column = "Property Address - Line 1"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "row_id"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Hyde - solar
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar"
|
||||
data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx"
|
||||
sheet_name = "Electric Property Inspections"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = None # Is only patchily populated so we create it
|
||||
address1_method = 'house_number_extraction'
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Property Type"
|
||||
landlord_wall_construction = "Walls "
|
||||
landlord_roof_construction = "Roofs"
|
||||
landlord_heating_system = "Heating"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Address ID"
|
||||
landlord_sap = "SAP"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Hyde cavity
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity"
|
||||
data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx"
|
||||
sheet_name = "Cavity Inspections"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = None # Is only patchily populated so we create it
|
||||
address1_method = 'house_number_extraction'
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Property Type"
|
||||
landlord_wall_construction = "Walls "
|
||||
landlord_roof_construction = "Roofs"
|
||||
landlord_heating_system = "Heating"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Address ID"
|
||||
landlord_sap = "SAP"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# CDS - Sept 2025
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme"
|
||||
data_filename = "Founder Estates CDS.xlsx"
|
||||
|
|
|
|||
|
|
@ -439,5 +439,23 @@ BUILT_FORM_MAPPINGS = {
|
|||
'Chalet - Wheelchair': 'unknown',
|
||||
'Studio Flat': 'unknown',
|
||||
'Bungalow - Attached': 'semi-detached',
|
||||
'ND': 'unknown'
|
||||
'ND': 'unknown',
|
||||
|
||||
'Maisonette: Mid Terrace: Mid Floor': 'mid-floor',
|
||||
'Maisonette: Semi Detached: Ground Floor': 'semi-detached',
|
||||
'Maisonette: Enclosed Mid Terrace: Ground Floor': 'enclosed mid-terrace',
|
||||
'Maisonette: Enclosed End Terrace: Ground Floor': 'end-terrace',
|
||||
'Maisonette: Mid Terrace: Ground Floor': 'mid-terrace',
|
||||
'Flat: Semi Detached: Basement': 'semi-detached',
|
||||
'Maisonette: Semi Detached: Top Floor': 'semi-detached',
|
||||
'Maisonette: Enclosed Mid Terrace: Mid Floor': 'enclosed mid-terrace',
|
||||
'Flat: Detached: Basement': 'detached',
|
||||
'Maisonette: Enclosed Mid Terrace: Top Floor': 'enclosed mid-terrace',
|
||||
|
||||
'Maisonette: End Terrace: Top Floor': 'top-floor',
|
||||
'House: Mid Terrace: Ground Floor': 'ground floor',
|
||||
'Maisonette: Semi Detached: Mid Floor': 'detached',
|
||||
'Maisonette: Detached: Mid Floor': 'detached',
|
||||
'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -477,6 +477,23 @@ HEATING_MAPPINGS = {
|
|||
|
||||
'Heat networks Heat networks (mains gas)': 'communal heating',
|
||||
'ND Oil': 'oil fuel',
|
||||
'Boiler Biofuel': 'boiler - other fuel'
|
||||
'Boiler Biofuel': 'boiler - other fuel',
|
||||
|
||||
'Electric (direct acting) room heaters: Water- or oil-filled radiators': 'room heaters',
|
||||
'Other: Electric ceiling heating': 'electric ceiling',
|
||||
'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump',
|
||||
'Oil room heaters: Room heater, 2000 or later': 'room heaters',
|
||||
'Electric Underfloor Heating: In screed above insulation (standard or off peak)': 'electric underfloor',
|
||||
'Heat Pump: Electric Heat pumps: Air source heat pump in other cases': 'air source heat pump',
|
||||
'Electric Storage Systems: Old (large volume) storage heaters': 'electric storage heaters',
|
||||
|
||||
'Gas (including LPG) room heaters: Condensing gas fire': 'room heaters',
|
||||
'Solid fuel room heaters: Open fire in grate': 'solid fuel',
|
||||
'Solid fuel room heaters: Open fire with back boiler (no radiators)': 'solid fuel',
|
||||
'Community Heating Systems: Community heat pump (RdSAP)': 'communal heating',
|
||||
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
|
||||
'and sealed to, fireplace opening': 'room heaters',
|
||||
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
|
||||
'Boiler: G rated Combi': 'gas condensing combi'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -343,5 +343,23 @@ PROPERTY_MAPPING = {
|
|||
'bungalow': 'bungalow',
|
||||
'flat': 'flat',
|
||||
'FLA': 'flat',
|
||||
'HOU': 'house'
|
||||
'HOU': 'house',
|
||||
|
||||
'Maisonette: Mid Terrace: Mid Floor': 'maisonette',
|
||||
'Maisonette: Semi Detached: Ground Floor': 'maisonette',
|
||||
'Maisonette: Enclosed Mid Terrace: Ground Floor': 'maisonette',
|
||||
'Maisonette: Enclosed End Terrace: Ground Floor': 'maisonette',
|
||||
'Maisonette: Mid Terrace: Ground Floor': 'maisonette',
|
||||
'Flat: Semi Detached: Basement': 'flat',
|
||||
'Maisonette: Semi Detached: Top Floor': 'maisonette',
|
||||
'Maisonette: Enclosed Mid Terrace: Mid Floor': 'maisonette',
|
||||
'Flat: Detached: Basement': 'flat',
|
||||
'Maisonette: Enclosed Mid Terrace: Top Floor': 'maisonette',
|
||||
|
||||
'Maisonette: End Terrace: Top Floor': 'maisonette',
|
||||
'House: Mid Terrace: Ground Floor': 'house',
|
||||
'Bungalow: EnclosedMidTerrace': 'bungalow',
|
||||
'Maisonette: Semi Detached: Mid Floor': 'maisonette',
|
||||
'Maisonette: Detached: Mid Floor': 'maisonette'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -275,5 +275,30 @@ ROOF_CONSTRUCTION_MAPPINGS = {
|
|||
'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation',
|
||||
'ND (inferred) ND (inferred)': 'unknown',
|
||||
'Flat Non-joist': 'flat insulated',
|
||||
'Same dwelling above N/A': 'another dwelling above'
|
||||
'Same dwelling above N/A': 'another dwelling above',
|
||||
|
||||
'Flat: As Built, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation',
|
||||
'PitchedNormalLoftAccess: Unknown, PitchedNormalNoLoftAccess: Unknown': 'pitched unknown insulation',
|
||||
'PitchedNormalLoftAccess: 400mm+': 'pitched insulated',
|
||||
'AnotherDwellingAbove: 150mm': 'another dwelling above',
|
||||
'Flat: 150mm': 'flat insulated',
|
||||
'AnotherDwellingAbove: 50mm': 'another dwelling above',
|
||||
'PitchedNormalNoLoftAccess: As Built': 'pitched no access to loft',
|
||||
'PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated',
|
||||
'PitchedNormalLoftAccess: 200mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated',
|
||||
'PitchedNormalLoftAccess: 350mm': 'pitched insulated',
|
||||
'PitchedNormalNoLoftAccess: 270mm': 'pitched no access to loft',
|
||||
'AnotherDwellingAbove: 100mm': 'another dwelling above',
|
||||
|
||||
'PitchedWithSlopingCeiling: Unknown': 'piched unknown insulation',
|
||||
'AnotherDwellingAbove: Unknown, Flat: As Built': 'another dwelling above',
|
||||
'Flat: Unknown, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation',
|
||||
'SameDwellingAbove: Unknown': 'another dwelling above',
|
||||
'Flat: Unknown': 'flat unknown insulation',
|
||||
'Flat: 50mm, PitchedNormalLoftAccess: 100mm': 'flat insulated',
|
||||
'Flat: As Built, PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'flat unknown insulation',
|
||||
'Flat: As Built, PitchedNormalLoftAccess: 400mm+': 'flat unknown insulation',
|
||||
'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
|
||||
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -353,4 +353,7 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'System built As-built': "uninsulated system built",
|
||||
'System built Internal': 'insulated system built',
|
||||
|
||||
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
|
||||
'Cavity: FilledCavityPlusExternal': 'filled cavity'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -347,7 +347,8 @@ class SearchEpc:
|
|||
# We update the data with the correct uprn
|
||||
if self.uprn:
|
||||
for x in api_response["response"]["rows"]:
|
||||
x["uprn"] = self.uprn
|
||||
if pd.isnull(x["uprn"]):
|
||||
x["uprn"] = self.uprn
|
||||
|
||||
data["rows"].extend(api_response["response"]["rows"])
|
||||
|
||||
|
|
@ -357,6 +358,8 @@ class SearchEpc:
|
|||
row for row in data["rows"]
|
||||
if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
|
||||
]
|
||||
# Overwrite the data
|
||||
self.data = data
|
||||
|
||||
if data["rows"]:
|
||||
api_response["msg"] = self.SUCCESS
|
||||
|
|
|
|||
|
|
@ -145,14 +145,17 @@ def extract_portfolio_aggregation_data(
|
|||
cost = sum([r["total"] for r in default_recommendations])
|
||||
sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
|
||||
|
||||
lower_bound_valuation_uplift = (
|
||||
property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
|
||||
property_value_increase_ranges[p.id]["current_value"]
|
||||
)
|
||||
upper_bound_valuation_uplift = (
|
||||
property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
|
||||
property_value_increase_ranges[p.id]["current_value"]
|
||||
)
|
||||
if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]):
|
||||
lower_bound_valuation_uplift = (
|
||||
property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
|
||||
property_value_increase_ranges[p.id]["current_value"]
|
||||
)
|
||||
upper_bound_valuation_uplift = (
|
||||
property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
|
||||
property_value_increase_ranges[p.id]["current_value"]
|
||||
)
|
||||
else:
|
||||
lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0
|
||||
|
||||
agg_data.append({
|
||||
"pre_retrofit_epc": p.data["current-energy-rating"],
|
||||
|
|
@ -523,6 +526,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
plan_input["built_form"] = plan_input["built_form"].map(built_form_map)
|
||||
|
||||
plan_input = plan_input.to_dict("records")
|
||||
|
||||
else:
|
||||
raise ValueError("Other formats not yet supported")
|
||||
|
||||
|
|
@ -549,6 +553,13 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
# If we have patches or overrides, we should read them in here
|
||||
patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body)
|
||||
|
||||
if body.file_type == "xlsx" and body.file_format == "domna_asset_list":
|
||||
# We check if we have valuation data
|
||||
if not valuation_data and body.valuation_file_path in [None, ""]:
|
||||
# We check plan_input
|
||||
if "domna_valuation" in plan_input[0]:
|
||||
valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input]
|
||||
|
||||
cleaning_data = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
|
@ -563,12 +574,22 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
if uprn:
|
||||
uprn = int(float(uprn))
|
||||
|
||||
address1 = config.get("address", None)
|
||||
# Handle domna address list format
|
||||
if pd.isnull(address1) and body.file_format == "domna_asset_list":
|
||||
address1 = config.get("domna_full_address", None)
|
||||
|
||||
address1 = str(int(address1)) if isinstance(address1, float) else str(address1)
|
||||
|
||||
full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None
|
||||
|
||||
epc_searcher = SearchEpc(
|
||||
address1=str(config["address"]),
|
||||
address1=address1,
|
||||
postcode=config["postcode"],
|
||||
uprn=uprn,
|
||||
auth_token=get_settings().EPC_AUTH_TOKEN,
|
||||
os_api_key="",
|
||||
full_address=full_address
|
||||
)
|
||||
epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
|
||||
epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
|
||||
|
|
@ -1176,9 +1197,10 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
|
||||
upload_funding(session, p, new_plan_id, recommendations_to_upload)
|
||||
|
||||
property_valuation_increases.append(
|
||||
valuations["average_increased_value"] - valuations["current_value"]
|
||||
)
|
||||
if valuations["current_value"] > 0:
|
||||
property_valuation_increases.append(
|
||||
valuations["average_increased_value"] - valuations["current_value"]
|
||||
)
|
||||
|
||||
# Commit the session after each batch
|
||||
session.commit()
|
||||
|
|
|
|||
|
|
@ -219,12 +219,19 @@ class PropertyValuation:
|
|||
current_epc = property_instance.data["current-energy-rating"]
|
||||
|
||||
if not current_value:
|
||||
# In this case, we return a % improvement rather than an absolute
|
||||
relative_improvement = cls.estimate_valuation_improvement(
|
||||
current_value=1,
|
||||
current_epc=current_epc,
|
||||
target_epc=target_epc,
|
||||
total_cost=1
|
||||
)
|
||||
return {
|
||||
"current_value": 0,
|
||||
"lower_bound_increased_value": 0,
|
||||
"upper_bound_increased_value": 0,
|
||||
"average_increased_value": 0,
|
||||
"average_increase": 0
|
||||
"lower_bound_increased_value": relative_improvement["lower_bound_increased_value"] - 1,
|
||||
"upper_bound_increased_value": relative_improvement["upper_bound_increased_value"] - 1,
|
||||
"average_increased_value": relative_improvement["average_increased_value"] - 1,
|
||||
"average_increase": relative_improvement["average_increase"]
|
||||
}
|
||||
|
||||
return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost)
|
||||
|
|
|
|||
|
|
@ -1,38 +1,111 @@
|
|||
# Initial Code
|
||||
|
||||
from seleniumbase import SB
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import time
|
||||
from stealth_requests import StealthSession
|
||||
import random
|
||||
from multiprocessing import Pool
|
||||
from tqdm import tqdm
|
||||
|
||||
uprns = [
|
||||
100071297618,
|
||||
100080893397,
|
||||
100060778033,
|
||||
200004793081,
|
||||
100071265143,
|
||||
100071297618,
|
||||
100080893397,
|
||||
100060778033,
|
||||
200004793081,
|
||||
100071265143,
|
||||
]
|
||||
ENGINES = ["safari", "chrome"]
|
||||
|
||||
estimate_list = []
|
||||
|
||||
for uprn in uprns:
|
||||
def scrape_all_estimates(session, url):
    """Fetch a Zoopla property page and pull out its sale-estimate blocks.

    Args:
        session: HTTP session whose ``get`` accepts an ``impersonate``
            keyword (a ``StealthSession`` in this script).
        url: Zoopla property URL to request.

    Returns:
        A ``(estimates, is_blocked)`` tuple: the ``div`` elements tagged
        ``data-testid="sale-estimate"`` and a flag that is ``True`` when
        none were found. An empty result is treated as being blocked.
    """
    # Rotate impersonation per request. random.choice follows ENGINES
    # whatever its length, rather than a hard-coded 0..1 index range.
    resp = session.get(url, impersonate=random.choice(ENGINES))
    page_source = BeautifulSoup(resp.text, "html.parser")
    estimates = page_source.find_all("div", {"data-testid": "sale-estimate"})
    is_blocked = not estimates
    return estimates, is_blocked
|
||||
|
||||
# Probably can change the timings here
|
||||
time.sleep(5)
|
||||
with SB(uc=True) as sb:
|
||||
sb.uc_open_with_reconnect(
|
||||
f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
|
||||
3,
|
||||
|
||||
def parallel_task(url, max_retries=None):
    """Scrape the low, middle and high sale estimates for one Zoopla URL.

    The page is re-requested, after a random pause of up to one second,
    for as long as no sale-estimate block is found.

    Args:
        url: Zoopla property URL to scrape.
        max_retries: Optional cap on the number of re-requests. ``None``
            (the default) keeps retrying indefinitely, as before.

    Returns:
        A dict with the keys ``"URL"``, ``"Low Estimate"``,
        ``"Middle Estimate"`` and ``"High Estimate"``; the estimates are the
        raw text of the matching page elements.

    Raises:
        RuntimeError: If ``max_retries`` is set and the page still has no
            sale-estimate block after that many retries.
    """
    # No impersonate argument here: the engine is chosen per request
    # inside scrape_all_estimates.
    with StealthSession() as session:
        estimates, is_blocked = scrape_all_estimates(session, url)

        retries = 0
        while is_blocked:
            # Without a cap, a page that never shows an estimate would be
            # requested forever.
            if max_retries is not None and retries >= max_retries:
                raise RuntimeError(
                    f"No sale estimate found for {url} after {retries} retries"
                )
            print(f"Blocked by Zoopla for URL: {url}")
            time.sleep(random.uniform(0, 1))
            estimates, is_blocked = scrape_all_estimates(session, url)
            retries += 1

        # Only the first sale-estimate block on the page is used.
        sale_estimate = estimates[0]
        low_estimate = sale_estimate.find("span", {"data-testid": "low-estimate-blurred"}).text
        middle_estimate = sale_estimate.find("p", {"data-testid": "estimate-blurred"}).text
        high_estimate = sale_estimate.find("span", {"data-testid": "high-estimate-blurred"}).text

        return {
            "URL": url,
            "Low Estimate": low_estimate,
            "Middle Estimate": middle_estimate,
            "High Estimate": high_estimate,
        }
|
||||
|
||||
|
||||
def parse_price(p):
    """Convert a price label such as ``"£250k"`` or ``"£1.2m"`` to a float.

    Args:
        p: Price text. May carry a ``£`` sign, surrounding whitespace,
            thousands separators, and a case-insensitive ``k`` (thousand)
            or ``m`` (million) suffix.

    Returns:
        The price as a float, with any suffix multiplier applied.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    multipliers = {"k": 1000, "m": 1_000_000}
    # Thousands separators ("£1,250,000") would make float() raise, so
    # drop them along with the currency sign.
    p = p.replace("£", "").replace(",", "").strip().lower()
    if p and p[-1] in multipliers:
        return float(p[:-1]) * multipliers[p[-1]]
    return float(p)
|
||||
|
||||
|
||||
# def parallel_task(url):
|
||||
# with StealthSession(impersonate=ENGINES[random.randint(0, 1)]) as session:
|
||||
# estimates, is_blocked = scrape_all_estimates(session, url)
|
||||
#
|
||||
# while is_blocked:
|
||||
# # Will need to wait and retry if blocked by Zoopla
|
||||
# print(f"Blocked by Zoopla for URL: {url}")
|
||||
# sleep_factor = random.uniform(0, 1) # Random delay to avoid detection
|
||||
# time.sleep(sleep_factor * 1)
|
||||
# estimates, is_blocked = scrape_all_estimates(session, url)
|
||||
#
|
||||
# low_estimate = (
|
||||
# estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text
|
||||
# ) # Find all span elements with data-testid="low-estimate"
|
||||
# middle_estimate = (
|
||||
# estimates[0].find("p", {"data-testid": "estimate-blurred"}).text
|
||||
# ) # Find all span elements with data-testid="middle-estimate"
|
||||
# high_estimate = (
|
||||
# estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text
|
||||
# ) # Find all span elements with data-testid="high-estimate-blurred"
|
||||
#
|
||||
# return {
|
||||
# "URL": url,
|
||||
# "Low Estimate": low_estimate,
|
||||
# "Middle Estimate": middle_estimate,
|
||||
# "High Estimate": high_estimate,
|
||||
# }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Get a SAL
|
||||
asset_list = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box/Property Box Finance Portfolio - "
|
||||
"Standardised.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
)
|
||||
asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str)
|
||||
uprns = asset_list["epc_os_uprn"].tolist()
|
||||
urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns]
|
||||
|
||||
with Pool(processes=5) as pool:
|
||||
estimates_list = list(
|
||||
tqdm(
|
||||
pool.imap(parallel_task, urls),
|
||||
total=len(urls),
|
||||
)
|
||||
)
|
||||
|
||||
soup = sb.get_beautiful_soup()
|
||||
df = pd.DataFrame(estimates_list)
|
||||
# Extract UPRN from URL
|
||||
df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/")
|
||||
df["valuation"] = df["Middle Estimate"].apply(parse_price)
|
||||
df.to_csv("zoopla_estimates.csv", index=False)
|
||||
|
||||
estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
|
||||
# Can change the way we extract the text here
|
||||
estimate_text = (
|
||||
estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"]
|
||||
)
|
||||
estimate_list.append(estimate_text)
|
||||
df["uprn"] = df["uprn"].astype(int).astype(str)
|
||||
|
||||
asset_list.merge(df[["uprn", "valuation"]], left_on="epc_os_uprn", right_on="uprn", how="left").to_excel(
|
||||
"Property Box Finance Portfolio - Standardised - with valuations.xlsx", index=False
|
||||
)
|
||||
|
|
|
|||
5
etl/webscrape/requirements.txt
Normal file
5
etl/webscrape/requirements.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
beautifulsoup4>=4.12.0
|
||||
pandas>=2.0.0
|
||||
stealth-requests>=1.0.7
|
||||
tqdm>=4.65.0
|
||||
openpyxl
|
||||
Loading…
Add table
Reference in a new issue