diff --git a/.idea/Model.iml b/.idea/Model.iml index df6c4faa..96ad7a95 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index a6b8f973..b7c79c79 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -354,7 +354,10 @@ class AssetList: self.local_filepath = local_filepath self.sheet_name = sheet_name # Read in the data - self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name) + if local_filepath.endswith(".xlsx"): + self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name) + else: + self.raw_asset_list = pd.read_csv(local_filepath) self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} @@ -442,6 +445,9 @@ class AssetList: lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]), axis=1 ) + + for _, x in asset_list.iterrows(): + SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]) return asset_list raise ValueError(f"Method {method} not recognized") @@ -509,6 +515,18 @@ class AssetList: return str(int(x)) return x + @staticmethod + def _clean_postcode(postcode): + # Remove double spaces + postcode = postcode.replace(" ", " ") + if " " not in postcode: + # Restructure it + return " ".join( + [postcode[:-3], postcode[-3:]] + ) + + return postcode + def init_standardise(self): """ This function is used to standardise the asset list @@ -518,6 +536,10 @@ class AssetList: # Remove rows without a postcode if self.postcode_colname is not None: self.standardised_asset_list = self.standardised_asset_list.dropna(subset=[self.postcode_colname]) + # We also clean postcode columns where if there is not space, we create one + self.standardised_asset_list[self.postcode_colname] = self.standardised_asset_list[ + self.postcode_colname + ].apply(self._clean_postcode) # We clean up portential non-breaking spaces, and double spaces for col in [ @@ -667,7 +689,8 @@ class AssetList: "#MULTIVALUE", "This cell has an external reference that can't be shown or edited. Editing this cell will " "remove the external reference.", - "ND" + "ND", + 'PIMSS EMPTY' ] if pd.isnull(date_str) or date_str in known_errors: @@ -693,7 +716,7 @@ class AssetList: if str(date_str).isdigit() & (len(str(date_str)) == 4): return int(date_str) - raise NotImplementedError("Unhandled format for year built - implement me") + raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me") self.standardised_asset_list[self.landlord_year_built] = self.standardised_asset_list[ self.landlord_year_built @@ -2376,12 +2399,12 @@ class AssetList: outcomes_filepath, outcomes_sheetname, outcomes_postcode, - outcomes_houseno + outcomes_houseno, + outcomes_id ): if outcomes_filepath is None: return - # ToDO: Parameterise for future use? self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname) self.outcomes["row_id"] = self.outcomes.index @@ -2390,6 +2413,26 @@ class AssetList: lookup = [] nomatch = [] for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)): + + # Check if we have an id + oid = x[outcomes_id] if outcomes_id is not None else None + + if oid is not None: + matched = self.standardised_asset_list[ + (self.standardised_asset_list[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].str.strip() == oid) + ] + + if matched.shape[0] == 1: + lookup.append( + { + "row_id": x["row_id"], + self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + } + ) + continue + address_clean = x["Address"].lower().replace(",", "").replace(" ", " ") matched = self.standardised_asset_list[ @@ -2407,20 +2450,6 @@ class AssetList: ) continue - if "UPRN" in x: - matched = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == x["UPRN"] - ] - - if matched.shape[0] == 1: - lookup.append( - { - "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] - } - ) - continue - matched = self.standardised_asset_list[ (self.standardised_asset_list[self.STANDARD_POSTCODE] == x[outcomes_postcode]) ].copy() @@ -2459,6 +2488,9 @@ class AssetList: self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)] lookup = pd.DataFrame(lookup) + if lookup.empty: + return + # We will have duplicated domna property IDs, where a surveyor has been to a property multiple times # Where we have multiple rows, we want to make a call on what the action should be. For example, # there may be properties that have been visited multiple times where the outcome was "See notes" implying @@ -2529,9 +2561,13 @@ class AssetList: else "INSTALL / CANCELLATION DATE" ) + submission_col = ( + "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS" + ) + # We just need to check if any were cancelled master_to_append = master_data[ - ["UPRN", install_col, "SUBMISSION DATE"] + ["UPRN", install_col, submission_col] ].rename(columns={"UPRN": self.STANDARD_LANDLORD_PROPERTY_ID, install_col: "survey_status"}) master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel") diff --git a/asset_list/app.py b/asset_list/app.py index f2a85ac3..78ad1a29 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -88,6 +88,67 @@ def app(): # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid) # - Or the insulation required is loft/cavity (floors should be solid) + # Live West (2018 Asset list) + data_folder = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset List" + ) + data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" + sheet_name = "Assets" + postcode_column = 'Postcode' + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Build Year" + landlord_os_uprn = None + landlord_property_type = "Property Archetype" + landlord_built_form = None + landlord_wall_construction = None + landlord_heating_system = "Heating Fuel Type" + landlord_existing_pv = None + landlord_property_id = "Uprn - DO NOT DELETE" + outcomes_filename = "RT - LiveWest.xlsx" + outcomes_sheetname = "Feedback" + outcomes_postcode = "Poscode" + outcomes_houseno = "No." + outcomes_id = "UPRN" + master_filepaths = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master " + "- redacted for analysis/CAVITY-Table 1.csv" + ] + master_to_asset_list_filepath = None + + # Live West (South West asset list) + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " + "2025/Livewest Asset List (Original) - csv") + data_filename = "Report-Table 1.csv" + sheet_name = None + postcode_column = 'Postcode' + fulladdress_column = "T1_Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Build Yr" + landlord_os_uprn = None + landlord_property_type = "T1_AssetType" + landlord_built_form = "T1_AssetType" + landlord_wall_construction = "Wall Type Cavity" + landlord_heating_system = "Heating Fuel" + landlord_existing_pv = None + landlord_property_id = "T1_UPRN" + outcomes_filename = "RT - LiveWest.xlsx" + outcomes_sheetname = "Feedback" + outcomes_postcode = "Poscode" + outcomes_houseno = "No." + outcomes_id = "UPRN" + master_filepaths = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master " + "- redacted for analysis/CAVITY-Table 1.csv" + ] + master_to_asset_list_filepath = None + # PFP East data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East" data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx" @@ -218,6 +279,7 @@ def app(): # landlord_year_built = "YEAR BUILT" # landlord_os_uprn = None # landlord_property_type = "Property type" + # landlord_built_form = None # landlord_wall_construction = "Wall Constuction" # landlord_heating_system = "Heating" # landlord_existing_pv = None @@ -325,7 +387,8 @@ def app(): outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None, outcomes_sheetname=outcomes_sheetname, outcomes_postcode=outcomes_postcode, - outcomes_houseno=outcomes_houseno + outcomes_houseno=outcomes_houseno, + outcomes_id=outcomes_id ) asset_list.flag_survey_master( @@ -340,7 +403,7 @@ def app(): epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks - chunk_size = 5000 + chunk_size = 2000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): @@ -355,6 +418,8 @@ def app(): if all(x in folder_contents for x in downloaded_files): skip = max(chunk_indexes) + # folder_contents = [f for f in folder_contents if "nodata" not in f and f.endswith(".csv")] + for i in range(0, len(asset_list.standardised_asset_list), chunk_size): print(f"Processing chunk {i} to {i + chunk_size}") if skip is not None and not force_retrieve_data: @@ -418,8 +483,6 @@ def app(): epc_df = pd.concat(epc_data) epc_df["estimated"] = epc_df["estimated"].fillna(False) - epc_df["number-habitable-rooms"].mean() + 1 - # We expand out the recommendations recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]] diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 915f84c6..0da1f412 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -40,5 +40,20 @@ BUILT_FORM_MAPPINGS = { 'House': 'unknown', 'Second Floor Flat': 'mid-floor', 'First Floor Flat': 'ground floor', - 'Room Only': 'unknown' + 'Room Only': 'unknown', + + 'End Terrace Housex': 'end-terrace', + 'Mid Terrace Bungalow': 'mid-terrace', + 'End Terrace Bungalow': 'end-terrace', + 'Mid Terrace House': 'mid-terrace', + 'Detached Bungalow': 'detached', + 'End Terrace House': 'end-terrace', + 'Mid Terrace Housekeeping ': 'mid-terrace', + 'Semi Detached Bung': 'semi-detached', + 'Guest Room': 'unknown', + 'Coach House': 'detached', + 'Office Buildings': 'unknown', + 'Maisonnette': 'mid-floor', + 'Bedspace': 'unknown' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 73e2679e..a11ce418 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -16,7 +16,12 @@ STANDARD_HEATING_SYSTEMS = { "unknown", "communal gas boiler", "high heat retention storage heaters", - "room heaters" + "room heaters", + 'electric fuel', + 'oil fuel', + 'solid fuel', + 'gas combi boiler', + 'unknown' } HEATING_MAPPINGS = { @@ -106,5 +111,16 @@ HEATING_MAPPINGS = { 'Quantum storage heaters (Old SH on EPC)': 'high heat retention storage heaters', 'Quantum storage heaters': 'high heat retention storage heaters', 'Air Source (EPC says SH)': 'air source heat pump', - 'ASHP - Was logged as oil': 'air source heat pump' + 'ASHP - Was logged as oil': 'air source heat pump', + 'Ground Source': 'ground source heat pump', + 'District Heating': 'district heating', + 'Mains Gas (Communal)': 'communal gas boiler', + 'LPG': 'boiler - other fuel', + 'Mains Gas': 'gas condensing boiler', + + 'ELECTRIC': 'electric fuel', + 'OIL': 'oil fuel', + 'SOLID FUEL': 'solid fuel', + 'GAS': 'gas combi boiler', + 'DO NOT SURVEY': 'unknown' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index add53cd8..4a4bcb54 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -92,5 +92,28 @@ PROPERTY_MAPPING = { 'Guest room in a complex': 'other', 'PIMSS EMPTY': 'bedsit', 'Room Only': 'other', - 'Detached Property': 'house' + 'Detached Property': 'house', + 'End Terrace Housex': 'house', + 'Coach House': 'coach house', + 'Mid Terrace Bungalow': 'bungalow', + 'End Terrace Bungalow': 'bungalow', + 'Mid Terrace House': 'house', + 'Detached Bungalow': 'bungalow', + 'End Terrace House': 'house', + 'Mid Terrace Housekeeping ': 'house', + 'Maisonnette': 'maisonette', + 'Guest Room': 'unknown', + 'Office Buildings': 'unknown', + 'Semi Detached Bung': 'bungalow', + 'Bedspace': 'bedsit', + + 'Houses/Bungalows': 'bungalow', + 'Bedsits': 'bedsit', + 'Unknown': 'unknown', + 'Sheltered Flats/besits': 'flat', + 'House/Bungalow ': 'bungalow', + 'Low/Med Rise Flats/Mais': 'flat', + 'Staff/Comm': 'other', + 'A Rooms': 'other' + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 894d9e01..065aa988 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -135,5 +135,6 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid brick EWI installed': 'insulated solid brick', 'Cavity Cavity batts': 'filled cavity', 'Cavity CWI Completed by Dyson': 'filled cavity', - None: "unknown" + None: "unknown", + "Cavity": "cavity unknown insulation", } diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index d33b2e70..2b3f0c02 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -207,12 +207,12 @@ class SearchEpc: try: # Updated regex to catch house numbers including alphanumeric ones - pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)' + pattern = r'(?i)(?:flat|apartment|room)\s*(\d+\w*)|^\s*(\d+\w*)' match1 = re.search(pattern, address) if match1: return next(g for g in match1.groups() if g is not None) - pattern2 = r'(?i)(flat|apartment)\s*([a-zA-Z]?\d+[a-zA-Z]?)' + pattern2 = r'(?i)(flat|apartment|room)\s*([a-zA-Z]?\d+[a-zA-Z]?)' match2 = re.search(pattern2, address) if match2: return match2.group(2) @@ -226,8 +226,8 @@ class SearchEpc: continue if part == postcode.split(" ")[1]: continue - return part.rstrip( - ",") # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary + return part.rstrip(",") + # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary # number # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py index a74e22ec..9a9eda86 100644 --- a/etl/customers/mod/pilot/2. Create Excel Model.py +++ b/etl/customers/mod/pilot/2. Create Excel Model.py @@ -98,7 +98,7 @@ def app(): ) property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False) - property_asset_data["pre_2002"] = property_asset_data["BUILD_YEAR"] < 2002 + property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip() property_asset_data["is_insulated"] = ( property_asset_data["walls"].str.split(",").str[1].str.strip().isin( @@ -111,11 +111,11 @@ def app(): property_asset_data["is_pitched"] = np.where( property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof" ) - property_asset_data["pre_2002"] = np.where( - property_asset_data["pre_2002"], "Pre 2002", "Post 2002" + property_asset_data["pre_1970"] = np.where( + property_asset_data["pre_1970"], "Pre 1970", "Post 1970" ) - archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_2002"] + archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"] assigned_archetypes = ( property_asset_data.groupby( @@ -129,8 +129,8 @@ def app(): ) # Most prominent archetypes - prominent_archetypes = assigned_archetypes.head(3) - other_archetypes = assigned_archetypes.tail(-3) + prominent_archetypes = assigned_archetypes.head(6) + other_archetypes = assigned_archetypes.tail(-6) # 2 or fewer properties in the other archetypes property_asset_data = property_asset_data.merge( @@ -195,6 +195,13 @@ def app(): reset_index() .rename(columns={"archetype_group": "Archetype"}) ) + property_types = ( + (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]). + value_counts(). + to_frame(). + reset_index() + .rename(columns={"index": "Property Type", 0: "Count"}) + ) # epc breakdown epc_breakdown = ( @@ -345,6 +352,11 @@ def app(): df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] + # Add on the archetype + df = df.merge( + property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn" + ) + # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it # the bills go up recommending HHRSH, so it doesn't make it to EPC B # For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump? @@ -451,8 +463,190 @@ def app(): pprint(scenario_metrics[scenario_ids[0]]) pprint(scenario_metrics[scenario_ids[1]]) - # TODO: Add a slide on valuation improvement, on a sample of properties? + scenario_data[scenario_ids[0]]["loft_insulation"][ + scenario_data[scenario_ids[0]]["loft_insulation"] > 0 + ].mean() - # TODO: Read in costing data and breakdown + scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ + scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 + ].mean() - zz = scenario_recommendations_df[scenario_recommendations_df["type"] == "mechanical_ventilation"] + # Testing checking floor risk + + import requests + + def get_flood_risk(lat, lon, radius_km=1): + url = "https://environment.data.gov.uk/flood-monitoring/id/floods" + params = { + 'lat': lat, + 'long': lon, + 'dist': radius_km # search radius in km + } + + response = requests.get(url, params=params) + response.raise_for_status() + data = response.json() + + flood_warnings = data.get("items", []) + + if not flood_warnings: + print("No active flood warnings near this location.") + else: + print(f"{len(flood_warnings)} warning(s) found near the location:") + for warning in flood_warnings: + print(f"- Area: {warning.get('description')}") + print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})") + print(f" Message changed at: {warning.get('timeMessageChanged')}") + print() + + return flood_warnings + + from shapely.geometry import shape, Point + def get_flood_areas_near_point(lat, lon, radius_km=2): + url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" + params = { + 'lat': lat, + 'long': lon, + 'dist': radius_km + } + + response = requests.get(url, params=params) + response.raise_for_status() + return response.json().get("items", []) + + def point_in_flood_area(lat, lon): + flood_areas = get_flood_areas_near_point(lat, lon, radius_km=1) + point = Point(lon, lat) # GeoJSON uses (lon, lat) format + + for area in flood_areas: + polygon_url = area.get("polygon") + if not polygon_url: + continue + + polygon_response = requests.get(polygon_url) + polygon_response.raise_for_status() + polygon_geojson = polygon_response.json() + + features = polygon_geojson.get("features", []) + if not features: + continue + + flood_polygon = shape(features[0]['geometry']) + + try: + is_inside = flood_polygon.contains(point) + except: + is_inside = False + + if is_inside: + print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})") + return area + + from tqdm import tqdm + floor_warnings_data = [] + for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)): + # warnings = floor_warnings_data.extend( + # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) + # ) + + resp = point_in_flood_area(lat=property["LATITUDE"], lon=property["LONGITUDE"]) + if resp: + floor_warnings_data.append( + { + "uprn": property["uprn"], + "address": property["address"], + "postcode": property["postcode"], + "area": resp + } + ) + continue + + import plotly.graph_objects as go + + labels = [ + "House_Cavity_Insulated_Pitched roof_Pre 1970", + "House_Cavity_Insulated_Pitched roof_Post 1970", + "House_Cavity_Uninsulated_Pitched roof_Pre 1970", + "House_Cavity_Uninsulated_Pitched roof_Post 1970", + "other", + "House_System_Uninsulated_Pitched roof_Pre 1970", + "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970" + ] + + values = [62, 36, 21, 16, 16, 4, 2] + + hovertext = [ + "Loft insulation, draft proofing", + "Top-up loft insulation", + "Cavity wall insulation, loft insulation", + "Cavity wall insulation, ventilation", + "Bespoke retrofit measures", + "External wall insulation, roof insulation", + "Flat roof insulation, internal wall insulation" + ] + + fig = go.Figure(go.Treemap( + labels=labels, + parents=[""] * len(labels), # No root + values=values, + hovertext=hovertext, + hoverinfo="text", + textinfo="none", + marker=dict( + line=dict(color="white", width=4), + colors=values, + colorscale="Blues" + ) + )) + + fig.update_layout( + margin=dict(t=10, l=10, r=10, b=10), + plot_bgcolor="white", + paper_bgcolor="white" + ) + + fig.show() + + # Get the recommended measures by scenario id + recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c] + measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[ + recommendation_cols + ].sum().reset_index() + + measure_counts_by_scenario.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" + ) + + # Estimate average valuation improvment by scenarios + valuation_data = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/property_valuation.csv" + ) + + from backend.ml_models.Valuation import PropertyValuation + + uplift = [] + for _, x in valuation_data.iterrows(): + uprn = x["uprn"] + + to_append = {"uprn": uprn} + for _id in scenario_ids: + scenario = scenario_data[_id][ + scenario_data[_id]["uprn"] == uprn + ].squeeze() + + val = PropertyValuation.estimate_valuation_improvement( + current_value=x["valuation"], + current_epc=scenario["Current EPC Rating"].value, + target_epc=scenario["Predicted Post Works EPC"], + total_cost=None + ) + + to_append[_id] = val["average_increase"] + + uplift.append(to_append) + + uplift = pd.DataFrame(uplift) + print(uplift[scenario_ids[0]].mean()) + # £8,161 + print(uplift[scenario_ids[1]].mean()) + # £16,938 diff --git a/etl/customers/mod/pilot/3. Past Project Costs.py b/etl/customers/mod/pilot/3. Past Project Costs.py new file mode 100644 index 00000000..79a0493c --- /dev/null +++ b/etl/customers/mod/pilot/3. Past Project Costs.py @@ -0,0 +1,76 @@ +import pandas as pd + +# Get the wave 2 costing data and produce some breakdowns +costs = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/Measure cost study for MOD.xlsx", + header=2 +) + +# Get the EPC data for these + + +# Cavity +cwi_costs = costs[ + ['Model', 'Total invoiced (including VAT)'] +].copy() +cwi_costs["Model"] = "CWI - " + cwi_costs["Model"] +cwi_costs = cwi_costs[~pd.isnull(cwi_costs["Total invoiced (including VAT)"])] + +# Loft +li_costs = costs[ + ['Model.2', 'Total invoiced (including VAT).2'] +].copy() +li_costs["Model.2"] = "LI - " + li_costs["Model.2"] +li_costs = li_costs[~pd.isnull(li_costs["Total invoiced (including VAT).2"])] +# Rename +li_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# Windows +windows_costs = costs[ + ['Model.3', 'Total invoiced (including VAT).3'] +].copy() +windows_costs["Model.3"] = "Windows - " + windows_costs["Model.3"] +windows_costs = windows_costs[~pd.isnull(windows_costs["Total invoiced (including VAT).3"])] +# Rename +windows_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# Doors +doors_costs = costs[ + ['Model.4', 'Total invoiced (including VAT).4'] +].copy() +doors_costs["Model.4"] = "Doors - " + doors_costs["Model.4"] +doors_costs = doors_costs[~pd.isnull(doors_costs["Total invoiced (including VAT).4"])] +# Rename +doors_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# ASHP +ashps_costs = costs[ + ['Model.5', 'Total invoiced (including VAT).5'] +].copy() +ashps_costs["Model.5"] = "ASHP - " + ashps_costs["Model.5"] +ashps_costs = ashps_costs[~pd.isnull(ashps_costs["Total invoiced (including VAT).5"])] +# Rename +ashps_costs.columns = ["Model", "Total invoiced (including VAT)"] + +# Solar +solar_costs = costs[ + ['Model.6', 'Total invoiced (including VAT).6'] +].copy() +solar_costs["Model.6"] = "Solar - " + solar_costs["Model.6"] +solar_costs = solar_costs[~pd.isnull(solar_costs["Total invoiced (including VAT).6"])] +# Rename +solar_costs.columns = ["Model", "Total invoiced (including VAT)"] + +fabric_costing_data = pd.concat([cwi_costs, li_costs]) +windows_doors_costing_data = pd.concat([windows_costs, doors_costs]) + +windows_doors_costing_data.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/windows_doors_costs.csv" +) +fabric_costing_data.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/fabric_costing_data.csv" +) +ashps_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/ashps_costs.csv") +solar_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/solar_costs.csv") + +project_cost_by_age = costs[["Property age ", "TOTAL Cost of Works"]].groupby("Property age ").mean().reset_index()