# Model/model_data/app.py

from tqdm import tqdm
import os
from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from model_data.ConservationAreaClient import ConservationAreaClient

from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from model_data.OpenUprnClient import OpenUprnClient
from model_data.analysis.UvalueEstimations import UvalueEstimations

# Absolute paths of the Land Registry price-paid CSV extracts, listed from the
# monthly update file down to the 2017 part files. Each path is this module's
# directory followed by the relative location of the extract.
LAND_REGISTRY_PATHS = [
    os.path.abspath(os.path.dirname(__file__)) + relative_path
    for relative_path in (
        "/model_data/local_data/pp-monthly-update-new-version.csv",
        "/model_data/local_data/pp-2022 (1).csv",
        "/model_data/local_data/pp-2021.csv",
        "/model_data/local_data/pp-2020.csv",
        "/model_data/local_data/pp-2019.csv",
        "/model_data/local_data/pp-2018.csv",
        "/model_data/local_data/pp-2017-part1.csv",
        "/model_data/local_data/pp-2017-part2.csv",
    )
]


def _nearest_borehole_distance(prop, borehole_client):
    """Return the smallest distance from *prop* to any borehole record.

    The distance is the first element of the tuple returned by
    ``borehole_client.distance_between_bng_coords`` for the property's
    ``x_coordinate``/``y_coordinate`` and each record's ``EASTING``/``NORTHING``.

    Returns ``None`` when ``borehole_client.data`` yields no records, instead
    of raising ``ValueError`` from ``min()`` on an empty sequence.
    """
    # The property's coordinates do not change per borehole, so look them up once.
    x_bng = prop.coordinates['x_coordinate']
    y_bng = prop.coordinates['y_coordinate']

    return min(
        (
            borehole_client.distance_between_bng_coords(
                x1_bng=x_bng,
                y1_bng=y_bng,
                x2_bng=float(borehole['EASTING']),
                y2_bng=float(borehole['NORTHING']),
            )[0]
            for borehole in tqdm(borehole_client.data, total=len(borehole_client.data))
        ),
        default=None,
    )


def handler():
    """Build the enriched properties dataset and write it to ``properties_dataset.csv``.

    For every entry in ``input_data`` this:

    1. creates a ``Property``, looks up its EPC record and sets its year built;
    2. resolves coordinates through the OS Open UPRN file;
    3. flags whether it lies inside a conservation area;
    4. downloads EPC records for each local authority seen in the inputs and
       cleans them together with the input properties' own records;
    5. loads the Land Registry and borehole data;
    6. derives building components, U-value estimates and wall recommendations;
    7. writes one row per property (EPC data, conservation flag, coordinates and
       ``meters_to_nearest_borehole``) to ``properties_dataset.csv`` in the
       current working directory.

    Returns:
        None. The result is the CSV file written as a side effect.
    """
    # Function-scope imports: pandas is not imported at module level, and the
    # recommendations module was already imported locally in this function.
    import pandas as pd
    from model_data.recommendations.WallRecommendations import WallRecommendations

    base_dir = os.path.abspath(os.path.dirname(__file__))

    # To begin with, the input data is a list of dictionaries, however we would read this file in
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    input_properties = [
        Property(postcode=config['postcode'], address1=config['address1'], epc_client=epc_client)
        for config in input_data
    ]

    for p in input_properties:
        p.search_address_epc()
        p.set_year_built()

    uprns = [p.data['uprn'] for p in input_properties]

    open_uprn_client = OpenUprnClient(
        path=base_dir + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv",
        uprns=uprns
    )
    open_uprn_client.read()

    # We're using Ordnance Survey Open UPRN data to find the coordinates of
    # each address, which we will then be able to use at a later stage
    for p in input_properties:
        p.get_coordinates(open_uprn_client)

    conservation_area_client = ConservationAreaClient(
        historic_england_path=(
            base_dir + "/model_data/local_data/Historic_Eng_Conservation_Areas/Conservation_Areas.shp"
        ),
        gov_path=base_dir + "/model_data/local_data/gov-conservation-area.geojson"
    )
    conservation_area_client.read()

    # Check if the property is in a conservation area
    for p in input_properties:
        p.set_is_in_conservation_area(conservation_area_client)

    # A set, so each local authority is downloaded only once
    local_authorities = {p.data['local-authority'] for p in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    # Incorporate input data into cleaning
    cleaner = EpcClean(data + [p.data for p in input_properties])
    cleaner.clean()

    address_meta = [
        {
            "postcode": x["postcode"].upper(),
            "address1": x["address1"].upper(),
            "address2": x["address2"].upper(),
            "address3": x["address3"].upper(),
            "address": x["address"],
            "uprn": x["uprn"]
        } for x in data
    ]

    # Land registry
    # NOTE(review): the client is read but nothing below consumes it yet.
    land_registry_client = LandRegistryClient(
        paths=LAND_REGISTRY_PATHS,
        addresses=address_meta
    )
    land_registry_client.read()

    # Borehole
    borehole_client = BoreholeClient(
        path=base_dir + "/model_data/local_data/borehole/borehole.dbf"
    )
    borehole_client.read()

    # Now, for our input properties, we need to identify the components of the building, based
    # on the cleaning we've done
    for p in input_properties:
        p.get_components(cleaner)

    # TODO: Add property age band into this
    uvalue_estimates = UvalueEstimations(data=data)
    uvalue_estimates.get_estimates(cleaner=cleaner)

    # NOTE(review): recommendation results are not yet part of the exported
    # dataset; the call is kept in case recommend() has side effects on the
    # property -- confirm and either export the results or drop the loop.
    for p in input_properties:
        wall_recommender = WallRecommendations(property_instance=p, uvalue_estimates=uvalue_estimates)
        wall_recommender.recommend()

    # TODO: Work out what "Good" energy efficiency means as a U-value for a
    #  description such as 'Solid brick, as built, insulated (assumed)':
    #  filter on the local authority code and property type where we have
    #  U-values for the wall, and take a median.

    # The borehole distance is attached to each row directly rather than merged
    # back on "uprn": a merge needs both key columns to share a dtype and
    # duplicates rows when the same UPRN appears more than once.
    properties_dataset = pd.DataFrame(
        [
            {
                **p.data,
                "in_conservation_area": p.in_conservation_area,
                **p.coordinates,
                "meters_to_nearest_borehole": _nearest_borehole_distance(p, borehole_client),
            }
            for p in input_properties
        ]
    )

    properties_dataset.to_csv("properties_dataset.csv")