From a948688df6c4e220be03688700834fbb78701ccd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 Dec 2023 10:28:06 +0000 Subject: [PATCH] set up get_epc_data function --- etl/eligibility/ha_15_32/ha16_app.py | 98 ++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index 402527df..f930452e 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -248,3 +248,101 @@ def load_data(): data["warmfront_identified"] = data["warmfront_identified"].fillna(False) return data + + +def get_epc_data(data, cleaned, cleaning_data, created_at): + scoring_data = [] + results = [] + nodata = [] + + for _, property_meta in tqdm(data.iterrows(), total=len(data)): + searcher = SearchEpc( + address1=property_meta["HouseNo"], + postcode=property_meta["Postcode"], + size=1000 + ) + searcher.search() + + if searcher.data is None: + nodata.append(property_meta) + continue + + newest_epc, older_epcs, full_sap_epc = searcher.retrieve(address=property_meta["Address"]) + # We also want to get the penultimate epc + penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) + if not penultimate_epc: + penultimate_epc = newest_epc + + eligibility = Eligibility(epc=newest_epc, cleaned=cleaned) + eligibility.check_gbis_warmfront() + eligibility.check_eco4_warmfront() + + if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront) and ( + property_meta["warmfront_identified"] + ): + eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) + eligibility.check_gbis_warmfront() + eligibility.check_eco4_warmfront() + # If this is the case, we need to update the older epcs + older_epcs = [ + x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]] + ] + + # Full checks + eligibility.check_gbis() + eligibility.check_eco4() + + if eligibility.eco4_warmfront["eligible"]: + scoring_dictionary = prepare_model_data_row( + property_id=eligibility.epc["uprn"], + modelling_epc=eligibility.epc, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + old_data=older_epcs, + full_sap_epc=full_sap_epc + ) + scoring_data.extend(scoring_dictionary) + + results.append( + { + "row_id": property_meta["row_id"], + "uprn": eligibility.epc["uprn"], + "Address": property_meta["Address"], + "Postcode": property_meta["Postcode"], + "property_type": eligibility.epc["property-type"], + "gbis_eligible": eligibility.gbis_warmfront, + "eco4_eligible": eligibility.eco4_warmfront["eligible"], + "eco4_message": eligibility.eco4_warmfront["message"], + "sap": float(eligibility.epc["current-energy-efficiency"]), + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + # Property components + "roof": eligibility.roof["clean_description"], + "walls": eligibility.walls["clean_description"], + "cavity_type": eligibility.cavity["type"], + "heating": eligibility.epc["mainheat-description"], + "tenure": eligibility.tenure, + "date_epc": eligibility.epc["lodgement-date"], + } + ) + + +def app(): + data = load_data() + + data["row_id"] = ["ha16" + str(i) for i in range(0, len(data))] + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + cleaning_data = read_parquet_from_s3( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + created_at = datetime.now().isoformat()