mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
increased concurrency of backend
This commit is contained in:
parent
b271ad5c97
commit
110cb8070c
4 changed files with 74 additions and 18 deletions
|
|
@ -1,8 +1,15 @@
|
|||
"""
|
||||
Rough script to prepare the data for Lincs Rural project
|
||||
"""
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx",
|
||||
|
|
@ -11,16 +18,58 @@ data = pd.read_excel(
|
|||
|
||||
# We have property RRNs - we need UPRN
|
||||
|
||||
for _, x in data.iterrows():
|
||||
rrn = x["EPC Ref."]
|
||||
standardised_ara_list = []
|
||||
missed = []
|
||||
for _, x in tqdm(data.iterrows(), total=len(data)):
|
||||
try:
|
||||
rrn = x["EPC Ref."]
|
||||
|
||||
# Fetch from find my epc
|
||||
retriever = RetrieveFindMyEpc(
|
||||
address="",
|
||||
postcode="",
|
||||
rrn=rrn,
|
||||
address_postal_town="",
|
||||
sap_rating=x["Actual"]
|
||||
)
|
||||
# Fetch from find my epc
|
||||
retriever = RetrieveFindMyEpc(
|
||||
address="",
|
||||
postcode="",
|
||||
rrn=rrn,
|
||||
address_postal_town="",
|
||||
)
|
||||
|
||||
find_epc_data = retriever.retrieve_all_find_my_epc_data()
|
||||
find_epc_data = retriever.retrieve_newest_find_my_epc_data(rrn=rrn)
|
||||
|
||||
# Find the UPRN
|
||||
epc_searcher = SearchEpc(
|
||||
address1=str(find_epc_data["address1"]),
|
||||
postcode=str(find_epc_data["postcode"]),
|
||||
auth_token=EPC_AUTH_TOKEN,
|
||||
os_api_key="",
|
||||
property_type=None,
|
||||
fast=False,
|
||||
full_address=",".join([find_epc_data["address1"], find_epc_data["address2"]]),
|
||||
max_retries=5,
|
||||
)
|
||||
epc_searcher.find_property(skip_os=True)
|
||||
|
||||
# Append in format we need
|
||||
# Stuff we need:
|
||||
standardised_ara_list.append(
|
||||
{
|
||||
"landlord_property_id": x["Property Ref."],
|
||||
"landlord_property_type": epc_searcher.newest_epc.get("property-type"),
|
||||
"landlord_built_form": epc_searcher.newest_epc.get("built-form"),
|
||||
"landlord_heating_system": epc_searcher.newest_epc.get("mainheat-description", ""),
|
||||
"epc_os_uprn": epc_searcher.newest_epc.get("uprn"),
|
||||
"domna_property_id": x["Property Ref."],
|
||||
"domna_full_address": epc_searcher.newest_epc.get(
|
||||
"address", ", ".join([
|
||||
find_epc_data["address1"],
|
||||
find_epc_data["address2"],
|
||||
])
|
||||
),
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
missed.append({
|
||||
"property_ref": x["Property Ref."],
|
||||
"rrn": x["EPC Ref."],
|
||||
"error": str(e)
|
||||
})
|
||||
|
||||
missed_df = pd.DataFrame(missed)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,6 @@
|
|||
"""
|
||||
We have found, within the Peabody data, a large volume of properties with missing and incorrect
|
||||
UPRNs and incorrect address data. We want to flag these records and also find the missing values where we can
|
||||
|
||||
We also have duplicate UPRNs that should be flagged
|
||||
"""
|
||||
|
|
@ -465,12 +465,13 @@ class RetrieveFindMyEpc:
|
|||
potential_rating = ratings.split(".")[1]
|
||||
current_sap = int(current_rating.split(' ')[-1])
|
||||
|
||||
if current_sap != self.sap_rating:
|
||||
# This means we likely have the wrong data. If we are in this scenario, we return nothing
|
||||
return {
|
||||
"epc_certificate": None,
|
||||
"page_source": None,
|
||||
}
|
||||
if self.sap_rating:
|
||||
if current_sap != self.sap_rating:
|
||||
# This means we likely have the wrong data. If we are in this scenario, we return nothing
|
||||
return {
|
||||
"epc_certificate": None,
|
||||
"page_source": None,
|
||||
}
|
||||
|
||||
# Retrieve the energy consumption
|
||||
bills = address_res.find('div', {'id': 'bills-affected'})
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ functions:
|
|||
- sqs:
|
||||
arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue
|
||||
batchSize: 1
|
||||
maximumConcurrency: 5 # Heavily restricts concurrency to avoid overwhelming the Lambda limits
|
||||
maximumConcurrency: 10 # Heavily restricts concurrency to avoid overwhelming the Lambda limits
|
||||
|
||||
|
||||
resources:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue