mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
minor
This commit is contained in:
parent
8c711c9658
commit
d65ce731c0
2 changed files with 49 additions and 44 deletions
|
|
@ -132,51 +132,56 @@ def app():
|
|||
|
||||
energy_consumption_data = []
|
||||
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
|
||||
|
||||
# Skip the first 18 directories
|
||||
if i < 18:
|
||||
continue
|
||||
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
# Rename the columns to the same format as the api returns
|
||||
data.columns = [c.replace("_", "-").lower() for c in data.columns]
|
||||
|
||||
# Keep only EPCs lodged on or after the earliest-date threshold
|
||||
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
|
||||
|
||||
data = data[~pd.isnull(data["uprn"])]
|
||||
# Take just the newest EPC per uprn, based on lodgement-date
|
||||
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
|
||||
|
||||
data = data.sample(sample_size, replace=False)
|
||||
# We use the address data to find the related information
|
||||
|
||||
collected_data = []
|
||||
for _, property_data in data.iterrows():
|
||||
time.sleep(np.random.uniform(0.2, 1.5))
|
||||
|
||||
uprn = int(property_data["uprn"])
|
||||
address = property_data["address1"]
|
||||
postcode = property_data["postcode"]
|
||||
expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
|
||||
|
||||
response = retrieve_find_my_epc_data(
|
||||
uprn=uprn,
|
||||
postcode=postcode,
|
||||
address=address,
|
||||
expected_expiry_date=expected_expiry_date
|
||||
)
|
||||
if response is None:
|
||||
try:
|
||||
# Skip the first 40 directories
|
||||
if i < 40:
|
||||
continue
|
||||
collected_data.append(
|
||||
{
|
||||
**response,
|
||||
"epc": property_data.to_dict(),
|
||||
"epc_directory": str(directory)
|
||||
}
|
||||
)
|
||||
|
||||
energy_consumption_data.extend(collected_data)
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
# Rename the columns to the same format as the api returns
|
||||
data.columns = [c.replace("_", "-").lower() for c in data.columns]
|
||||
|
||||
# Keep only EPCs lodged on or after the earliest-date threshold
|
||||
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
|
||||
|
||||
data = data[~pd.isnull(data["uprn"])]
|
||||
# Take just the newest EPC per uprn, based on lodgement-date
|
||||
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
|
||||
|
||||
data = data.sample(sample_size, replace=False)
|
||||
# We use the address data to find the related information
|
||||
|
||||
collected_data = []
|
||||
for _, property_data in data.iterrows():
|
||||
time.sleep(np.random.uniform(0.2, 1.5))
|
||||
|
||||
uprn = int(property_data["uprn"])
|
||||
address = property_data["address1"]
|
||||
postcode = property_data["postcode"]
|
||||
expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
|
||||
|
||||
response = retrieve_find_my_epc_data(
|
||||
uprn=uprn,
|
||||
postcode=postcode,
|
||||
address=address,
|
||||
expected_expiry_date=expected_expiry_date
|
||||
)
|
||||
if response is None:
|
||||
continue
|
||||
collected_data.append(
|
||||
{
|
||||
**response,
|
||||
"epc": property_data.to_dict(),
|
||||
"epc_directory": str(directory)
|
||||
}
|
||||
)
|
||||
|
||||
energy_consumption_data.extend(collected_data)
|
||||
except Exception as e:
|
||||
print(f"Error for directory {directory}: {e}")
|
||||
# If we have an error, then we wait for a bit since it's likely due to timeout
|
||||
time.sleep(300)
|
||||
continue
|
||||
|
||||
# Store the pickle in s3
|
||||
save_time = datetime.now()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
from pprint import pprint
|
||||
import msgpack
|
||||
from utils.s3 import read_from_s3
|
||||
from training_data.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
|
||||
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
|
||||
|
||||
|
||||
def handler():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue