mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding storage of dummy schema
This commit is contained in:
parent
523ca28b68
commit
96235ed3a9
7 changed files with 269 additions and 84 deletions
|
|
@ -512,7 +512,11 @@ class Property:
|
|||
return output
|
||||
|
||||
def get_components(
|
||||
self, cleaned, photo_supply_lookup, floor_area_decile_thresholds
|
||||
self,
|
||||
cleaned,
|
||||
photo_supply_lookup,
|
||||
floor_area_decile_thresholds,
|
||||
energy_consumption_client
|
||||
):
|
||||
"""
|
||||
Given the cleaning that has been performed, we'll use this to identify the property
|
||||
|
|
@ -522,6 +526,8 @@ class Property:
|
|||
of the roof that is suitable for solar panels
|
||||
:param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
|
||||
solar pv roof area
|
||||
:param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
|
||||
energy annual consumption in kWh
|
||||
:return:
|
||||
"""
|
||||
|
||||
|
|
@ -592,12 +598,28 @@ class Property:
|
|||
self.find_energy_sources()
|
||||
self.set_current_energy_bill()
|
||||
|
||||
def set_current_energy_bill(self):
|
||||
def set_current_energy_bill(self, energy_consumption_client):
|
||||
"""
|
||||
Given what we know about the property now, estimates the current energy consumption using the UCL paper
|
||||
https://www.sciencedirect.com/science/article/pii/S0378778823002542
|
||||
:return:
|
||||
"""
|
||||
scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
|
||||
# Change columns from underscores to hyphens
|
||||
scoring_df.columns = [
|
||||
x.lower().replace("_", "-") for x in scoring_df.columns
|
||||
]
|
||||
for col in ["heating_kwh", "hot_water_kwh"]:
|
||||
scoring_df[col] = None
|
||||
energy_consumption_client.data = None
|
||||
heating_prediction = energy_consumption_client.score_new_data(
|
||||
new_data=scoring_df, target="heating_kwh"
|
||||
)
|
||||
|
||||
hot_water_prediction = energy_consumption_client.score_new_data(
|
||||
new_data=scoring_df, target="hot_water_kwh"
|
||||
)
|
||||
|
||||
starting_heat_demand = (
|
||||
float(self.data["energy-consumption-current"]) * self.floor_area
|
||||
)
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ from recommendations.Mds import Mds
|
|||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
|
||||
from backend.ml_models.Valuation import PropertyValuation
|
||||
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -262,6 +263,7 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
# TODO: insert building id
|
||||
input_properties = []
|
||||
for config in tqdm(plan_input):
|
||||
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
|
||||
|
|
@ -337,6 +339,11 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
if not input_properties:
|
||||
return Response(status_code=204)
|
||||
|
||||
# TOOD: TEMP - store locally as pickle
|
||||
# import pickle
|
||||
# with open("input_properties.pkl", "wb") as f:
|
||||
# pickle.dump(input_properties, f)
|
||||
|
||||
# The materials data could be cached or local so we don't need to make
|
||||
# consistent requests to the backend for
|
||||
# the same data
|
||||
|
|
@ -350,9 +357,30 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
|
||||
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
|
||||
|
||||
dataset_version = "2024-07-05"
|
||||
energy_consumption_client = EnergyConsumptionModel(
|
||||
model_paths={
|
||||
"heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl",
|
||||
"hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl"
|
||||
},
|
||||
cleaned=cleaned
|
||||
)
|
||||
|
||||
# Store all of these locally
|
||||
# with open("temp_inputs.pkl", "wb") as f:
|
||||
# pickle.dump({
|
||||
# "input_properties": input_properties,
|
||||
# "materials": materials,
|
||||
# "cleaned": cleaned,
|
||||
# "uprn_filenames": uprn_filenames,
|
||||
# "photo_supply_lookup": photo_supply_lookup,
|
||||
# "floor_area_decile_thresholds": floor_area_decile_thresholds,
|
||||
# "model_client": model_client
|
||||
# }, f)
|
||||
|
||||
logger.info("Getting spatial data")
|
||||
for p in input_properties:
|
||||
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
|
||||
p.get_spatial_data(uprn_filenames)
|
||||
# Call Google Solar API
|
||||
# TODO: Complete me
|
||||
|
|
|
|||
|
|
@ -36,4 +36,7 @@ boto3==1.28.3
|
|||
pandas==1.5.3
|
||||
pyarrow==12.0.1
|
||||
textblob
|
||||
usaddress==0.5.10
|
||||
usaddress==0.5.10
|
||||
|
||||
# Requirements we may not need
|
||||
xgboost==1.7.6
|
||||
|
|
@ -1,17 +1,14 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import msgpack
|
||||
from xgboost import XGBRegressor
|
||||
from datetime import datetime
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
|
||||
from sklearn.feature_selection import RFECV
|
||||
from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_from_s3
|
||||
import logging
|
||||
from pprint import pprint
|
||||
from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class EnergyConsumptionModel:
|
||||
|
|
@ -61,6 +58,7 @@ class EnergyConsumptionModel:
|
|||
self.training_predictions = {}
|
||||
self.testing_predictions = {}
|
||||
self.best_iteration = {}
|
||||
self.dummy_schema = None
|
||||
|
||||
self.x_train = {}
|
||||
self.x_test = {}
|
||||
|
|
@ -81,13 +79,13 @@ class EnergyConsumptionModel:
|
|||
|
||||
def read_dataset(self, file_path):
|
||||
"""Reads the dataset from the specified file path."""
|
||||
logging.info(f"Reading dataset from {file_path}")
|
||||
logger.info(f"Reading dataset from {file_path}")
|
||||
self.data = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
|
||||
self.input_data = self.data.copy()
|
||||
|
||||
def feature_engineering(self):
|
||||
def feature_engineering(self, drop_first=False):
|
||||
"""Performs feature engineering on the dataset."""
|
||||
logging.info("Starting feature engineering")
|
||||
logger.info("Starting feature engineering")
|
||||
self.data["lodgement-date"] = pd.to_datetime(self.data["lodgement-date"])
|
||||
self.data["lodgement-year"] = self.data["lodgement-date"].dt.year
|
||||
self.data["lodgement-month"] = self.data["lodgement-date"].dt.month
|
||||
|
|
@ -141,20 +139,13 @@ class EnergyConsumptionModel:
|
|||
)
|
||||
self.data = self.data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
|
||||
|
||||
# Modify number of heated rooms and number of habitable rooms
|
||||
# self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(
|
||||
# lambda x: "16_or_more" if x > 15 else str(x)
|
||||
# )
|
||||
# self.data["number-habitable-rooms"] = self.data["number-habitable-rooms"].apply(
|
||||
# lambda x: "10+" if x > 10 else str(x)
|
||||
# )
|
||||
|
||||
# Convert data types
|
||||
self.data[self.NUMERICAL_COLUMNS] = self.data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
|
||||
self.data[self.CATEGORICAL_COLUMNS] = self.data[self.CATEGORICAL_COLUMNS].astype(str)
|
||||
|
||||
# Convert categorical columns to dummies
|
||||
self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
|
||||
self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=drop_first)
|
||||
self.dummy_schema = self.data.columns.tolist()
|
||||
|
||||
# Store the dummy columns
|
||||
self.dummy_columns = {}
|
||||
|
|
@ -168,14 +159,14 @@ class EnergyConsumptionModel:
|
|||
dummy_feature_columns.append(feature)
|
||||
self.dummy_columns[target] = dummy_feature_columns
|
||||
|
||||
logging.info("Feature engineering completed")
|
||||
logger.info("Feature engineering completed")
|
||||
|
||||
def split_dataset(self, target, test_size=0.2, validation_size=0.2, random_state=42):
|
||||
"""Splits the dataset into training, validation, and testing sets."""
|
||||
if target not in self.TARGETS:
|
||||
raise ValueError(f"Target {target} not in {self.TARGETS}")
|
||||
|
||||
logging.info(f"Splitting dataset for target {target}")
|
||||
logger.info(f"Splitting dataset for target {target}")
|
||||
|
||||
# Split into train + validation and test sets
|
||||
x_train_val, x_test, y_train_val, y_test = train_test_split(
|
||||
|
|
@ -209,7 +200,7 @@ class EnergyConsumptionModel:
|
|||
if target not in self.TARGETS:
|
||||
raise ValueError(f"Target {target} not in {self.TARGETS}")
|
||||
|
||||
logging.info(f"Starting feature selection for target {target}")
|
||||
logger.info(f"Starting feature selection for target {target}")
|
||||
|
||||
# Sample the data if specified
|
||||
if sample_fraction < 1.0:
|
||||
|
|
@ -236,7 +227,7 @@ class EnergyConsumptionModel:
|
|||
self.x_test[target] = self.x_test[target][self.selected_features[target]]
|
||||
self.x_val[target] = self.x_val[target][self.selected_features[target]]
|
||||
|
||||
logging.info(f"Feature selection completed for target {target}")
|
||||
logger.info(f"Feature selection completed for target {target}")
|
||||
|
||||
def init_model(self, feature_selection=False):
|
||||
|
||||
|
|
@ -269,7 +260,7 @@ class EnergyConsumptionModel:
|
|||
def fit_model(self, target):
|
||||
"""Fits the model to the training data and removes zero-importance features."""
|
||||
|
||||
logging.info(f"Fitting model for target {target}")
|
||||
logger.info(f"Fitting model for target {target}")
|
||||
|
||||
# Initialize and fit the model
|
||||
model = self.init_model()
|
||||
|
|
@ -291,7 +282,7 @@ class EnergyConsumptionModel:
|
|||
zero_importance_features = feature_importance[feature_importance['Importance'] == 0]['Feature'].tolist()
|
||||
|
||||
if zero_importance_features:
|
||||
logging.info(f"Removing zero-importance features for target {target}: {zero_importance_features}")
|
||||
logger.info(f"Removing zero-importance features for target {target}: {zero_importance_features}")
|
||||
|
||||
self.x_train[target] = self.x_train[target].drop(columns=zero_importance_features)
|
||||
self.x_val[target] = self.x_val[target].drop(columns=zero_importance_features)
|
||||
|
|
@ -312,22 +303,22 @@ class EnergyConsumptionModel:
|
|||
# Store the best iteration
|
||||
self.best_iteration[target] = self.models[target].best_iteration
|
||||
|
||||
logging.info(f"Model fitting completed for target {target}")
|
||||
logger.info(f"Model fitting completed for target {target}")
|
||||
|
||||
def re_train_final_model(self, target):
|
||||
"""Re-trains the final model on the combined training and validation set."""
|
||||
logging.info(f"Re-training final model for target {target}")
|
||||
logger.info(f"Re-training final model for target {target}")
|
||||
x_train_val = pd.concat([self.x_train[target], self.x_val[target]])
|
||||
y_train_val = pd.concat([self.y_train[target], self.y_val[target]])
|
||||
|
||||
self.models[target] = self.init_model()
|
||||
|
||||
self.models[target].fit(x_train_val, y_train_val, verbose=False)
|
||||
logging.info(f"Re-training final model completed for target {target}")
|
||||
logger.info(f"Re-training final model completed for target {target}")
|
||||
|
||||
def evaluate_model(self, target):
|
||||
"""Evaluates the model on training and testing data."""
|
||||
logging.info(f"Evaluating model for target {target}")
|
||||
logger.info(f"Evaluating model for target {target}")
|
||||
y_train_pred = self.models[target].predict(self.x_train[target])
|
||||
train_mse = mean_squared_error(self.y_train[target], y_train_pred)
|
||||
train_r2 = r2_score(self.y_train[target], y_train_pred)
|
||||
|
|
@ -365,7 +356,7 @@ class EnergyConsumptionModel:
|
|||
'Importance': self.models[target].feature_importances_
|
||||
}).sort_values(by='Importance', ascending=False)
|
||||
|
||||
logging.info(f"Evaluation completed for target {target}")
|
||||
logger.info(f"Evaluation completed for target {target}")
|
||||
|
||||
return {
|
||||
'train': {
|
||||
|
|
@ -381,14 +372,19 @@ class EnergyConsumptionModel:
|
|||
}
|
||||
}
|
||||
|
||||
def save_model(self, target):
|
||||
def save_model(self, target, dataset_version):
|
||||
"""Saves the model to S3."""
|
||||
logging.info(f"Saving model for target {target}")
|
||||
run_date = datetime.now().strftime("%Y-%m-%d")
|
||||
logger.info(f"Saving model for target {target}")
|
||||
save_pickle_to_s3(
|
||||
self.models[target],
|
||||
bucket_name="retrofit-model-directory-dev",
|
||||
s3_file_name=f"model_directory/energy_consumption_model/{target}_{run_date}.pkl"
|
||||
s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
|
||||
)
|
||||
logger.info("Saving dummy schema for target {target}")
|
||||
save_pickle_to_s3(
|
||||
self.dummy_schema,
|
||||
bucket_name="retrofit-model-directory-dev",
|
||||
s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}_dummy_schema.pkl"
|
||||
)
|
||||
|
||||
def score_new_data(self, new_data, target):
|
||||
|
|
@ -404,57 +400,24 @@ class EnergyConsumptionModel:
|
|||
self.data = new_data.copy()
|
||||
|
||||
# Run feature engineering
|
||||
self.feature_engineering()
|
||||
# TODO: This needs to be dummied out according to the training data
|
||||
self.feature_engineering(drop_first=False)
|
||||
|
||||
# Select the transformed data
|
||||
new_data_transformed = self.data[self.dummy_columns[target]]
|
||||
new_data_transformed = self.data[self.dummy_columns[target]].copy()
|
||||
|
||||
# Ensure the columns match the selected features
|
||||
new_data_transformed = new_data_transformed[self.selected_features[target]]
|
||||
missed_dummies = [c for c in self.models[target].feature_names_in_ if c not in new_data_transformed.columns]
|
||||
zero_df = pd.DataFrame([dict(zip(missed_dummies, [0, ] * len(missed_dummies)))])
|
||||
|
||||
new_data_transformed = pd.concat([new_data_transformed, zero_df], axis=1)
|
||||
# When we dummy in this case, we run with drop_first = False so we may end up with some of those
|
||||
# first columns, we we'll need to dorp them
|
||||
new_data_transformed = new_data_transformed[self.models[target].feature_names_in_]
|
||||
|
||||
# Generate predictions
|
||||
predictions = self.models[target].predict(new_data_transformed)
|
||||
prediction = self.models[target].predict(new_data_transformed)
|
||||
|
||||
# Reset self.data to None
|
||||
self.data = None
|
||||
|
||||
return predictions
|
||||
|
||||
|
||||
# Usage:
|
||||
cleaned = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
||||
model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
|
||||
model.read_dataset('energy_consumption/2024-07-05/energy_consumption_dataset.parquet')
|
||||
model.feature_engineering()
|
||||
|
||||
# For heating_kwh
|
||||
model.split_dataset(target='heating_kwh')
|
||||
model.fit_model(target='heating_kwh')
|
||||
model.re_train_final_model(target='heating_kwh')
|
||||
evaluation_results = model.evaluate_model(target='heating_kwh')
|
||||
|
||||
pprint(evaluation_results["train"])
|
||||
pprint(evaluation_results["test"])
|
||||
|
||||
importance_df = evaluation_results["train"]["Feature Importance"]
|
||||
testing_predictions = model.testing_predictions["heating_kwh"]
|
||||
testing_predictions = testing_predictions.sort_values("residual", ascending=False)
|
||||
training_predictions = model.training_predictions["heating_kwh"]
|
||||
training_predictions = training_predictions.sort_values("residual", ascending=False)
|
||||
# Merge on model.input_data, by the index
|
||||
merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
|
||||
merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
|
||||
|
||||
# For hot_water_kwh
|
||||
model.split_dataset(target='hot_water_kwh')
|
||||
model.fit_model(target='hot_water_kwh')
|
||||
model.re_train_final_model(target='hot_water_kwh')
|
||||
evaluation_results = model.evaluate_model(target='hot_water_kwh')
|
||||
pprint(evaluation_results["train"])
|
||||
pprint(evaluation_results["test"])
|
||||
return prediction
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ def app():
|
|||
energy_consumption_data = []
|
||||
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
|
||||
# Skip the first 50
|
||||
if i < 36:
|
||||
if i < 260:
|
||||
continue
|
||||
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,56 @@
|
|||
def hanlder():
|
||||
from pprint import pprint
|
||||
import msgpack
|
||||
from utils.s3 import read_from_s3
|
||||
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
|
||||
|
||||
|
||||
def handler():
|
||||
"""
|
||||
This function is used to train the model and store the final models in s3 as pickles
|
||||
:return:
|
||||
"""
|
||||
|
||||
dataset_version = "2024-07-05"
|
||||
|
||||
# Usage:
|
||||
cleaned = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
||||
model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
|
||||
model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
|
||||
model.feature_engineering()
|
||||
|
||||
# For heating_kwh
|
||||
model.split_dataset(target='heating_kwh')
|
||||
model.fit_model(target='heating_kwh')
|
||||
model.re_train_final_model(target='heating_kwh')
|
||||
evaluation_results = model.evaluate_model(target='heating_kwh')
|
||||
|
||||
pprint(evaluation_results["train"])
|
||||
pprint(evaluation_results["test"])
|
||||
|
||||
model.save_model(target='heating_kwh', dataset_version=dataset_version)
|
||||
|
||||
# importance_df = evaluation_results["train"]["Feature Importance"]
|
||||
# testing_predictions = model.testing_predictions["heating_kwh"]
|
||||
# testing_predictions = testing_predictions.sort_values("residual", ascending=False)
|
||||
# training_predictions = model.training_predictions["heating_kwh"]
|
||||
# training_predictions = training_predictions.sort_values("residual", ascending=False)
|
||||
# # Merge on model.input_data, by the index
|
||||
# merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
|
||||
# merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
|
||||
|
||||
# For hot_water_kwh
|
||||
model.split_dataset(target='hot_water_kwh')
|
||||
model.fit_model(target='hot_water_kwh')
|
||||
model.re_train_final_model(target='hot_water_kwh')
|
||||
evaluation_results = model.evaluate_model(target='hot_water_kwh')
|
||||
|
||||
pprint(evaluation_results["train"])
|
||||
pprint(evaluation_results["test"])
|
||||
|
||||
model.save_model(target='hot_water_kwh', dataset_version=dataset_version)
|
||||
|
|
|
|||
118
etl/customers/places_for_people/demo_portfolio.py
Normal file
118
etl/customers/places_for_people/demo_portfolio.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
import pandas as pd
|
||||
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
PORTFOLIO_ID = 83
|
||||
USER_ID = 8
|
||||
|
||||
|
||||
def app():
|
||||
# TODO: We can insert a variable, indicating the they own all of the units in the building
|
||||
asset_list = [
|
||||
{
|
||||
"address": "Flat 1, Fenton Court",
|
||||
"postcode": "N2 8DS",
|
||||
"uprn": 200140644,
|
||||
"building_id": 1,
|
||||
},
|
||||
{
|
||||
"address": "Flat 2, Fenton Court",
|
||||
"postcode": "N2 8DS",
|
||||
"uprn": 200140645,
|
||||
"building_id": 1,
|
||||
},
|
||||
{
|
||||
"address": "Flat 3, Fenton Court",
|
||||
"postcode": "N2 8DS",
|
||||
"uprn": 200140646,
|
||||
"building_id": 1,
|
||||
},
|
||||
{
|
||||
"address": "Flat 4, Fenton Court",
|
||||
"postcode": "N2 8DS",
|
||||
"uprn": 200140647,
|
||||
"building_id": 1,
|
||||
},
|
||||
{
|
||||
"address": "Flat 5, Fenton Court",
|
||||
"postcode": "N2 8DS",
|
||||
"uprn": 200140648,
|
||||
"building_id": 1,
|
||||
},
|
||||
{
|
||||
"address": "Flat 6, Fenton Court",
|
||||
"postcode": "N2 8DS",
|
||||
"uprn": 200140649,
|
||||
"building_id": 1,
|
||||
}
|
||||
]
|
||||
|
||||
asset_list = pd.DataFrame(asset_list)
|
||||
|
||||
# Store the asset list in s3
|
||||
filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
|
||||
save_csv_to_s3(
|
||||
dataframe=asset_list,
|
||||
bucket_name="retrofit-plan-inputs-dev",
|
||||
file_name=filename
|
||||
)
|
||||
|
||||
body = {
|
||||
"portfolio_id": str(PORTFOLIO_ID),
|
||||
"housing_type": "Private",
|
||||
"goal": "Increase EPC",
|
||||
"goal_value": "B",
|
||||
"trigger_file_path": filename,
|
||||
"already_installed_file_path": "",
|
||||
"patches_file_path": "",
|
||||
"non_invasive_recommendations_file_path": "",
|
||||
"budget": None,
|
||||
}
|
||||
print(body)
|
||||
|
||||
# Get an example of flats with solar panels from epc data
|
||||
|
||||
# import inspect
|
||||
# import pandas as pd
|
||||
# from tqdm import tqdm
|
||||
# from pathlib import Path
|
||||
#
|
||||
# src_file_path = inspect.getfile(lambda: None)
|
||||
#
|
||||
# EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
|
||||
#
|
||||
# epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
|
||||
#
|
||||
# directory = epc_directories[1]
|
||||
# data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
# # Get flats
|
||||
# data = data[data["PROPERTY_TYPE"].str.lower().str.contains("flat")]
|
||||
# data = data[~pd.isnull(data["UPRN"])]
|
||||
# data["UPRN"] = data["UPRN"].astype(int).astype(str)
|
||||
# data = data[pd.to_datetime(data["LODGEMENT_DATE"]) > "2020-01-01"]
|
||||
# flats_with_solar = data[data['PHOTO_SUPPLY'] > 0]
|
||||
#
|
||||
# print(flats_with_solar["UPRN"])
|
||||
#
|
||||
# flats_with_solar[["ADDRESS", "UPRN"]]
|
||||
#
|
||||
# # Good example:
|
||||
# # UPRN: 10013160824, Flat 39, The Meadow, 30 Busk Meadow S5 7JH (care home with 39 flats, have solar panels)
|
||||
# #
|
||||
# # Mostly, For a mid-floor flat, the property doesn't show as having solar panels through the photo_supply variable
|
||||
# # But actually for UPRN: 10013245713, Apartment 4, Orchard House, Gill Lane PR4 5QN, this has a dwelling above
|
||||
# # but the photo_supply variable is 20
|
||||
#
|
||||
# # Small flat consisting of 2 units
|
||||
# # UPRN: 42172953, FLAT 2, 276 CLAUGHTON ROAD, BIRKENHEAD CH41 4DX
|
||||
#
|
||||
# # Flat containing 5 units
|
||||
# # UPRN: 10013247127 Flat 1, Old Church House PR4 5GE
|
||||
# # UPRN: 10013247130 Flat 4, Old Church House PR4 5GE
|
||||
#
|
||||
# # Flat containing multiple units:
|
||||
# # UPRNS: 10013245710, 10013245716, 10013245711, 10013245717, 10013245714, 10013245715, 10013245712, 10013245713
|
||||
#
|
||||
# # Look for flats with air source heat pumps!
|
||||
# flats_with_asps = data[data["MAINHEAT_DESCRIPTION"].str.lower().str.contains("air source heat pump")]
|
||||
# print(flats_with_asps[["UPRN", "ADDRESS"]])
|
||||
Loading…
Add table
Reference in a new issue