mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Final fixes to get portfolio working
This commit is contained in:
parent
be9a960186
commit
76a03cdf23
3 changed files with 20 additions and 6 deletions
|
|
@ -749,6 +749,7 @@ class Property(Definitions):
|
|||
"TOTAL_FLOOR_AREA": self.floor_area,
|
||||
**epc_raw_data,
|
||||
"BUILT_FORM": built_form,
|
||||
"POSTCODE": self.data["postcode"],
|
||||
}
|
||||
|
||||
return property_data
|
||||
|
|
|
|||
|
|
@ -211,11 +211,18 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
logger.info("Preparing data for scoring in sap change api")
|
||||
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
|
||||
|
||||
# Perform the same cleaning as in the model
|
||||
# Perform the same cleaning as in the model, but first clean the number-of-rooms variables
|
||||
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=recommendations_scoring_data,
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
|
||||
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||
colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
|
||||
)
|
||||
|
||||
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=recommendations_scoring_data,
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
|
||||
).drop(columns=["LOCAL_AUTHORITY"])
|
||||
|
||||
recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
|
||||
|
|
@ -303,7 +310,7 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# 3) the recommendations
|
||||
|
||||
logger.info("Uploading recommendations to the database")
|
||||
for i in tqdm(range(0, len(input_properties), BATCH_SIZE)):
|
||||
for i in range(0, len(input_properties), BATCH_SIZE):
|
||||
try:
|
||||
# Take a slice of the input_properties list to make a batch
|
||||
batch_properties = input_properties[i:i + BATCH_SIZE]
|
||||
|
|
|
|||
|
|
@ -179,7 +179,6 @@ class DataProcessor:
|
|||
# We have some non-standard construction age bands which we'll clean for matching
|
||||
if not self.newdata:
|
||||
self.standardise_construction_age_band()
|
||||
|
||||
self.clean_missing_rooms()
|
||||
|
||||
self.recast_df_columns(
|
||||
|
|
@ -451,7 +450,7 @@ class DataProcessor:
|
|||
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
|
||||
|
||||
@staticmethod
|
||||
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on):
|
||||
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None):
|
||||
"""
|
||||
Clean the input DataFrame using averages from a cleaning DataFrame.
|
||||
|
||||
|
|
@ -459,11 +458,16 @@ class DataProcessor:
|
|||
:param cleaning_data: DataFrame containing data for cleaning.
|
||||
:param cols_to_merge_on: Columns on which merging is based. We pass cols_to_merge_on to this function as this
|
||||
differs depending on where the function is being used.
|
||||
:param colnames: Optional list of the exact columns to clean; if omitted, the default columns are used.
|
||||
:return: Cleaned DataFrame.
|
||||
"""
|
||||
|
||||
# The desired colnames to clean - which may not be present
|
||||
if colnames is None:
|
||||
colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
|
||||
|
||||
cols_to_clean = [
|
||||
c for c in ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"] if
|
||||
c for c in colnames if
|
||||
c in data_to_clean.columns
|
||||
]
|
||||
|
||||
|
|
@ -492,6 +496,8 @@ class DataProcessor:
|
|||
for col in cols_to_clean:
|
||||
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
|
||||
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
|
||||
# If any values are still missing, fall back to the column mean
|
||||
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
|
||||
|
||||
return data_to_clean
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue