Final fixes to get portfolio working

This commit is contained in:
Khalim Conn-Kowlessar 2023-11-20 11:12:34 +00:00
parent be9a960186
commit 76a03cdf23
3 changed files with 20 additions and 6 deletions

View file

@ -749,6 +749,7 @@ class Property(Definitions):
"TOTAL_FLOOR_AREA": self.floor_area,
**epc_raw_data,
"BUILT_FORM": built_form,
"POSTCODE": self.data["postcode"],
}
return property_data

View file

@ -211,11 +211,18 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
# Perform the same cleaning as in the model
# Perform the same cleaning as in the model - first clean number of room variables though
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
data_to_clean=recommendations_scoring_data,
cleaning_data=cleaning_data,
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
)
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
data_to_clean=recommendations_scoring_data,
cleaning_data=cleaning_data,
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
).drop(columns=["LOCAL_AUTHORITY"])
recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
@ -303,7 +310,7 @@ async def trigger_plan(body: PlanTriggerRequest):
# 3) the recommendations
logger.info("Uploading recommendations to the database")
for i in tqdm(range(0, len(input_properties), BATCH_SIZE)):
for i in range(0, len(input_properties), BATCH_SIZE):
try:
# Take a slice of the input_properties list to make a batch
batch_properties = input_properties[i:i + BATCH_SIZE]

View file

@ -179,7 +179,6 @@ class DataProcessor:
# We have some non-standard construction age bands which we'll clean for matching
if not self.newdata:
self.standardise_construction_age_band()
self.clean_missing_rooms()
self.recast_df_columns(
@ -451,7 +450,7 @@ class DataProcessor:
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
@staticmethod
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on):
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None):
"""
Clean the input DataFrame using averages from a cleaning DataFrame.
@ -459,11 +458,16 @@ class DataProcessor:
:param cleaning_data: DataFrame containing data for cleaning.
:param cols_to_merge_on: Columns on which merging is based. We pass cols_to_merge_on to this function as this
differs depending on where the function is being used.
:param colnames: If specified can be used to state exactly which columns to clean
:return: Cleaned DataFrame.
"""
# The desired colnames to clean - which may not be present
if colnames is None:
colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
cols_to_clean = [
c for c in ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"] if
c for c in colnames if
c in data_to_clean.columns
]
@ -492,6 +496,8 @@ class DataProcessor:
for col in cols_to_clean:
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
# If we still have missings
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
return data_to_clean