fixed col names

This commit is contained in:
Michael Duong 2023-12-14 21:05:07 +00:00
parent 1ce9a4521f
commit eeeea467cf
3 changed files with 26 additions and 25 deletions

View file

@ -74,15 +74,15 @@ class TrainingDataset(BaseDataset):
"""
Drop features that are not needed for modelling
"""
self.df = self.df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
self.df = self.df.drop(columns=["lodgement_date_starting", "lodgement_date_ending"])
def _feature_generation(self):
"""
Generate features for modelling
"""
# Assigns new columns on self.df, each holding the result of
# self._calculate_days_to applied to one of the lodgement-date columns.
# _calculate_days_to is defined outside this view.
# NOTE(review): this span looks like a rendered diff hunk in which the removed
# (upper-case) and added (lower-case) lines are shown together -- presumably
# only the lower-case pair exists in the committed file; confirm.
self.df["DAYS_TO_STARTING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_STARTING"])
self.df["DAYS_TO_ENDING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_ENDING"])
self.df["days_to_starting"] = self._calculate_days_to(self.df["lodgement_date_starting"])
# NOTE(review): "day_to_ending" is singular, unlike "days_to_starting" above and
# the earlier "DAYS_TO_ENDING" -- confirm whether "days_to_ending" was intended
# before downstream code comes to depend on this column name.
self.df["day_to_ending"] = self._calculate_days_to(self.df["lodgement_date_ending"])
def _clean_efficiency_variables(self):

View file

@ -259,12 +259,12 @@ class EPCDifferenceRecord:
carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE)
component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
ending_record = self.record2.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_ENDING")
starting_record = self.record1.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_STARTING")
ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_ending")
starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_starting")
# TODO: Take the earliest potentials
self.difference_record = {
"uprn": self.record1.get("UPRN"),
"uprn": self.record1.get("uprn"),
"rdsap_change": rdsap_change,
"heat_demand_change": heat_demand_change,
"carbon_change": carbon_change,

View file

@ -552,37 +552,38 @@ def app():
epc_records = [EPCRecord(uprn, **x) for x in variable_data.to_dict(orient='records')]
# TODO: Make this part of a strategy pattern, as we can generate different training datasets
property_model_data, all_equal_rows = compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_model_data, all_equal_rows)
for idx in range(0, len(epc_records) - 1):
# for idx in range(0, len(epc_records) - 1):
if idx >= len(epc_records) - 1:
break
# if idx >= len(epc_records) - 1:
# break
earliest_record: EPCRecord = epc_records[idx]
latest_record: EPCRecord = epc_records[idx + 1]
# earliest_record: EPCRecord = epc_records[idx]
# latest_record: EPCRecord = epc_records[idx + 1]
# Auto sort the records so that the record with highest RDSAP score is always record1
difference_record: EPCDifferenceRecord = latest_record - earliest_record
# # Auto sort the records so that the record with highest RDSAP score is always record1
# difference_record: EPCDifferenceRecord = latest_record - earliest_record
# TODO: Pull out RDSAP_CHANGE to a variable
if difference_record.get("RDSAP_CHANGE") == 0:
continue
# # TODO: Pull out RDSAP_CHANGE to a variable
# if difference_record.get("RDSAP_CHANGE") == 0:
# continue
all_equal = difference_record.compare_fields_in_records(
fields=CORE_COMPONENT_FEATURES
)
# all_equal = difference_record.compare_fields_in_records(
# fields=CORE_COMPONENT_FEATURES
# )
if all_equal:
# Keep track of this for the moment so we can analyse
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
continue
# if all_equal:
# # Keep track of this for the moment so we can analyse
# all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
# continue
difference_record.append_fixed_data(fixed_data)
# difference_record.append_fixed_data(fixed_data)
property_model_data.append(difference_record)
# property_model_data.append(difference_record)
# property_model_data.append(difference_record.difference_record)