mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
prepared sap model dataset
This commit is contained in:
parent
8e65ac05cf
commit
d586441769
1 changed files with 19 additions and 6 deletions
|
|
@ -72,6 +72,7 @@ class SalModel:
|
|||
self.df = pd.DataFrame(data)
|
||||
self.cleaner = cleaner
|
||||
|
||||
self.model_data = None
|
||||
self.train_x = None
|
||||
self.train_y = None
|
||||
self.results = None
|
||||
|
|
@ -128,6 +129,15 @@ class SalModel:
|
|||
how="left",
|
||||
left_on="roof-description",
|
||||
right_on="original_description"
|
||||
).drop(
|
||||
columns=["original_description"]
|
||||
).merge(
|
||||
lighting_proportions,
|
||||
how="left",
|
||||
left_on="lighting-description",
|
||||
right_on="original_description"
|
||||
).drop(
|
||||
columns=["original_description"]
|
||||
)
|
||||
|
||||
return model_data
|
||||
|
|
@ -141,13 +151,11 @@ class SalModel:
|
|||
@staticmethod
def _clean_numericals(model_data):
    """Convert the string-encoded numeric columns of *model_data* to floats.

    For each of the columns listed below, blank strings ("") are replaced
    with "0" and the column is then cast to float. The frame is modified
    in place and also returned.

    :param model_data: DataFrame containing the columns "photo-supply",
        "multi-glaze-proportion", "low-energy-lighting" and
        "number-open-fireplaces".
    :return: the same DataFrame, with those columns converted to float.
    """
    numeric_cols = [
        "photo-supply",
        "multi-glaze-proportion",
        "low-energy-lighting",
        "number-open-fireplaces",
    ]

    for col in numeric_cols:
        # Fall back to the column's own values. Previously the fallback was
        # always model_data["photo-supply"], which overwrote every column
        # with the photo-supply data.
        model_data[col] = np.where(
            model_data[col] == "", "0", model_data[col]
        ).astype(float)

    return model_data
|
||||
|
||||
def create_dataset(self):
|
||||
|
|
@ -176,9 +184,9 @@ class SalModel:
|
|||
exclude_features = ["walls-description", "floor-description", "roof-description", "transaction-type"]
|
||||
|
||||
features = [
|
||||
x for x in self.BASE_FEATURES +
|
||||
self.COMPONENT_FEATURES +
|
||||
["walls_u_value", "floor_u_value", "roof_u_value", self.RESPONSE] if x not in exclude_features
|
||||
x for x in self.BASE_FEATURES + self.COMPONENT_FEATURES + [
|
||||
"walls_u_value", "floor_u_value", "roof_u_value", self.RESPONSE
|
||||
] if x not in exclude_features
|
||||
]
|
||||
|
||||
model_data = model_data[features]
|
||||
|
|
@ -186,6 +194,11 @@ class SalModel:
|
|||
for col in self.CATEGORICAL_COLS:
|
||||
model_data[col] = model_data[col].astype('category')
|
||||
|
||||
# Convert response
|
||||
self.model_data[self.RESPONSE] = self.model_data[self.RESPONSE].astype(float)
|
||||
|
||||
self.model_data = model_data
|
||||
|
||||
def make_training_test(self):
|
||||
# Split into training and test
|
||||
# Dummy data
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue