mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Added HA25
This commit is contained in:
parent
46f5ee8ea4
commit
d9e9be4389
1 changed files with 51 additions and 28 deletions
|
|
@ -159,19 +159,18 @@ class DataLoader:
|
|||
}
|
||||
|
||||
UNMATCHED_CIGA = {
|
||||
# We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not
|
||||
# the asset list
|
||||
"HA6": 117,
|
||||
"HA14": 3,
|
||||
"HA16": 7,
|
||||
# There are just too many unmatched here
|
||||
"HA6": 117,
|
||||
"HA24": 12,
|
||||
"HA107": 51,
|
||||
}
|
||||
|
||||
def __init__(self, directories, december_figures_filepath, use_cache):
|
||||
def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
|
||||
self.directories = directories
|
||||
self.use_cache = use_cache
|
||||
self.december_figures_filepath = december_figures_filepath
|
||||
self.rebuild = rebuild
|
||||
|
||||
self.data = {}
|
||||
self.december_figures = None
|
||||
|
|
@ -312,23 +311,20 @@ class DataLoader:
|
|||
return asset_list
|
||||
|
||||
@staticmethod
|
||||
def create_ciga_list_house_no(ha_name, ciga_list):
|
||||
def create_ciga_list_house_no(ciga_list):
|
||||
"""
|
||||
This function will append the house number (HouseNo) onto the CIGA list
|
||||
:return:
|
||||
"""
|
||||
|
||||
if ha_name in ["HA6", "HA14", "HA107", "HA16"]:
|
||||
split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
|
||||
house_numbers = split_addresses[0].str.split(' ', expand=True)
|
||||
# The first column should be HouseNo - we aren't interested in the other columns, but we don't know how
|
||||
# many columns there might be
|
||||
house_numbers = house_numbers.iloc[:, 0:1]
|
||||
house_numbers.columns = ['HouseNo']
|
||||
split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
|
||||
house_numbers = split_addresses[0].str.split(' ', expand=True)
|
||||
# The first column should be HouseNo - we aren't interested in the other columns, but we don't know how
|
||||
# many columns there might be
|
||||
house_numbers = house_numbers.iloc[:, 0:1]
|
||||
house_numbers.columns = ['HouseNo']
|
||||
|
||||
ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
|
||||
else:
|
||||
raise NotImplementedError("Implement me")
|
||||
ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
|
||||
|
||||
return ciga_list
|
||||
|
||||
|
|
@ -447,7 +443,7 @@ class DataLoader:
|
|||
# Remove rows with missing postcode which happens in a small number of cases
|
||||
ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
|
||||
ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
|
||||
ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
|
||||
ciga_list = self.create_ciga_list_house_no(ciga_list)
|
||||
ciga_list = self.dedupe_ciga_list(ciga_list)
|
||||
ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
|
||||
|
||||
|
|
@ -800,6 +796,10 @@ class DataLoader:
|
|||
"st. leodegars close", "st leodegars close"
|
||||
)
|
||||
|
||||
survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
|
||||
"montgomery crescent", "montgomery road"
|
||||
)
|
||||
|
||||
return survey_list
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -1102,16 +1102,18 @@ class DataLoader:
|
|||
for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
|
||||
self.december_figures[col] = self.december_figures[col].astype("Int64")
|
||||
|
||||
if self.use_cache:
|
||||
self.data = read_pickle_from_s3(
|
||||
if self.use_cache and not self.rebuild:
|
||||
data = read_pickle_from_s3(
|
||||
bucket_name="retrofit-datalake-dev",
|
||||
s3_file_name="ha-analysis/batch3-inputs.pickle",
|
||||
)
|
||||
return
|
||||
else:
|
||||
data = {}
|
||||
|
||||
data = {}
|
||||
for filepath in self.directories:
|
||||
ha_name = filepath.split("/")[2]
|
||||
if ha_name in data:
|
||||
continue
|
||||
# Load asset list
|
||||
logger.info("Loading data for {}".format(ha_name))
|
||||
asset_list, survey_list, ciga_list = self.load_asset_list(
|
||||
|
|
@ -2635,6 +2637,10 @@ def forecast_remaining_sales(loader):
|
|||
# and I don't want the numbers to change too much, dependent on the CIGA conversion rate
|
||||
maximum_ciga_conversion = 0.75
|
||||
|
||||
# This is a hard limit to the allowed conversion rates to final sale. These are typically very
|
||||
# high but there are some anomalies, amongst surveys that are early on
|
||||
sales_conversion_lower_bound = 0.8
|
||||
|
||||
gbis_rate = 600
|
||||
eco4_rate = 1710
|
||||
# old_gbis_rate = 432
|
||||
|
|
@ -2796,14 +2802,30 @@ def forecast_remaining_sales(loader):
|
|||
eco4_ciga_independent_passrates = pd.DataFrame(eco4_ciga_independent_passrates)
|
||||
gbis_ciga_independent_passrates = pd.DataFrame(gbis_ciga_independent_passrates)
|
||||
|
||||
eco4_ciga_independent_passrates["conversion"] = (
|
||||
eco4_ciga_independent_passrates["# ECO4 successfully installed"] /
|
||||
eco4_ciga_independent_passrates["# ECO4 at install stage"]
|
||||
)
|
||||
eco4_ciga_independent_passrates_clipped = eco4_ciga_independent_passrates[
|
||||
eco4_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
|
||||
]
|
||||
|
||||
gbis_ciga_independent_passrates["conversion"] = (
|
||||
gbis_ciga_independent_passrates["# GBIS successfully installed"] /
|
||||
gbis_ciga_independent_passrates["# GBIS at install stage"]
|
||||
)
|
||||
gbis_ciga_independent_passrates_clipped = gbis_ciga_independent_passrates[
|
||||
gbis_ciga_independent_passrates["conversion"] >= sales_conversion_lower_bound
|
||||
]
|
||||
|
||||
median_eco4_to_install = (
|
||||
eco4_ciga_independent_passrates["# ECO4 successfully installed"].sum() /
|
||||
eco4_ciga_independent_passrates["# ECO4 at install stage"].sum()
|
||||
eco4_ciga_independent_passrates_clipped["# ECO4 successfully installed"].sum() /
|
||||
eco4_ciga_independent_passrates_clipped["# ECO4 at install stage"].sum()
|
||||
)
|
||||
|
||||
median_gbis_to_install = (
|
||||
gbis_ciga_independent_passrates["# GBIS successfully installed"].sum() /
|
||||
gbis_ciga_independent_passrates["# GBIS at install stage"].sum()
|
||||
gbis_ciga_independent_passrates_clipped["# GBIS successfully installed"].sum() /
|
||||
gbis_ciga_independent_passrates_clipped["# GBIS at install stage"].sum()
|
||||
)
|
||||
|
||||
# Produce the final output
|
||||
|
|
@ -3270,6 +3292,8 @@ def app():
|
|||
use_cache = True
|
||||
# Determines if we want to perform the data pull
|
||||
pull_data = False
|
||||
# Override to re-build all inputs
|
||||
rebuild_inputs = False
|
||||
|
||||
# List all of the data in the folder
|
||||
|
||||
|
|
@ -3278,12 +3302,11 @@ def app():
|
|||
# Grab the December HA figures filepath
|
||||
december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
|
||||
|
||||
# priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA39", "HA107"]
|
||||
priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA39", "HA107"]
|
||||
priority_has = ["HA1", "HA6", "HA7", "HA14", "HA16", "HA24", "HA25", "HA39", "HA107"]
|
||||
# Filter down the directories to only the priority HAs
|
||||
directories = [d for d in directories if d.split("/")[2] in priority_has]
|
||||
|
||||
loader = DataLoader(directories, december_figures_filepath, use_cache)
|
||||
loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
|
||||
loader.load()
|
||||
loader.ha_facts_and_figures()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue