working on updating the HA code for new file formats

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-15 10:57:57 +00:00
parent 0f60082ba1
commit b39e9c9899
4 changed files with 84 additions and 96 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -158,11 +158,6 @@ async def trigger_plan(body: PlanTriggerRequest):
property_recommendations, property_representative_recommendations
)
p.recommendations_scoring_data[0]["id"]
p.recommendations_scoring_data[0]["walls_thermal_transmittance"]
p.recommendations_scoring_data[0]["walls_thermal_transmittance_ending"]
p.recommendations_scoring_data[0]["walls_thermal_transmittance_ending"]
recommendations_scoring_data.extend(p.recommendations_scoring_data)
logger.info("Preparing data for scoring in sap change api")

View file

@ -24,27 +24,6 @@ load_dotenv(ENV_FILE)
class DataLoader:
COLOUR_CONFIG = {
"ha_1": {
"asset_list": {"red": "FFFF0000", "green": "FF00B050"},
},
"ha_6": {
"asset_list": {"red": "FFFF0000", "green": "FF00B050"},
"survey_list": {
"green": "FF92D050", "purple": "FF7030A0", "red": "FFFF0000", "blue": "FF00B0F0"
}
},
"ha_14": {
"asset_list": {"red": "FFFF0000", "green": "FF00B050"},
},
"ha_39": {
"asset_list": {"red": "FFFF0000", "green": "FF00B050"},
},
"ha_107": {
"asset_list": {"red": "FFFF0000", "green": "FF00B050"},
}
}
MIN_ROWS = {
"ha_1": 2,
"ha_6": 2,
@ -53,12 +32,87 @@ class DataLoader:
"ha_107": 2,
}
# Per-HA names of the asset-list columns that hold the full address and the postcode.
# create_asset_list_matching_address looks up both "ha_1" and "ha_6" here, so each of
# them needs an entry - a missing one surfaces as a KeyError when that HA is loaded.
COLUMN_CONFIG = {
    "ha_1": {
        "address": "Address",
        "postcode": "Address - Postcode",
    },
    # NOTE(review): these are the column names the earlier inline ha_6 handling read;
    # confirm they still match the new HA 6 file format.
    "ha_6": {
        "address": "propertyaddress",
        "postcode": "Post Code",
    },
}
def __init__(self, files, use_cache):
self.files = files
self.use_cache = use_cache
self.data = {}
def create_asset_list_matching_address(self, ha_name, asset_list):
    """
    Add the lower-cased, stripped ``matching_address`` and ``matching_postcode`` columns.

    For "ha_1" and "ha_6" the source column names are read from ``COLUMN_CONFIG``. For the
    other supported HAs the address is built by joining several columns with ", ".

    :param ha_name: housing association identifier, e.g. "ha_1"
    :param asset_list: asset list DataFrame; the two columns are written onto it
    :return: the same DataFrame with the two matching columns added
    :raises NotImplementedError: if the HA is unsupported, or is meant to be driven by
        ``COLUMN_CONFIG`` but has no entry there
    """

    def normalise(column, as_text=False):
        # Lower-case and strip one column; as_text casts first for columns that may be non-string
        values = asset_list[column]
        if as_text:
            values = values.astype(str)
        return values.str.lower().str.strip()

    if ha_name in ["ha_1", "ha_6"]:
        column_names = self.COLUMN_CONFIG.get(ha_name)
        if column_names is None:
            # Fail with an actionable message rather than a bare KeyError on the HA name
            raise NotImplementedError(
                f"No COLUMN_CONFIG entry for {ha_name} - add its address and postcode columns"
            )
        asset_list["matching_address"] = normalise(column_names["address"])
        asset_list["matching_postcode"] = normalise(column_names["postcode"])
        return asset_list

    # For each HA: (address part columns in join order, cast address parts to str?, postcode column)
    address_parts_by_ha = {
        "ha_14": (
            [("Address 1", False), ("Address 2", False), ("Address 3", False),
             ("Address 4", False), ("Postcode", False)],
            "Postcode",
        ),
        "ha_39": (
            [("add_1", True), ("add_2", True), ("add_3", True),
             ("add_4", True), ("add_5", True), ("post_code", True)],
            "post_code",
        ),
        "ha_107": (
            [("House No", True), ("Street", False), ("Town", False),
             ("District", False), ("Postcode", False)],
            "Postcode",
        ),
    }
    if ha_name not in address_parts_by_ha:
        raise NotImplementedError("implement me")

    part_columns, postcode_column = address_parts_by_ha[ha_name]
    matching_address = None
    for column, as_text in part_columns:
        part = normalise(column, as_text=as_text)
        matching_address = part if matching_address is None else matching_address + ", " + part

    asset_list["matching_address"] = matching_address
    # The postcode column is never cast to str here, only lower-cased and stripped
    asset_list["matching_postcode"] = normalise(postcode_column)
    return asset_list
def append_asset_list_built_form(self, ha_name, asset_list):
    """
    Add a ``built_form`` column to the asset list for the HAs that need one.

    Only "ha_6" is handled: each ``Property Type`` value is passed through
    ``identify_built_form_ha6``. Every other HA gets the asset list back untouched.

    :param ha_name: housing association identifier, e.g. "ha_6"
    :param asset_list: asset list DataFrame
    :return: the asset list, with ``built_form`` added for ha_6
    """
    if ha_name != "ha_6":
        return asset_list

    property_types = asset_list["Property Type"]
    asset_list["built_form"] = property_types.apply(self.identify_built_form_ha6)
    return asset_list
@staticmethod
def create_asset_list_house_no(ha_name, asset_list):
"""
This function will append the House number onto the asset list
:return:
"""
if ha_name in ["ha_107"]:
asset_list["HouseNo"] = asset_list["House No"].copy()
else:
split_addresses = asset_list['matching_address'].str.split(',', expand=True)
house_numbers = split_addresses[0].str.split(' ', expand=True)
# THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
# many columns there might be
house_numbers = house_numbers.iloc[:, 0:1]
house_numbers.columns = ['HouseNo']
asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
return asset_list
def load_asset_list(self, file_path, ha_name, sheet_name=None):
workbook = openpyxl.load_workbook(file_path)
if sheet_name is not None:
@ -87,74 +141,15 @@ class DataLoader:
# Remove entirely empty rows - consider all columns apart from row_color
asset_list = asset_list.loc[asset_list.loc[:, asset_list.columns != 'row_color'].notnull().any(axis=1)]
asset_list_colours = self.COLOUR_CONFIG[ha_name]["asset_list"]
asset_list["row_colour_name"] = np.where(
asset_list["row_color"] == asset_list_colours["red"], "red",
np.where(asset_list["row_color"] == asset_list_colours["green"], "green", "yellow")
)
asset_list["row_meaning"] = np.where(
asset_list["row_colour_name"] == "red", "does not meet criteria",
np.where(
asset_list["row_colour_name"] == "green", "identified potential eco works (CWI)", "maybe in the future"
)
)
# Add in asset_list_row_id
asset_list["asset_list_row_id"] = [ha_name + str(i) for i in range(0, len(asset_list))]
# Prepare the asset list
# Depending on the HA, we need to rename some columns
if ha_name == "ha_1":
asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["Address - Postcode"].str.lower().str.strip()
elif ha_name == "ha_6":
asset_list["matching_address"] = asset_list["propertyaddress"].str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["Post Code"].str.lower().str.strip()
elif ha_name == "ha_14":
# Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
asset_list["Address 2"].str.lower().str.strip() + ", " + \
asset_list["Address 3"].str.lower().str.strip() + ", " + \
asset_list["Address 4"].str.lower().str.strip() + ", " + \
asset_list["Postcode"].str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
elif ha_name == "ha_39":
# Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["add_2"].astype(str).str.lower().str.strip() + ", " + \
asset_list["add_3"].astype(str).str.lower().str.strip() + ", " + \
asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \
asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
asset_list["post_code"].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip()
elif ha_name == "ha_107":
# Create matching_address by concatenating House No, Street, Town, District, Postcode
asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Street"].str.lower().str.strip() + ", " + \
asset_list["Town"].str.lower().str.strip() + ", " + \
asset_list["District"].str.lower().str.strip() + ", " + \
asset_list["Postcode"].str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
else:
raise NotImplementedError("implement me")
# Create matching address and matching postcode
asset_list = self.create_asset_list_matching_address(ha_name=ha_name, asset_list=asset_list)
if ha_name in ["ha_107"]:
asset_list["HouseNo"] = asset_list["House No"].copy()
else:
split_addresses = asset_list['matching_address'].str.split(',', expand=True)
house_numbers = split_addresses[0].str.split(' ', expand=True)
# THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
# many columns there might be
house_numbers = house_numbers.iloc[:, 0:1]
house_numbers.columns = ['HouseNo']
asset_list = self.create_asset_list_house_no(ha_name=ha_name, asset_list=asset_list)
asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
# Finally, we process property_type or built form, where needed
if ha_name == "ha_6":
asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list)
return asset_list
@ -177,9 +172,7 @@ class DataLoader:
survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
# Remove columns that are None
survey_list = survey_list.loc[:, survey_list.columns.notnull()]
survey_list["row_colour"] = survey_colors
survey_list_colours = self.COLOUR_CONFIG[ha_name]["survey_list"]
# The survey list has 4 possible colours:
# PURPLE - Installer advised install complete and a complimentary post works EPC has been completed.
@ -1252,13 +1245,13 @@ def app():
:return:
"""
use_cache = True
use_cache = False
files = {
"ha_1": {
"asset_list": {
"filepath": "etl/eligibility/ha_15_32/HA 1 - ASSET LIST.xlsx",
"sheetname": "HA 1"
"filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
"sheetname": "Energy data"
}
},
"ha_6": {