mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Debugging list loading
This commit is contained in:
parent
a45cf2f319
commit
615f2289e7
3 changed files with 29 additions and 56 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -131,9 +131,17 @@ class DataLoader:
|
|||
|
||||
return ciga_list
|
||||
|
||||
@staticmethod
|
||||
def get_sheetname(workbook):
|
||||
if "Asset List" in workbook.sheetnames:
|
||||
return "Asset List"
|
||||
else:
|
||||
return "Assets"
|
||||
|
||||
def load_asset_list(self, filepath, ha_name):
|
||||
workbook = openpyxl.load_workbook(filepath)
|
||||
asset_sheet = workbook["Assets"]
|
||||
sheetname = self.get_sheetname(workbook)
|
||||
asset_sheet = workbook[sheetname]
|
||||
asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
|
||||
|
||||
rows_data = []
|
||||
|
|
@ -170,8 +178,10 @@ class DataLoader:
|
|||
# Remove columns that are None
|
||||
survey_list = survey_list.loc[:, survey_list.columns.notnull()]
|
||||
survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
|
||||
|
||||
# Perform survey list merge
|
||||
survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
|
||||
if not survey_list.empty:
|
||||
survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
|
||||
|
||||
# We check if there are CIGA checks
|
||||
ciga_list = pd.DataFrame()
|
||||
|
|
@ -185,9 +195,10 @@ class DataLoader:
|
|||
ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
|
||||
# Remove columns that are None
|
||||
ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
|
||||
ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
|
||||
# Perform ciga list merge
|
||||
ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
|
||||
if not ciga_list.empty:
|
||||
ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
|
||||
ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
|
||||
|
||||
return asset_list, survey_list, ciga_list
|
||||
|
||||
|
|
@ -208,6 +219,10 @@ class DataLoader:
|
|||
|
||||
return asset_list
|
||||
|
||||
@staticmethod
|
||||
def correct_ha39_asset_list(asset_list):
|
||||
return asset_list
|
||||
|
||||
@staticmethod
|
||||
def correct_ha6_survey_list(survey_list):
|
||||
|
||||
|
|
@ -337,6 +352,10 @@ class DataLoader:
|
|||
|
||||
return survey_list
|
||||
|
||||
@staticmethod
|
||||
def correct_ha39_survey_list(survey_list):
|
||||
return survey_list
|
||||
|
||||
def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
|
||||
|
||||
# Correct the asset list
|
||||
|
|
@ -491,23 +510,10 @@ class DataLoader:
|
|||
ha_name=ha_name,
|
||||
)
|
||||
|
||||
if file_config.get("survey_list"):
|
||||
# TODO: Delete this
|
||||
logger.info("Loading survey list for {}".format(ha_name))
|
||||
survey_list, matched_lookup = self.load_survey_list(
|
||||
asset_list=asset_list,
|
||||
file_path=file_config["survey_list"]["filepath"],
|
||||
ha_name=ha_name,
|
||||
sheet_name=file_config["survey_list"]["sheetname"]
|
||||
)
|
||||
else:
|
||||
survey_list = None
|
||||
matched_lookup = None
|
||||
|
||||
data[ha_name] = {
|
||||
"asset_list": asset_list,
|
||||
"survey_list": survey_list,
|
||||
"matched_lookup": matched_lookup
|
||||
"ciga_list": ciga_list
|
||||
}
|
||||
|
||||
self.data = data
|
||||
|
|
@ -1288,42 +1294,9 @@ def app():
|
|||
# List all of the data in the folder
|
||||
directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
|
||||
|
||||
files = {
|
||||
"ha_1": {
|
||||
"asset_list": {
|
||||
"filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
|
||||
"sheetname": "Energy data"
|
||||
}
|
||||
},
|
||||
"ha_6": {
|
||||
"asset_list": {
|
||||
"filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
|
||||
"sheetname": "HA 6"
|
||||
},
|
||||
"survey_list": {
|
||||
"filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
|
||||
"sheetname": "HA 6"
|
||||
}
|
||||
},
|
||||
"ha_14": {
|
||||
"asset_list": {
|
||||
"filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
|
||||
"sheetname": "HA 14"
|
||||
}
|
||||
},
|
||||
"ha_39": {
|
||||
"asset_list": {
|
||||
"filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
|
||||
"sheetname": "Sheet1"
|
||||
}
|
||||
},
|
||||
"ha_107": {
|
||||
"asset_list": {
|
||||
"filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
|
||||
"sheetname": "HA 107"
|
||||
}
|
||||
}
|
||||
}
|
||||
priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"]
|
||||
# Filter down the directories to only the priority HAs
|
||||
directories = [d for d in directories if d.split("/")[2] in priority_has]
|
||||
|
||||
loader = DataLoader(directories, use_cache)
|
||||
loader.load()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue