creating checking code for Stonewater

This commit is contained in:
Khalim Conn-Kowlessar 2024-12-10 18:55:30 +00:00
parent 3c98cfa7cc
commit 2b7ca82d09

View file

@ -2699,28 +2699,6 @@ def identify_incorrect_pacakges():
# The next check is to identify properties with specific features that are not conducive to specific packages. E.g.
# Solar PV packages for properties that have another dwelling above
z = units_with_assigned_packages[
units_with_assigned_packages["Package Ref"].isin(
[
"3A", "3B", "4", 4
]
)
]
z["Roof Type"].value_counts()
z["Survey: Main Roof Type"].value_counts()
z[z["Survey: Main Roof Type"].str.contains("A Another dwelling above")][
"Survey: Matching Address ID"].value_counts()
zz = z[z["Survey: Main Roof Type"].str.contains("A Another dwelling above")][
["Survey: Matching Address ID", "Survey: Org. ref.", "Survey: Main Roof Type"]
].drop_duplicates()
zz = zz.sort_values("Survey: Matching Address ID")
zz.to_csv(os.path.join(CUSTOMER_FOLDER_PATH, "3A, 3B or 4 Packages with a dwelling above.csv"), index=False)
z[z["Survey: Main Roof Type"].str.contains("A Another dwelling above")]["Package Ref"].value_counts()
# Label properties that have been matched to a package, during coordination, that includes Solar PV and has
# a property with a dwelling above
units_with_assigned_packages["Invalid Roof Type for Solar - coordination to be reviewed"] = (
@ -2731,6 +2709,60 @@ def identify_incorrect_pacakges():
# Label properties that have a dwelling above in the Parity data, and weren't surveyed, but have been assigned
# a package that includes solar PV
units_with_assigned_packages["Invalid Roof Type for Solar - coordination to be reviewed"] = (
(units_with_assigned_packages["Package Ref"].isin(["3A", "3B", "4", 4])) & (
units_with_assigned_packages["Survey: Main Roof Type"].str.contains("A Another dwelling above")
)
)
# We now iterate through postcodes and find anomalous properties based on the Parity data and survey data
fields_to_check = [
'Wall Type', 'Roof Type', 'Heating', 'Main Fuel',
'Survey: Main Wall Type',
'Survey: Main Roof Type', 'Survey: Primary Heating System'
]
# Create an empty dictionary to store results
aggregated_results = {}
units_with_assigned_packages['Wall Type'] = units_with_assigned_packages['Wall Type'].str.replace(
r'\s*\(.*?\)', '', regex=True
)
def check_mixed_types(row):
    """Return True when *row* has non-zero counts for more than one primary type.

    Only labels that contain a ':' are considered; the text before the first
    ':' is treated as the primary type. A primary type is "present" when at
    least one of its columns holds a count greater than zero.

    Args:
        row: One row of a per-postcode count table, as passed by
            ``DataFrame.apply(..., axis=1)`` (a label -> value mapping).

    Returns:
        bool: True if two or more distinct primary types are present.
    """
    # Read the labels from the row itself rather than from a DataFrame in the
    # enclosing scope, so the result never depends on which table an outer
    # loop variable currently points to.
    primary_types_present = {
        label.split(':')[0]
        for label, count in row.items()
        # Non-string labels cannot carry a "primary: detail" name, so skip
        # them instead of raising TypeError on the ':' membership test.
        # The count is only compared for labels that contain ':'.
        if isinstance(label, str) and ':' in label and count > 0
    }
    return len(primary_types_present) > 1  # True if more than one primary type
# Process each field
for field in fields_to_check:
# Group by postcode and count occurrences of each unique value
field_counts = (
units_with_assigned_packages.groupby(['Postcode', field])
.size()
.unstack(fill_value=0)
.reset_index()
)
# Calculate dominant value and percentage before modifying the DataFrame
dominant_value = field_counts.iloc[:, 1:].idxmax(axis=1)
dominant_percentage = (
(field_counts.iloc[:, 1:].max(axis=1) / field_counts.iloc[:, 1:].sum(axis=1)) * 100
)
number_of_properties = field_counts.iloc[:, 1:].sum(axis=1)
# Add these as new columns after computation
field_counts['Dominant Value'] = dominant_value
field_counts['% Dominant'] = dominant_percentage
field_counts['Number of Properties'] = number_of_properties
field_counts['Mixed Type'] = field_counts.apply(check_mixed_types, axis=1)
# Store the result in the dictionary
aggregated_results[field] = field_counts
# if __name__ == "__main__":
# main()