preparing data for hubspot upload

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-05 12:46:15 +00:00
parent 3ab1e94ea1
commit 61eb2349ba
2 changed files with 279 additions and 1 deletions

View file

@ -376,6 +376,7 @@ class AssetList:
}
self.variable_mappings = {}
self.hubspot_data = None
self.rename_map = {}
self.keep_variables = []
@ -1526,3 +1527,221 @@ class AssetList:
flat_data = pd.DataFrame(flat_data)
self.flat_data = flat_data
def prepare_for_crm(
    self,
    contact_details: pd.DataFrame,
    company_domain: str,
    crm_pipeline_name: str,
    first_dealstage: str,
    assigned_surveyors: pd.DataFrame,
) -> None:
    """
    Prepare the standardised asset list for upload into Hubspot.

    Each property that has a recognised opportunity (a solar reason, falling back to a cavity reason) becomes
    one row carrying the listing, contact, company, deal and line item columns of the Hubspot import schema.
    The result is stored on ``self.hubspot_data``; nothing is returned.

    :param contact_details: One row per property, keyed by the ``self.landlord_property_id`` column, with the
        columns "first name", "last name", "email" and "phone".
    :param company_domain: Company domain written to every row.
    :param crm_pipeline_name: Pipeline name written to every row.
    :param first_dealstage: Deal stage written to every row.
    :param assigned_surveyors: One row per property, keyed by the ``self.landlord_property_id`` column, with a
        "surveyor_email" column that is used as the deal owner.
    :raises ValueError: If a non-null cavity/solar reason in the asset list is missing from the lookup table.
    :raises KeyError: If a column required by the Hubspot schema is missing from the prepared data.
    :return: None
    """
    # This is a placeholder for now
    # This maps the opportunities as we reference them, to the product data as stored in Hubspot
    product_lookup_table = {
        "Non-Intrusive Data Showed Cavity Extraction": {
            "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
        },
        "Non-Intrusive Data Showed Empty Cavity": {
            "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
        },
        "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed": {
            "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
        },
        "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed": {
            "name": "Extract & Fill - ECO4", "id": 100307905778, "unit_price": 500
        },
        "EPC Data Showed Empty Cavity": {
            "name": "Empty Cavity & Loft - ECO4", "id": 82733738177, "unit_price": 1000
        },
        "Solid Floor, Insulated, No Solar": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
        "Solid Floor, Insulated, Needs Loft": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
        "Other Floor, Insulated, No Solar": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
        "Other Floor, Insulated, Needs Loft": {
            "name": "Solar PV - ECO4", "id": 82623589564, "unit_price": 1608
        },
    }

    # Check that every opportunity present in the data is covered by the lookup table. Missing values mean
    # "no opportunity" and are handled further down, so they are dropped before the check.
    referenced_products = (
        set(self.standardised_asset_list["cavity_reason"].dropna())
        | set(self.standardised_asset_list["solar_reason"].dropna())
    )
    unknown_products = sorted(
        (product for product in referenced_products if product not in product_lookup_table), key=str
    )
    if unknown_products:
        raise ValueError(
            f"We have products not referenced in the lookup table - check this: {unknown_products}"
        )

    programme_data = self.standardised_asset_list.copy()

    # Exclusions - these are properties we won't treat for the moment
    product_exclusions = [
        "Other Floor, Insulated, No Solar",
        "Other Floor, Insulated, Needs Loft",
    ]
    if product_exclusions:
        logger.warning("Excluding products: %s", product_exclusions)
        programme_data = programme_data[~programme_data["solar_reason"].isin(product_exclusions)]

    # Merge on the contact details
    programme_data = programme_data.merge(
        contact_details,
        how="left",
        left_on=self.STANDARD_LANDLORD_PROPERTY_ID,
        right_on=self.landlord_property_id,
    )
    programme_data["Company Domain Name <COMPANY domain>"] = company_domain

    # The product for a property is its solar opportunity, falling back to its cavity opportunity. This
    # column must hold the opportunity *reason* (not the Hubspot product name), because the reason is the
    # key used to join the product table below.
    programme_data["domna_product"] = programme_data["solar_reason"].where(
        programme_data["solar_reason"].notna(),
        programme_data["cavity_reason"],
    )

    # We filter just on rows where we have a product
    programme_data = programme_data[programme_data["domna_product"].notna()]

    # One row per opportunity reason, carrying the Hubspot line item columns
    product_df = (
        pd.DataFrame(product_lookup_table).T[["name", "id", "unit_price"]]
        .reset_index()
        .rename(
            columns={
                "name": "Name <LINE_ITEM name>",
                "id": 'Product ID <LINE_ITEM hs_product_id>',
                "unit_price": 'Unit price <LINE_ITEM price>',
                "index": "domna_product",
            }
        )
    )
    product_df['Quantity <LINE_ITEM quantity>'] = 1

    # Append on the product data
    programme_data = programme_data.merge(
        product_df,
        how="left",
        on="domna_product",
    )

    # Add in deal and pipeline information
    programme_data["dealname"] = (
        programme_data[self.STANDARD_FULL_ADDRESS] + " : " + programme_data["domna_product"]
    )
    programme_data['Pipeline <DEAL pipeline>'] = crm_pipeline_name
    programme_data['Deal Stage <DEAL dealstage>'] = first_dealstage
    programme_data['Associations: Listing'] = "Property Owner"

    # The surveyor table is keyed by the landlord's own ID column, so align it with the standard ID first
    programme_data = programme_data.merge(
        assigned_surveyors.rename(
            columns={self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID}
        ),
        how="left",
        on=self.STANDARD_LANDLORD_PROPERTY_ID,
    )

    # This maps each Hubspot schema column (key) to the column of programme_data that feeds it (value).
    # A value of None means we don't hold that information yet; the column is emitted as an empty placeholder.
    non_intrusives = self.non_intrusives_present
    schema_mappings = {
        'Name <LISTING hs_name>': self.DOMNA_PROPERTY_ID,  # TODO: Maybe change this?
        'Company Domain Name <COMPANY domain>': 'Company Domain Name <COMPANY domain>',
        'Email <CONTACT email>': 'email',  # TODO: Review
        'First Name <CONTACT firstname>': 'first name',  # TODO: Review
        'Last Name <CONTACT lastname>': 'last name',  # TODO: Review
        'Phone <CONTACT phone>': 'phone',  # TODO: Review
        'Full Address <LISTING full_address>': self.STANDARD_FULL_ADDRESS,
        'Address 1 <LISTING hs_address_1>': self.STANDARD_ADDRESS_1,
        'Address 2 <LISTING hs_address_2>': None,  # TODO: Don't have this for the moment
        'Postcode <LISTING hs_zip>': self.STANDARD_POSTCODE,
        'Property Type <LISTING property_type>': self.STANDARD_PROPERTY_TYPE,
        'Property Sub Type <LISTING property_sub_type>': None,  # TODO: Don't have this for the moment
        'Bedroom(s) <LISTING hs_bedrooms>': None,  # TODO: Don't have this for the moment
        'Domna Property ID <LISTING domna_property_id>': self.DOMNA_PROPERTY_ID,
        'National UPRN <LISTING national_uprn>': (
            self.STANDARD_UPRN if self.STANDARD_UPRN is not None else self.EPC_API_DATA_NAMES["uprn"]
        ),
        'Owner Property ID <LISTING owner_property_id>': self.STANDARD_LANDLORD_PROPERTY_ID,
        'Wall Construction <LISTING wall_construction>': self.STANDARD_WALL_CONSTRUCTION,
        'Heating System <LISTING heating_system>': self.STANDARD_HEATING_SYSTEM,
        'Year Built <LISTING hs_year_built>': self.STANDARD_YEAR_BUILT,
        'Boiler Make <LISTING boiler_make>': None,  # TODO: Don't have this for the moment
        'Boiler Model <LISTING boiler_model>': None,  # TODO: Don't have this for the moment
        # TODO: Don't have this for the moment
        'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>': None,
        'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>': (
            "non-intrusives: Construction" if non_intrusives else None
        ),
        'Non-intrusives: Insulation <LISTING non_intrusives__insulation>': (
            "non-intrusives: Insulated" if non_intrusives else None
        ),
        'Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>': (
            "non-intrusives: Material" if non_intrusives else None
        ),
        'Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>': (
            'non-intrusives: CIGA Check Required' if non_intrusives else None
        ),
        'Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>': (
            'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' if non_intrusives else None
        ),
        'Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>': (
            'non-intrusives: OFF GAS - ROOF ORIENTATION' if non_intrusives else None
        ),
        'Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>': (
            'non-intrusives: Any further surveyor notes' if non_intrusives else None
        ),
        'Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>': (
            'non-intrusives: Surveyors Name' if non_intrusives else None
        ),
        'CIGA: Date Requested <LISTING ciga__date_requested>': None,  # TODO: Don't have this for the moment
        'CIGA: Cavity Guarantee Found <LISTING ciga__cavity_guarantee_found>': None,
        'Last EPC: Is Estimated <LISTING last_epc__is_estimated>': self.EPC_API_DATA_NAMES["estimated"],
        'Last EPC: EPC Rating <LISTING last_epc__epc_rating>': self.EPC_API_DATA_NAMES["current-energy-rating"],
        'Last EPC: SAP Rating <LISTING last_epc__sap_rating>': self.EPC_API_DATA_NAMES["current-energy-efficiency"],
        'Last EPC: Main Heating Description <LISTING last_epc__main_heating_description>': self.EPC_API_DATA_NAMES[
            "mainheat-description"],
        'Last EPC: Heating Controls <LISTING last_epc__heating_controls>': self.EPC_API_DATA_NAMES[
            "mainheatcont-description"],
        'Last EPC: Lodgement Date <LISTING last_epc__lodgement_date>': self.EPC_API_DATA_NAMES["inspection-date"],
        'Last EPC: Floor Area <LISTING last_epc__floor_area>': self.EPC_API_DATA_NAMES["total-floor-area"],
        'Last EPC: Wall <LISTING last_epc__wall>': self.EPC_API_DATA_NAMES["walls-description"],
        'Last EPC: Roof <LISTING last_epc__roof>': self.EPC_API_DATA_NAMES["roof-description"],
        'Last EPC: Floor <LISTING last_epc__floor>': self.EPC_API_DATA_NAMES["floor-description"],
        'Last EPC: Room Height <LISTING last_epc__room_height>': self.EPC_API_DATA_NAMES["floor-height"],
        'Last EPC: Age Band <LISTING last_epc__age_band>': self.EPC_API_DATA_NAMES["construction-age-band"],
        'Deal Stage <DEAL dealstage>': 'Deal Stage <DEAL dealstage>',
        'Pipeline <DEAL pipeline>': 'Pipeline <DEAL pipeline>',
        'Expected Commencement Date <DEAL expected_commencement_date>': None,  # TODO: Need to set this
        'Deal Name <DEAL dealname>': "dealname",
        'Product ID <LINE_ITEM hs_product_id>': 'Product ID <LINE_ITEM hs_product_id>',
        'Name <LINE_ITEM name>': 'Name <LINE_ITEM name>',
        'Unit price <LINE_ITEM price>': 'Unit price <LINE_ITEM price>',
        'Quantity <LINE_ITEM quantity>': 'Quantity <LINE_ITEM quantity>',
        'Deal Owner': 'surveyor_email',
        'Amount <DEAL amount>': 'Unit price <LINE_ITEM price>',
    }

    # Fail early, naming every source column the schema needs that the prepared data does not have
    missing_sources = sorted(
        {
            source for source in schema_mappings.values()
            if source is not None and source not in programme_data.columns
        },
        key=str,
    )
    if missing_sources:
        raise KeyError(f"Columns required for the Hubspot upload are missing: {missing_sources}")

    # We now create the finalised dataset to be uploaded into Hubspot. It is built one Hubspot column at a
    # time rather than by select-then-rename: several Hubspot columns share the same source column (e.g. the
    # unit price feeds both the line item price and the deal amount), and a rename keyed by source column
    # can only produce one of them.
    hubspot_data = pd.DataFrame(
        {
            target: programme_data[source]
            for target, source in schema_mappings.items()
            if source is not None
        },
        index=programme_data.index,
    )

    # Add empty placeholder columns for the information we haven't got right now
    for target, source in schema_mappings.items():
        if source is None:
            hubspot_data[target] = None

    self.hubspot_data = hubspot_data

View file

@ -262,7 +262,25 @@ def app():
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Property Ref"
landlord_property_id = "Property ref"
# For Westward
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
# data_filename = "WESTWARD - completed list..xlsx"
# sheet_name = "Sheet1"
# postcode_column = "WFT EDIT Postcode"
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build date"
# landlord_os_uprn = "UPRN"
# landlord_property_type = "Location type"
# landlord_wall_construction = "Wall Construction (EPC)"
# landlord_heating_system = "Heat Source"
# landlord_existing_pv = "PV (Y/N)"
# landlord_property_id = "Place ref"
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -454,6 +472,47 @@ def app():
asset_list.flat_analysis()
# Convert to a format suitable for CRM
contact_details = pd.DataFrame(
[
{
asset_list.landlord_property_id: "EXETEMORH0100010",
"first name": "Khalim",
"last name": "Conn-Kowlessar",
"email": "kconnkowlessar@gmail.com",
"phone": "075399248"
}
]
)
assigned_surveyors = pd.DataFrame(
[
{
asset_list.landlord_property_id: "EXETEMORH0100010",
"surveyor_name": "Khalim Conn-Kowlessar",
"surveyor_email": "khalim@domna.homes",
}
]
)
# TODO: Sort the output by postcode
company_domain = "ealing.gov.uk"
crm_pipeline_name = "Survey Management"
first_dealstage = "READY TO BEGIN SCHEDULING"
# TODO - temp, upload to either SharePoint or AWS
hubspot_template = pd.read_csv("~/Downloads/Hubspot Upload Template - Demo V2(Template).csv")
hubspot_schema = hubspot_template.columns.tolist()
asset_list.prepare_for_crm(
contact_details=contact_details,
assigned_surveyors=assigned_surveyors,
company_domain=company_domain,
crm_pipeline_name=crm_pipeline_name,
first_dealstage=first_dealstage
)
hubspt_data = asset_list.hubspot_data
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data