import pandas as pd

from model_data.utils import setup_logger

logger = setup_logger()


class Need:
    """
    Contains methods to read and interface with the NEED dataset.

    The current iteration of this data is the 2021 anonymised dataset, which
    can be found here:
    https://www.gov.uk/government/statistics/national-energy-efficiency-data-framework-need-anonymised-data-2021
    """

    def __init__(self, local_authorities, path) -> None:
        """
        Stores the filter values and the location of the dataset.

        :param local_authorities: list-like of values matched against the
            ``REGION`` column when the data is read
        :param path: location of the csv file, passed to ``pd.read_csv``
        """
        self.local_authorities = local_authorities
        self.path = path

    def read(self) -> pd.DataFrame:
        """
        Reads the NEED dataset from the csv file at ``self.path``.

        Only rows whose ``REGION`` value is contained in
        ``self.local_authorities`` are kept. The result may be empty when
        nothing matches.

        :return: pandas dataframe containing the filtered data
        :raises KeyError: if the csv file has no ``REGION`` column
        """
        logger.info("Reading NEED data - could take a moment")
        df = pd.read_csv(self.path)
        # NOTE(review): the attribute is called local_authorities but the
        # filter is applied to the REGION column - confirm this is intended.
        # Leftover debugging statements (an unused "E9" substring filter and
        # a discarded type() call on the first value) were removed: they
        # raised on an empty or non-string REGION column without
        # contributing to the result.
        return df[df["REGION"].isin(self.local_authorities)]