# Source code for autopredictor.bestscore

import pandas as pd
from tabulate import tabulate

def display_best_score(X: pd.DataFrame, scoring_metric: str) -> pd.DataFrame:
    """
    Identify the best score for a scoring metric and the model that achieved it.

    The result is printed as a GitHub-style table (via ``tabulate``) and also
    returned as a one-row DataFrame.

    Parameters
    ----------
    X : DataFrame
        Scoring results with one row per model (model name as the index) and
        one column per scoring metric.
    scoring_metric : str
        Name of the column in ``X`` to rank the models by. ``'R2'`` (matched
        case-insensitively) is treated as higher-is-better; every other metric
        is treated as lower-is-better.

    Returns
    -------
    DataFrame
        A one-row DataFrame whose index is the best model's name and whose
        single column, named ``scoring_metric``, holds the best score.

    Raises
    ------
    TypeError
        If ``X`` is not a DataFrame, or if ``X`` is empty.
    ValueError
        If ``scoring_metric`` is not a string, is not a column of ``X``, or the
        selected column contains null values.

    Examples
    --------
    >>> from autopredictor.bestscore import display_best_score
    >>> df = pd.DataFrame({'MAE': [5.6, 3.4], 'MSE': [9.4, 21.4],
    ...                    'MAPE': [0.34, 0.45], 'R2': [0.239, 0.712]},
    ...                   index=['Linear Regression', 'Random Forest'])
    >>> best = display_best_score(df, 'MAE')  # also prints the table
    >>> best
                   MAE
    Random Forest  3.4
    >>> display_best_score(df, 'F1')
    Traceback (most recent call last):
        ...
    ValueError: Invalid Scoring metric 'F1'. The specified metric is not in the list of available metrics. Available metrics: MAE, MSE, MAPE, R2.
    """
    # isinstance() already rejects None, so no separate None check is needed.
    if not isinstance(X, pd.DataFrame):
        raise TypeError("Invalid DataFrame provided.")

    # TypeError (rather than ValueError) is kept so existing callers that
    # catch it keep working.
    if X.empty:
        raise TypeError("DataFrame is empty.")

    if not isinstance(scoring_metric, str):
        raise ValueError("scoring_metric must be a string.")

    if scoring_metric not in X.columns:
        # Column labels may be non-strings; stringify them so building the
        # message cannot itself fail and hide the real error.
        available_metrics_string = ", ".join(map(str, X.columns))
        raise ValueError(
            f"Invalid Scoring metric '{scoring_metric}'. "
            "The specified metric is not in the list of available metrics. "
            f"Available metrics: {available_metrics_string}."
        )

    scores = X[scoring_metric]

    if scores.isnull().any():
        raise ValueError(
            f"Invalid Scoring metric '{scoring_metric}'. "
            "The specified metric contains null values. \n"
            "Please handle or remove null values before using this function."
        )

    # R2 is the only supported metric where a larger value is better.
    higher_is_better = scoring_metric.upper() == "R2"

    # Take the value straight from the column instead of a .loc lookup:
    # with duplicate model names in the index, .loc would return a Series
    # instead of a scalar.
    if higher_is_better:
        best_model, best_score = scores.idxmax(), scores.max()
    else:
        best_model, best_score = scores.idxmin(), scores.min()

    result_table = pd.DataFrame({scoring_metric: [best_score]}, index=[best_model])

    print(tabulate(result_table, headers='keys', tablefmt='github', showindex=True))

    return result_table