Source code for autopredictor.show_all

import pandas as pd
from tabulate import tabulate


[docs]
def show_all(result):
    """
    This function converts the trained regression model scores stored in a dictionary by 
    the "fit" function into a DataFrame, sorted alphabetically by model. It also 
    outputs the DataFrame in a table format.

    Parameters
    ----------
    result : dict
        A dictionary containing scoring metrics data for each regression model. The keys 
        represent model names where the values represent the scoring results and should 
        be numeric.

    Returns
    -------
    DataFrame
        A DataFrame containing all scoring metrics results alongside the corresponding 
        model, sorted alphabetically.


    Raises
    ------
    TypeError
        If result is not a dictionary.
    ValueError
        If result dictionary is empty.
        If there is invalid scoring metrics in the result dictionary's value.
        If the number of scoring metrics are not correct.

    Examples
    --------
    >>> from autopredictor.show_all import show_all
    >>> model_scores = {
        'Linear Regression': {'Mean Absolute Error': 0.453,
                            'Mean Absolute Percentage Error': 0.346,
                            'R2 Score': 0.512,
                            'Mean Squared Error': 0.567,
                            'Root Mean Squared Error': 0.987},
        'Linear Regression (L1)': {'Mean Absolute Error': 61.2,
                                    'Mean Absolute Percentage Error': 0.457,
                                    'R2 Score': 0.239,
                                    'Mean Squared Error': 0.873,
                                    'Root Mean Squared Error': 72.4}
                                }
    >>> test_scores = show_all(model_scores)        
    """

    # Check if result is of type dictionary
    if not isinstance(result, dict):
        raise TypeError("Input should be of dictionary type.")
    
    # Check if result is an empty dictionary
    if result == {}:
        raise ValueError("Input should not be an empty dictionary. No training scores are avaialble. Call fit function to test the model.")
    
    # Check if inner dictionaries have correct scoring metrics
    valid_metrics = {"MEAN ABSOLUTE ERROR", "MEAN ABSOLUTE PERCENTAGE ERROR", "R2 SCORE", "MEAN SQUARED ERROR", "ROOT MEAN SQUARED ERROR"}
    for model_name, model_score in result.items():
        if not all(metric.upper() in valid_metrics for metric in model_score):
            raise ValueError(f"Invalid scoring metrics for model.")
        if len(model_score) != 5:
            raise ValueError(f"Scoring metrics is incomplete.")
    
    # Convert scoring metrics into acronym
    key_mapping = {
        'MEAN ABSOLUTE ERROR': 'MAE',
        'MEAN ABSOLUTE PERCENTAGE ERROR': 'MAPE',
        'R2 SCORE': 'R2',
        'MEAN SQUARED ERROR': 'MSE',
        'ROOT MEAN SQUARED ERROR': 'RMSE'
    }

    for model_name, model_metric in result.items():
        updated_scores = {key_mapping[key.upper()]:value for key, value in model_metric.items()}
        result[model_name] = updated_scores
    
        
    # Convert the dictionary to a DataFrame
    result_df = pd.DataFrame.from_dict(result, orient='index')
    result_df = result_df.sort_index()

    # Convert DataFrame to tabulate and print it in table format
    result_table = tabulate(result_df, headers='keys', tablefmt='github')
    print(result_table)

    return result_df