# Source code for autopredictor.fit

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import LinearSVR, SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error, mean_squared_error
import numpy as np
import pandas as pd


def _has_no_samples(data):
    """Return True when *data* is None or contains zero samples."""
    if data is None:
        return True
    shape = getattr(data, "shape", None)
    if shape is not None:
        # Array-likes (DataFrame, Series, ndarray, sparse matrix) are empty
        # when any axis has length zero; len() is unreliable for some of them.
        return 0 in tuple(shape)
    try:
        return len(data) == 0
    except TypeError:
        # Unsized object: let the estimators decide whether it is usable.
        return False


def _regression_scores(y_true, y_pred):
    """Return the reported regression metrics for one set of predictions."""
    # Computed once and reused for the root-mean-squared error below.
    mse = mean_squared_error(y_true, y_pred)
    return {
        'Mean Absolute Error': mean_absolute_error(y_true, y_pred),
        'Mean Absolute Percentage Error': mean_absolute_percentage_error(y_true, y_pred),
        'R2 Score': r2_score(y_true, y_pred),
        'Mean Squared Error': mse,
        'Root Mean Squared Error': np.sqrt(mse),
    }


def fit(X_train, X_test, y_train, y_test, return_train=False):
    """
    Train and evaluate multiple regression models on the given data.

    Nine scikit-learn regressors are fitted on the training data with their
    default settings and scored on the test data (and, optionally, on the
    training data). A line ``'<model name> trained.'`` is printed after each
    model has been fitted.

    Parameters
    ----------
    X_train : DataFrame
        Training data features.
    X_test : DataFrame
        Test data features.
    y_train : Series
        Training data target values.
    y_test : Series
        Test data target values.
    return_train : bool, optional, default=False
        If True, scores on the training data are computed and returned too.

    Raises
    ------
    ValueError
        If any of the inputs is None or empty.

    Returns
    -------
    tuple of dict
        ``(test_scores, train_scores)``. Each dictionary maps a model name to
        a dictionary of metric name -> score, with the metrics
        'Mean Absolute Error', 'Mean Absolute Percentage Error', 'R2 Score',
        'Mean Squared Error' and 'Root Mean Squared Error'.
        ``train_scores`` is an empty dictionary when ``return_train`` is False.

    Examples
    --------
    >>> X_train = pd.DataFrame({'feature1': [1, 2, 3], 'feature2': [4, 5, 6]})
    >>> y_train = pd.Series([10, 20, 30])
    >>> X_test = pd.DataFrame({'feature1': [7, 8, 9], 'feature2': [10, 11, 12]})
    >>> y_test = pd.Series([40, 50, 60])
    >>> scores_test, scores_train = fit(
    ...     X_train, X_test, y_train, y_test, return_train=True
    ... )  # doctest: +SKIP
    >>> scores_test['Linear Regression']['R2 Score']  # doctest: +SKIP
    >>> scores_train['Linear Regression']['Mean Absolute Error']  # doctest: +SKIP
    """
    # Validate before building any estimator so bad input fails fast.
    if any(_has_no_samples(data) for data in (X_train, X_test, y_train, y_test)):
        raise ValueError('Please input a valid DataFrame')

    models = {
        'Linear Regression': LinearRegression(),
        'Linear Regression (L1)': Lasso(),
        'Linear Regression (L2)': Ridge(),
        'Linear Support Vector Machine': LinearSVR(),
        'Support Vector Machine': SVR(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'AdaBoost': AdaBoostRegressor(),
    }

    scores_list_test = {}
    scores_list_train = {}  # stays empty unless return_train is True

    for name, model in models.items():
        model.fit(X_train, y_train)
        print(f'{name} trained.')

        # Predict once per dataset and derive every metric from that result.
        if return_train:
            scores_list_train[name] = _regression_scores(
                y_train, model.predict(X_train)
            )
        scores_list_test[name] = _regression_scores(y_test, model.predict(X_test))

    return (scores_list_test, scores_list_train)