Octocat This notebook is part of a GitHub repository: https://github.com/pessini/moby-bikes
MIT Licensed
Author: Leandro Pessini

In [2]:
import pandas as pd
from pandas import MultiIndex, Int16Dtype
import numpy as np
import datetime
import sys
import os
import joblib

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Models & Evaluation
from sklearn.model_selection import KFold

# Boost models
import xgboost as xgb
from sklearn import metrics

# Custom objects
sys.path.insert(0, os.path.abspath('../src/'))
import experiment_tracker as et

import time
import warnings
warnings.simplefilter('ignore', FutureWarning)
In [2]:
# ideas_df = pd.read_excel('../documentation/experiment_tracker.xlsx', sheet_name='Ideas')
# experiments_df = pd.read_excel('../documentation/experiment_tracker.xlsx', sheet_name='Experiments')
In [4]:
# creates a new object to keep track of the experiments
experiment_tracker = et.ExperimentTracker()
In [3]:
df_train = pd.read_csv('../data/processed/df_train.csv')
df_test = pd.read_csv('../data/processed/df_test.csv')
In [5]:
df = df_train.copy()
X = df.drop(['count'], axis=1)
y = df.pop('count')
all_columns = list(X.columns)
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)
X.shape
Out[5]:
(8760, 22)
In [6]:
test_df = df_test.copy()
X_test = test_df.drop(['count'], axis=1)
y_test = test_df.pop('count')
X_test.shape
Out[6]:
(1464, 22)
In [7]:
def get_metrics_to_Experiment(dict_scores = None) -> list:
    if dict_scores is None:
        dict_scores = {}
    rsme = et.Score('RSME', '{:.4f}'.format(dict_scores['train_rsme']), '{:.4f}'.format(dict_scores['val_rsme']))
    mae = et.Score('MAE', '{:.4f}'.format(dict_scores['train_mae']), '{:.4f}'.format(dict_scores['val_mae']))
    return [rsme, mae]
In [8]:
import category_encoders as ce

def preprocessor(predictors: list) -> ColumnTransformer:
    # Setting remainder='passthrough' will mean that all columns not specified in the list of “transformers” 
    #   will be passed through without transformation, instead of being dropped

    ##################### Categorical variables #####################
    all_cat_vars = ['timesofday','dayofweek','holiday','peak','hour','working_day','season','month']
    cat_vars = [categorical_var for categorical_var in all_cat_vars if categorical_var in predictors]

    # categorical variables
    cat_pipe = Pipeline([
        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
    ])

    cat_encoder = 'cat', cat_pipe, cat_vars

    ##################### Numerical variables #####################
    all_num_vars = ['rain', 'temp', 'rhum','wdsp','temp_r']
    num_vars = [numerical_var for numerical_var in all_num_vars if numerical_var in predictors]

    num_pipe = Pipeline([
        ('scaler', StandardScaler())
        # ('scaler', MinMaxScaler())
    ])

    num_enconder =  'num', num_pipe, num_vars

    ##################### Ordinal variables #####################
    all_ord_vars = ['wind_speed_group','rainfall_intensity']
    ord_vars = [ordinal_var for ordinal_var in all_ord_vars if ordinal_var in predictors]

    ordinal_cols_mapping = []
    if 'wind_speed_group' in predictors:
        ordinal_cols_mapping.append(
            {"col":"wind_speed_group",    
            "mapping": {
                'Calm / Light Breeze': 0, 
                'Breeze': 1, 
                'Moderate Breeze': 2, 
                'Strong Breeze / Near Gale': 3, 
                'Gale / Storm': 4
            }}
        )

    if 'rainfall_intensity' in predictors:
        ordinal_cols_mapping.append(
            {"col":"rainfall_intensity",    
            "mapping": {
                'no rain': 0, 
                'drizzle': 1, 
                'light rain': 2, 
                'moderate rain': 3, 
                'heavy rain': 4
            }}
        )

    # ordinal variables
    ord_pipe = Pipeline([
        ('ordinal', ce.OrdinalEncoder(mapping=ordinal_cols_mapping))
    ])

    ord_enconder =  'ordinal', ord_pipe, ord_vars
    
    #################################################################################
    
    orig_vars = [var for var in predictors if var not in cat_vars and var not in num_vars and var not in ord_vars]
    orig_enconder = 'pass_vars', 'passthrough', orig_vars
     # ['temp_bin','rhum_bin']
    # ord_pipe = 'passthrough'

    transformers_list = []
    transformers_list.append(cat_encoder) if cat_vars else None
    transformers_list.append(ord_enconder) if ord_vars else None
    transformers_list.append(num_enconder) if num_vars else None
    # transformers_list.append(orig_enconder) if orig_vars else None
    
    return ColumnTransformer(transformers=transformers_list, 
                             remainder='drop')
In [9]:
def summarize_dict(dictionary, function):
    return {k: function(v) for k,v in dictionary.items()}
In [10]:
def kfold_score(params, predictors, X=X, y=y, n_folds=5, verbose=50, early_stopping_rounds=10):
    
    pipe_xgboost = Pipeline([
        ('preprocessor', preprocessor(predictors)),
        ('model', xgb.XGBRegressor(**params))
    ])

    X = X[[c for c in X.columns if c in predictors]]
    cv = KFold(n_splits=n_folds, shuffle=True, random_state=2022)
    scores = {"train_rsme":[],"val_rsme":[],"train_mae":[],"val_mae":[]}

    for n_fold, (train_index, test_index) in enumerate(cv.split(X, y)):
        print('#'*40, f'Fold {n_fold+1} out of {cv.n_splits}', '#'*40)
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Xy = xgb.DMatrix(X_train, y_train, enable_categorical=True)
        X_test_transformed = pipe_xgboost['preprocessor'].fit_transform(X_test)
        pipe_xgboost.fit(X_train, y_train,
                         model__eval_set=[(X_test_transformed, y_test)], 
                         model__early_stopping_rounds=early_stopping_rounds,
                         model__verbose=verbose)
        # pipe_xgboost.fit(X_train, y_train)
        # print(pipe_xgboost['model'].evals_result())

        # Predict on training and validation set
        y_pred_train = pipe_xgboost.predict(X_train)
        y_pred_val = pipe_xgboost.predict(X_test)

        # Calculate the RSME and MAE
        # If squared = True returns MSE value, if False returns RMSE value.
        scores['train_rsme'].append(metrics.mean_squared_error(y_train, y_pred_train, squared=False))
        scores['val_rsme'].append(metrics.mean_squared_error(y_test, y_pred_val, squared=False))
        scores['train_mae'].append(metrics.mean_absolute_error(y_train, y_pred_train))
        scores['val_mae'].append(metrics.mean_absolute_error(y_test, y_pred_val))

        print(f"Fold {n_fold+1} - best iteration: {pipe_xgboost['model'].get_booster().best_iteration}\n")

    return summarize_dict(scores, np.mean), pipe_xgboost
In [11]:
#predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

Hyperparameters tuning

In [11]:
# Baseline model
predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':3,
                  'n_estimators': 500,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=3)
exp_xgboost = et.Experiment('XGBoost (Baseline)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(),
                               score=get_metrics_to_Experiment(dict_scores), notes='Baseline XGBoost')
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 3 ########################################
[0]	validation_0-rmse:4.21537
[50]	validation_0-rmse:2.74378
[59]	validation_0-rmse:2.74717
Fold 1 - best iteration: 50

######################################## Fold 2 out of 3 ########################################
[0]	validation_0-rmse:3.78092
[50]	validation_0-rmse:2.50728
[52]	validation_0-rmse:2.51101
Fold 2 - best iteration: 43

######################################## Fold 3 out of 3 ########################################
[0]	validation_0-rmse:3.96064
[50]	validation_0-rmse:2.65082
[60]	validation_0-rmse:2.65222
Fold 3 - best iteration: 50

--- New Experiment added! ---
ID#: 5375691408 
Algorithm: XGBoost (Baseline) 
Predictors: ['temp', 'rhum', 'dayofweek', 'holiday', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0, 'gpu_id': -1, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.300000012, 'max_delta_step': 0, 'max_depth': 3, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 500, 'n_jobs': 4, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 1, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'seed': 42, 'eval_metric': 'rmse'}
Date: 09/07/2022 19:07:30
Metric: [{ 'metric': RSME, 'train': 2.4729,  'validation': 2.6320, 'test': None }, { 'metric': MAE, 'train': 1.8143,  'validation': 1.9269, 'test': None }]
Notes: Baseline XGBoost
In [13]:
predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':3,
                  'eta': 0.2,
                  'n_estimators': 500,
                  'subsample': 1,
                  'colsample_bytree': 0.5,
                  'gamma': 1,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=3)
exp_xgboost = et.Experiment('XGBoost 1', predictors=predictors, hyperparameters=xgb_model['model'].get_params(),
                               score=get_metrics_to_Experiment(dict_scores), notes='')
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 3 ########################################
[0]	validation_0-rmse:4.51870
[50]	validation_0-rmse:2.74515
[80]	validation_0-rmse:2.73048
Fold 1 - best iteration: 71

######################################## Fold 2 out of 3 ########################################
[0]	validation_0-rmse:4.08433
[50]	validation_0-rmse:2.50050
[87]	validation_0-rmse:2.48756
Fold 2 - best iteration: 78

######################################## Fold 3 out of 3 ########################################
[0]	validation_0-rmse:4.26917
[50]	validation_0-rmse:2.66483
[64]	validation_0-rmse:2.66475
Fold 3 - best iteration: 54

--- New Experiment added! ---
ID#: 6510649104 
Algorithm: XGBoost 1 
Predictors: ['temp', 'rhum', 'dayofweek', 'holiday', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.200000003, 'max_delta_step': 0, 'max_depth': 3, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 500, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 1, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.2, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:16:22
Metric: [{ 'metric': RSME, 'train': 2.4930,  'validation': 2.6233, 'test': None }, { 'metric': MAE, 'train': 1.8292,  'validation': 1.9204, 'test': None }]

In [14]:
predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':9,
                  'eta': 0.01,
                  'n_estimators': 1000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=250, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (max_depth, eta, subsampel and estimators)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:4.98932
[250]	validation_0-rmse:2.74287
[464]	validation_0-rmse:2.69763
Fold 1 - best iteration: 434

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.92424
[250]	validation_0-rmse:2.67194
[500]	validation_0-rmse:2.62396
[554]	validation_0-rmse:2.62388
Fold 2 - best iteration: 525

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.69002
[250]	validation_0-rmse:2.47367
[486]	validation_0-rmse:2.43117
Fold 3 - best iteration: 456

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.71785
[250]	validation_0-rmse:2.55160
[459]	validation_0-rmse:2.51858
Fold 4 - best iteration: 430

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.85740
[250]	validation_0-rmse:2.70376
[474]	validation_0-rmse:2.64725
Fold 5 - best iteration: 444

--- New Experiment added! ---
ID#: 6511250544 
Algorithm: XGBoost (max_depth, eta, subsampel and estimators) 
Predictors: ['temp', 'rhum', 'dayofweek', 'holiday', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 9, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.01, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:16:41
Metric: [{ 'metric': RSME, 'train': 1.8872,  'validation': 2.5822, 'test': None }, { 'metric': MAE, 'train': 1.3823,  'validation': 1.8758, 'test': None }]

In [15]:
experiment_tracker.print_partial_results(filter_metric='rsme')
--- Experiments ---

Model: XGBoost (Baseline)
RSME - Train: 2.4729 - Validation: 2.6320 - Test: None

Model: XGBoost 1
RSME - Train: 2.4930 - Validation: 2.6233 - Test: None

Model: XGBoost (max_depth, eta, subsampel and estimators)
RSME - Train: 1.8872 - Validation: 2.5822 - Test: None
In [16]:
predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':9,
                  'eta': 0.01,
                  'n_estimators': 1000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=250, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (gamma: 1.5)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:4.98932
[250]	validation_0-rmse:2.74273
[464]	validation_0-rmse:2.69694
Fold 1 - best iteration: 435

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.92424
[250]	validation_0-rmse:2.67145
[500]	validation_0-rmse:2.62315
[562]	validation_0-rmse:2.62348
Fold 2 - best iteration: 533

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.69001
[250]	validation_0-rmse:2.47430
[500]	validation_0-rmse:2.43228
Fold 3 - best iteration: 470

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.71784
[250]	validation_0-rmse:2.54976
[456]	validation_0-rmse:2.51627
Fold 4 - best iteration: 426

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.85738
[250]	validation_0-rmse:2.70294
[477]	validation_0-rmse:2.64475
Fold 5 - best iteration: 448

--- New Experiment added! ---
ID#: 6511302688 
Algorithm: XGBoost (gamma: 1.5) 
Predictors: ['temp', 'rhum', 'dayofweek', 'holiday', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 9, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.01, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:17:01
Metric: [{ 'metric': RSME, 'train': 1.8869,  'validation': 2.5816, 'test': None }, { 'metric': MAE, 'train': 1.3840,  'validation': 1.8753, 'test': None }]

In [17]:
experiment_tracker.print_partial_results(filter_metric='rsme')
--- Experiments ---

Model: XGBoost (Baseline)
RSME - Train: 2.4729 - Validation: 2.6320 - Test: None

Model: XGBoost 1
RSME - Train: 2.4930 - Validation: 2.6233 - Test: None

Model: XGBoost (max_depth, eta, subsampel and estimators)
RSME - Train: 1.8872 - Validation: 2.5822 - Test: None

Model: XGBoost (gamma: 1.5)
RSME - Train: 1.8869 - Validation: 2.5816 - Test: None
In [18]:
predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':7,
                  'eta': 0.01,
                  'n_estimators': 1000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=250, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (max_depth: 7)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:4.98938
[250]	validation_0-rmse:2.74379
[500]	validation_0-rmse:2.68098
[567]	validation_0-rmse:2.68054
Fold 1 - best iteration: 538

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.92448
[250]	validation_0-rmse:2.68077
[500]	validation_0-rmse:2.62479
[634]	validation_0-rmse:2.62210
Fold 2 - best iteration: 605

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.69044
[250]	validation_0-rmse:2.48931
[500]	validation_0-rmse:2.43671
[583]	validation_0-rmse:2.43432
Fold 3 - best iteration: 554

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.71821
[250]	validation_0-rmse:2.56305
[464]	validation_0-rmse:2.51914
Fold 4 - best iteration: 435

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.85732
[250]	validation_0-rmse:2.70761
[500]	validation_0-rmse:2.64460
[567]	validation_0-rmse:2.64252
Fold 5 - best iteration: 537

--- New Experiment added! ---
ID#: 6511055104 
Algorithm: XGBoost (max_depth: 7) 
Predictors: ['temp', 'rhum', 'dayofweek', 'holiday', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 7, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.01, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:17:18
Metric: [{ 'metric': RSME, 'train': 2.1507,  'validation': 2.5762, 'test': None }, { 'metric': MAE, 'train': 1.5812,  'validation': 1.8766, 'test': None }]

In [12]:
predictors = ['temp','rhum','dayofweek','timesofday','wdsp','rainfall_intensity', 'working_day', 'hour', 'season']

params_xgboost = {'max_depth':7,
                  'eta': 0.01,
                  'n_estimators': 1000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=250, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (- holiday and - peak features)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:4.99104
[250]	validation_0-rmse:2.75939
[500]	validation_0-rmse:2.68531
[660]	validation_0-rmse:2.68095
Fold 1 - best iteration: 631

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.92611
[250]	validation_0-rmse:2.69384
[500]	validation_0-rmse:2.62939
[637]	validation_0-rmse:2.62626
Fold 2 - best iteration: 607

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.69171
[250]	validation_0-rmse:2.50074
[500]	validation_0-rmse:2.44295
[587]	validation_0-rmse:2.43860
Fold 3 - best iteration: 557

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.71944
[250]	validation_0-rmse:2.56594
[500]	validation_0-rmse:2.51415
[534]	validation_0-rmse:2.51421
Fold 4 - best iteration: 505

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.85890
[250]	validation_0-rmse:2.71289
[500]	validation_0-rmse:2.64412
[672]	validation_0-rmse:2.64067
Fold 5 - best iteration: 642

--- New Experiment added! ---
ID#: 5375688864 
Algorithm: XGBoost (- holiday and - peak features) 
Predictors: ['temp', 'rhum', 'dayofweek', 'timesofday', 'wdsp', 'rainfall_intensity', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'enable_categorical': False, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 7, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1000, 'n_jobs': 4, 'num_parallel_tree': 1, 'predictor': 'auto', 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.01, 'seed': 42, 'eval_metric': 'rmse'}
Date: 09/07/2022 19:08:41
Metric: [{ 'metric': RSME, 'train': 2.1291,  'validation': 2.5783, 'test': None }, { 'metric': MAE, 'train': 1.5662,  'validation': 1.8773, 'test': None }]

Saving model

In [13]:
import pickle
pickle.dump(xgb_model, open('../models/xgb_pipeline.pkl', 'wb'))
pickle.dump(xgb_model['model'], open('../models/xgboost.pkl', 'wb'))
In [14]:
xgb_model['model'].save_model('../models/XGBoost.json')
xgb_model['model'].save_model('../models/XGBoost.model')
In [30]:
predictors = ['temp','rhum','dayofweek','timesofday','wdsp','rainfall_intensity', 'hour', 'season']

params_xgboost = {'max_depth':7,
                  'eta': 0.01,
                  'n_estimators': 1000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=250, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (- working_day feat)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:4.99293
[250]	validation_0-rmse:2.76406
[500]	validation_0-rmse:2.69522
[609]	validation_0-rmse:2.69265
Fold 1 - best iteration: 580

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.92741
[250]	validation_0-rmse:2.70217
[500]	validation_0-rmse:2.63041
[529]	validation_0-rmse:2.63042
Fold 2 - best iteration: 499

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.69318
[250]	validation_0-rmse:2.51337
[500]	validation_0-rmse:2.45737
[584]	validation_0-rmse:2.45454
Fold 3 - best iteration: 555

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.72042
[250]	validation_0-rmse:2.58852
[500]	validation_0-rmse:2.54086
[523]	validation_0-rmse:2.54173
Fold 4 - best iteration: 493

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.85950
[250]	validation_0-rmse:2.73400
[500]	validation_0-rmse:2.66597
[674]	validation_0-rmse:2.66306
Fold 5 - best iteration: 644

--- New Experiment added! ---
ID#: 6512059056 
Algorithm: XGBoost (- working_day feat) 
Predictors: ['temp', 'rhum', 'dayofweek', 'timesofday', 'wdsp', 'rainfall_intensity', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 7, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.01, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:22:01
Metric: [{ 'metric': RSME, 'train': 2.1735,  'validation': 2.5935, 'test': None }, { 'metric': MAE, 'train': 1.5976,  'validation': 1.8900, 'test': None }]

In [31]:
predictors = ['temp','rhum','dayofweek','timesofday','wdsp','rainfall_intensity', 'hour', 'working_day']

params_xgboost = {'max_depth':7,
                  'eta': 0.01,
                  'n_estimators': 1000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=250, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (- season feat)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:4.99089
[250]	validation_0-rmse:2.79409
[500]	validation_0-rmse:2.73584
[540]	validation_0-rmse:2.73672
Fold 1 - best iteration: 511

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.92489
[250]	validation_0-rmse:2.71905
[500]	validation_0-rmse:2.67074
[512]	validation_0-rmse:2.67069
Fold 2 - best iteration: 482

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.69081
[250]	validation_0-rmse:2.54104
[496]	validation_0-rmse:2.49853
Fold 3 - best iteration: 466

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.71852
[250]	validation_0-rmse:2.59544
[500]	validation_0-rmse:2.55696
[515]	validation_0-rmse:2.55722
Fold 4 - best iteration: 486

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.85769
[250]	validation_0-rmse:2.74029
[500]	validation_0-rmse:2.69212
[512]	validation_0-rmse:2.69169
Fold 5 - best iteration: 482

--- New Experiment added! ---
ID#: 6511058176 
Algorithm: XGBoost (- season feat) 
Predictors: ['temp', 'rhum', 'dayofweek', 'timesofday', 'wdsp', 'rainfall_intensity', 'hour', 'working_day']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00999999978, 'max_delta_step': 0, 'max_depth': 7, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 1000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.01, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:23:22
Metric: [{ 'metric': RSME, 'train': 2.2543,  'validation': 2.6297, 'test': None }, { 'metric': MAE, 'train': 1.6586,  'validation': 1.9207, 'test': None }]

In [32]:
experiment_tracker.print_partial_results(filter_metric='rsme')
--- Experiments ---

Model: XGBoost (Baseline)
RSME - Train: 2.4729 - Validation: 2.6320 - Test: None

Model: XGBoost 1
RSME - Train: 2.4930 - Validation: 2.6233 - Test: None

Model: XGBoost (max_depth, eta, subsampel and estimators)
RSME - Train: 1.8872 - Validation: 2.5822 - Test: None

Model: XGBoost (gamma: 1.5)
RSME - Train: 1.8869 - Validation: 2.5816 - Test: None

Model: XGBoost (max_depth: 7)
RSME - Train: 2.1507 - Validation: 2.5762 - Test: None

Model: XGBoost (- holiday and - peak features)
RSME - Train: 2.1291 - Validation: 2.5783 - Test: None

Model: XGBoost (- working_day feat)
RSME - Train: 2.1735 - Validation: 2.5935 - Test: None

Model: XGBoost (- season feat)
RSME - Train: 2.2543 - Validation: 2.6297 - Test: None
In [33]:
predictors = ['temp','rhum','dayofweek', 'timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':11,
                  'eta': 0.001,
                  'n_estimators': 5000,
                  'subsample': 0.7,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=1000, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (eta: 0.001)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:5.01762
[1000]	validation_0-rmse:3.24272
[2000]	validation_0-rmse:2.80975
[3000]	validation_0-rmse:2.71778
[3916]	validation_0-rmse:2.70260
Fold 1 - best iteration: 3887

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.95253
[1000]	validation_0-rmse:3.16989
[2000]	validation_0-rmse:2.73354
[3000]	validation_0-rmse:2.63878
[4000]	validation_0-rmse:2.61979
[4219]	validation_0-rmse:2.61880
Fold 2 - best iteration: 4190

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.71785
[1000]	validation_0-rmse:2.94016
[2000]	validation_0-rmse:2.52714
[3000]	validation_0-rmse:2.44703
[4000]	validation_0-rmse:2.43498
[4018]	validation_0-rmse:2.43495
Fold 3 - best iteration: 3989

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.74554
[1000]	validation_0-rmse:3.00042
[2000]	validation_0-rmse:2.60350
[3000]	validation_0-rmse:2.52986
[3833]	validation_0-rmse:2.52086
Fold 4 - best iteration: 3804

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.88409
[1000]	validation_0-rmse:3.17539
[2000]	validation_0-rmse:2.77035
[3000]	validation_0-rmse:2.68065
[4000]	validation_0-rmse:2.66142
[4397]	validation_0-rmse:2.66010
Fold 5 - best iteration: 4367

--- New Experiment added! ---
ID#: 6504363392 
Algorithm: XGBoost (eta: 0.001) 
Predictors: ['temp', 'rhum', 'dayofweek', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00100000005, 'max_delta_step': 0, 'max_depth': 11, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 5000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.7, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.001, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:28:09
Metric: [{ 'metric': RSME, 'train': 1.6609,  'validation': 2.5881, 'test': None }, { 'metric': MAE, 'train': 1.2083,  'validation': 1.8759, 'test': None }]

In [34]:
predictors = ['temp','rhum','dayofweek', 'timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':9,
                  'eta': 0.001,
                  'n_estimators': 5000,
                  'subsample': 0.8,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, predictors, n_folds=5, verbose=1000, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (max_depth: 9 and subsample: 0.8)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores))
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:5.01765
[1000]	validation_0-rmse:3.23445
[2000]	validation_0-rmse:2.80315
[3000]	validation_0-rmse:2.71097
[4000]	validation_0-rmse:2.69217
[4234]	validation_0-rmse:2.69101
Fold 1 - best iteration: 4205

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:4.95249
[1000]	validation_0-rmse:3.16117
[2000]	validation_0-rmse:2.72962
[3000]	validation_0-rmse:2.63752
[4000]	validation_0-rmse:2.61836
[4682]	validation_0-rmse:2.61556
Fold 2 - best iteration: 4652

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:4.71779
[1000]	validation_0-rmse:2.93525
[2000]	validation_0-rmse:2.52687
[3000]	validation_0-rmse:2.44774
[4000]	validation_0-rmse:2.43207
[4270]	validation_0-rmse:2.43126
Fold 3 - best iteration: 4240

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:4.74551
[1000]	validation_0-rmse:2.99345
[2000]	validation_0-rmse:2.60131
[3000]	validation_0-rmse:2.52880
[3919]	validation_0-rmse:2.51755
Fold 4 - best iteration: 3890

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:4.88406
[1000]	validation_0-rmse:3.16710
[2000]	validation_0-rmse:2.76323
[3000]	validation_0-rmse:2.67176
[4000]	validation_0-rmse:2.64894
[4640]	validation_0-rmse:2.64535
Fold 5 - best iteration: 4610

--- New Experiment added! ---
ID#: 6511302736 
Algorithm: XGBoost (max_depth: 9 and subsample: 0.8) 
Predictors: ['temp', 'rhum', 'dayofweek', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00100000005, 'max_delta_step': 0, 'max_depth': 9, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 5000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.8, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.001, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:31:51
Metric: [{ 'metric': RSME, 'train': 1.8909,  'validation': 2.5797, 'test': None }, { 'metric': MAE, 'train': 1.3838,  'validation': 1.8731, 'test': None }]

In [35]:
experiment_tracker.print_partial_results()
--- Experiments ---

Model: XGBoost (Baseline)
RSME - Train: 2.4729 - Validation: 2.6320 - Test: None
MAE - Train: 1.8143 - Validation: 1.9269 - Test: None

Model: XGBoost 1
RSME - Train: 2.4930 - Validation: 2.6233 - Test: None
MAE - Train: 1.8292 - Validation: 1.9204 - Test: None

Model: XGBoost (max_depth, eta, subsampel and estimators)
RSME - Train: 1.8872 - Validation: 2.5822 - Test: None
MAE - Train: 1.3823 - Validation: 1.8758 - Test: None

Model: XGBoost (gamma: 1.5)
RSME - Train: 1.8869 - Validation: 2.5816 - Test: None
MAE - Train: 1.3840 - Validation: 1.8753 - Test: None

Model: XGBoost (max_depth: 7)
RSME - Train: 2.1507 - Validation: 2.5762 - Test: None
MAE - Train: 1.5812 - Validation: 1.8766 - Test: None

Model: XGBoost (- holiday and - peak features)
RSME - Train: 2.1291 - Validation: 2.5783 - Test: None
MAE - Train: 1.5662 - Validation: 1.8773 - Test: None

Model: XGBoost (- working_day feat)
RSME - Train: 2.1735 - Validation: 2.5935 - Test: None
MAE - Train: 1.5976 - Validation: 1.8900 - Test: None

Model: XGBoost (- season feat)
RSME - Train: 2.2543 - Validation: 2.6297 - Test: None
MAE - Train: 1.6586 - Validation: 1.9207 - Test: None

Model: XGBoost (eta: 0.001)
RSME - Train: 1.6609 - Validation: 2.5881 - Test: None
MAE - Train: 1.2083 - Validation: 1.8759 - Test: None

Model: XGBoost (max_depth: 9 and subsample: 0.8)
RSME - Train: 1.8909 - Validation: 2.5797 - Test: None
MAE - Train: 1.3838 - Validation: 1.8731 - Test: None

Saving Model

In [36]:
pickle.dump(xgb_model, open('../models/xgboost_complex_pipe.pkl', 'wb'))
pickle.dump(xgb_model['model'], open('../models/xgboost_complex.pkl', 'wb'))
xgb_model['model'].save_model('../models/xgboost_complex.json')
xgb_model['model'].save_model('../models/xgboost_complex.model')
In [37]:
def normalized_rsme(value, dataset) -> float:
    return value / max(dataset['count']) - min(dataset['count'])
In [38]:
# Normalized RSME - Root Mean Square Error
normtrain_rsme = normalized_rsme(dict_scores['train_rsme'], df_train)
normtrain_rsme
Out[38]:
0.07272703638284966

Only positives count

In [39]:
df_positives = df_train[df_train['count'] > 0].copy()
X_pos = df_positives.drop(['count'], axis=1)
y_pos = df_positives.pop('count')
X.shape, X_pos.shape
Out[39]:
((8760, 22), (6980, 22))
In [40]:
predictors = ['temp','rhum','dayofweek', 'holiday','timesofday','wdsp','rainfall_intensity','peak','working_day', 'hour', 'season']

params_xgboost = {'max_depth':9,
                  'eta': 0.001,
                  'n_estimators': 5000,
                  'subsample': 0.8,
                  'colsample_bytree': 0.5,
                  'gamma': 1.5,
                   'seed': 42,
                   'eval_metric': 'rmse'
                   }

dict_scores, xgb_model = kfold_score(params_xgboost, X=X_pos, y=y_pos, predictors=predictors, n_folds=5, verbose=1000, early_stopping_rounds=30)
exp_xgboost = et.Experiment('XGBoost (only + count values)', predictors=predictors, hyperparameters=xgb_model['model'].get_params(), 
                            score=get_metrics_to_Experiment(dict_scores), notes='Checking possibility of using hurdle model')
experiment_tracker.add_experiment(exp_xgboost)
######################################## Fold 1 out of 5 ########################################
[0]	validation_0-rmse:5.30208
[1000]	validation_0-rmse:3.17864
[2000]	validation_0-rmse:2.72787
[3000]	validation_0-rmse:2.65570
[3743]	validation_0-rmse:2.64687
Fold 1 - best iteration: 3713

######################################## Fold 2 out of 5 ########################################
[0]	validation_0-rmse:5.47349
[1000]	validation_0-rmse:3.36752
[2000]	validation_0-rmse:2.89018
[3000]	validation_0-rmse:2.79569
[4000]	validation_0-rmse:2.77658
[4461]	validation_0-rmse:2.77365
Fold 2 - best iteration: 4431

######################################## Fold 3 out of 5 ########################################
[0]	validation_0-rmse:5.36302
[1000]	validation_0-rmse:3.24548
[2000]	validation_0-rmse:2.77772
[3000]	validation_0-rmse:2.69382
[3825]	validation_0-rmse:2.68446
Fold 3 - best iteration: 3795

######################################## Fold 4 out of 5 ########################################
[0]	validation_0-rmse:5.52297
[1000]	validation_0-rmse:3.39055
[2000]	validation_0-rmse:2.89755
[3000]	validation_0-rmse:2.80029
[4000]	validation_0-rmse:2.78380
[4170]	validation_0-rmse:2.78308
Fold 4 - best iteration: 4140

######################################## Fold 5 out of 5 ########################################
[0]	validation_0-rmse:5.55485
[1000]	validation_0-rmse:3.40061
[2000]	validation_0-rmse:2.86673
[3000]	validation_0-rmse:2.73784
[4000]	validation_0-rmse:2.70130
[4999]	validation_0-rmse:2.68881
Fold 5 - best iteration: 4998

--- New Experiment added! ---
ID#: 6511054912 
Algorithm: XGBoost (only + count values) 
Predictors: ['temp', 'rhum', 'dayofweek', 'holiday', 'timesofday', 'wdsp', 'rainfall_intensity', 'peak', 'working_day', 'hour', 'season']
Hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 0.5, 'gamma': 1.5, 'gpu_id': -1, 'importance_type': 'gain', 'interaction_constraints': '', 'learning_rate': 0.00100000005, 'max_delta_step': 0, 'max_depth': 9, 'min_child_weight': 1, 'missing': nan, 'monotone_constraints': '()', 'n_estimators': 5000, 'n_jobs': 12, 'num_parallel_tree': 1, 'random_state': 42, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.8, 'tree_method': 'exact', 'validate_parameters': 1, 'verbosity': None, 'eta': 0.001, 'seed': 42, 'eval_metric': 'rmse'}
Date: 05/07/2022 21:35:28
Metric: [{ 'metric': RSME, 'train': 1.9839,  'validation': 2.7153, 'test': None }, { 'metric': MAE, 'train': 1.4920,  'validation': 2.0364, 'test': None }]
Notes: Checking possibility of using hurdle model
In [41]:
experiment_tracker.to_excel('../documentation/experiment_tracker_xgboost.xlsx')
In [42]:
%reload_ext watermark
%watermark -a "Leandro Pessini" -n -u -v -iv -w
Author: Leandro Pessini

Last updated: Tue Jul 05 2022

Python implementation: CPython
Python version       : 3.9.6
IPython version      : 7.25.0

numpy            : 1.21.1
xgboost          : 1.4.0
joblib           : 1.0.1
pandas           : 1.3.0
sklearn          : 1.0.2
category_encoders: 2.4.0
sys              : 3.9.6 | packaged by conda-forge | (default, Jul 11 2021, 03:36:15) 
[Clang 11.1.0 ]

Watermark: 2.3.0

GitHub Mark GitHub repository
Author: Leandro Pessini