How to fit & predict with the CatBoost algorithm

princit princit at gmail.com
Wed Mar 18 04:59:19 EDT 2020


I am new to Python. I am trying to predict the "time_to_failure" for the given "acoustic_data" in the test CSV files using the CatBoost algorithm.


def catbostregtest(X_train, y_train):
    """Fit a CatBoost regressor and write test-set predictions to a CSV file.

    Reads the segment ids from 'sample_submission.csv', builds one feature
    row per 'test/<seg_id>.csv' file with ``gen_features``, fits a
    ``CatBoostRegressor`` on the training data, writes the predicted
    'time_to_failure' values to 'submissionCAT.csv' and prints the model's
    ``best_score_``.

    Args:
        X_train: Training feature table, one row per training segment.
        y_train: Training targets, one value per row of ``X_train``.
    """
    # submission format
    submission = pd.read_csv('sample_submission.csv', index_col='seg_id')
    # prepare test data: collect the feature rows and build the frame once.
    # DataFrame.append copied the whole frame on every call and was removed
    # in pandas 2.0.
    test_rows = []
    for seg_id in submission.index:
        seg = pd.read_csv('test/' + seg_id + '.csv')
        test_rows.append(gen_features(seg['acoustic_data']))
    X_test = pd.DataFrame(test_rows).reset_index(drop=True)
    # model of choice here
    model = CatBoostRegressor(iterations=10000, loss_function='MAE', boosting_type='Ordered')
    # NOTE(review): CatBoost reports "All features are either constant or
    # ignored" from fit() when no column of X_train varies -- presumably
    # because X_train holds a single row; confirm the training data yields
    # several distinct feature rows.
    model.fit(X_train, y_train)      #error line
    y_hat = model.predict(X_test)    #error line
    # write submission file
    submission['time_to_failure'] = y_hat
    submission.to_csv('submissionCAT.csv')
    print(model.best_score_)



The function "catbostregtest" gives me an error, with the following error log:
 
Traceback (most recent call last):
File "E:\dir\question.py", line 68, in main()
File "E:\dir\question.py", line 65, in main catbostregtest(X_train, y_train)
File "E:\dir\question.py", line 50, in catbostregtest model.fit(X_train, y_train)
File "C:\Users\xyz\AppData\Local\Continuum\anaconda3\lib\site-packages\catboost\core.py", line 4330, in fit save_snapshot, snapshot_file, snapshot_interval, init_model)
File "C:\Users\xyz\AppData\Local\Continuum\anaconda3\lib\site-packages\catboost\core.py", line 1690, in _fit train_params["init_model"]
File "C:\Users\xyz\AppData\Local\Continuum\anaconda3\lib\site-packages\catboost\core.py", line 1225, in _train self._object._train(train_pool, test_pool, params, allow_clear_pool, init_model._object if init_model else None)
File "_catboost.pyx", line 3870, in _catboost._CatBoost._train
File "_catboost.pyx", line 3916, in _catboost._CatBoost._train
CatBoostError: c:/goagent/pipelines/buildmaster/catboost.git/catboost/libs/data/quantization.cpp:2424: All features are either constant or ignored.
This is the gen_features function:

def gen_features(X):
    """Summarise one signal segment as a vector of 13 statistics.

    Args:
        X: A pandas Series of samples (the callers pass an 'acoustic_data'
            column).

    Returns:
        A pandas Series holding, in this order: mean, standard deviation,
        minimum, maximum, kurtosis, skewness, the 1%, 5%, 95% and 99%
        quantiles, and the maximum, mean and standard deviation of the
        absolute values.
    """
    magnitude = np.abs(X)
    q01, q05, q95, q99 = (np.quantile(X, q) for q in (0.01, 0.05, 0.95, 0.99))

    stats = [
        # statistics of the raw signal
        X.mean(),
        X.std(),
        X.min(),
        X.max(),
        X.kurtosis(),
        X.skew(),
        # tails of the distribution
        q01,
        q05,
        q95,
        q99,
        # statistics of the absolute signal
        magnitude.max(),
        magnitude.mean(),
        magnitude.std(),
    ]
    return pd.Series(stats)


This function is called from the main function:

def main():
    """Build training features from 'train.csv' and run ``catbostregtest``.

    'train.csv' is read in chunks of 150,000 rows. Each chunk becomes one
    training example: its features come from ``gen_features`` applied to the
    'acoustic_data' column, and its target is the chunk's last
    'time_to_failure' value.
    """
    train1 = pd.read_csv('train.csv', iterator=True, chunksize=150_000, dtype={'acoustic_data': np.int16, 'time_to_failure': np.float64})
    # Collect rows and targets in lists and build the pandas objects once.
    # DataFrame.append / Series.append copied everything on each call and
    # were removed in pandas 2.0.
    feature_rows = []
    targets = []
    for df in train1:
        feature_rows.append(gen_features(df['acoustic_data']))
        # the chunk's target is the time to failure at its last sample
        targets.append(df['time_to_failure'].values[-1])
    X_train = pd.DataFrame(feature_rows).reset_index(drop=True)
    # explicit dtype: an empty Series otherwise has a version-dependent dtype
    y_train = pd.Series(targets, dtype=np.float64)
    catbostregtest(X_train, y_train)

How can I remove the error that occurs when making predictions with the CatBoost model? How can I remove this error? Please help. You can download the project and run it in the Spyder IDE from this link: https://drive.google.com/file/d/1JFsNfE22ef82e-dS0zsZHDE3zGJxnJ_J/view?usp=sharing or https://github.com/princit/catboostAlgorithm



More information about the Python-list mailing list