[Tutor] Curve Fitting a CSV data file in Python

Eddwin Shoemo javon1974 at me.com
Fri Nov 8 23:19:08 EST 2019


Hi. I was presented with a question and it has me stumped. My PY and csv files are attached.
Here is the question:

Use the function test_func1(x, b0, b1, b2, b3, b4, b5) (<—— located at the bottom of the script below)
to predict SALES in terms of each of the 3 attributes: the TV, Radio, and Newspaper data. PLOT the results.
HINT: use the optimize module from the scipy library. Look for functions such as curve_fit.

My Script:

import csv 
import pandas as pd
import numpy as np
from scipy import optimize
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt  #Used to plot
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import cross_val_score, GridSearchCV # for optimuim MSE with Linear reg

########## Another data set -advertising
# Load the advertising data set and show it before and after clean-up.
data2 = pd.read_csv('Advertising.csv')
print(data2.head())  # head() shows the first 5 rows

# 'Unnamed: 0' is presumably the row index that was saved with the file
# (TODO confirm against the CSV); remove that column in place.
# columns=[...] is the keyword spelling of drop([...], axis=1).
data2.drop(columns=['Unnamed: 0'], inplace=True)
print(data2.head())

# Double-bracket selection keeps these as single-column DataFrames.
# (data2['TV'].values / data2['sales'].values would give plain arrays instead.)
tv = data2[['TV']]
sales = data2[['sales']]

# .values on a single column yields a NumPy array.
# (data2[['radio']] would keep it as a DataFrame instead.)
radio = data2['radio'].values
newspaper = data2['newspaper'].values

"""            
reg1 = LinearRegression()
reg1.fit(tv, sales)
y_pred1 = reg1.predict(tv)
plt.figure()
plt.scatter(tv, sales, color='y') #plot real values
plt.plot(tv, y_pred1, "r.") #plot predicted values # The dot displays a dotted value
##   COLORS: one of {'b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'}

reg2 = LinearRegression()
reg2.fit(radio.reshape(-1,1), sales)  # Will use a column vector
y_pred2 = reg2.predict(radio.reshape(-1,1))
plt.scatter(radio, sales, color='b') #plot real values
plt.plot(radio, y_pred2, c="black", linewidth=3)
plt.xlabel("The number of Ads (TV, Radio, and Newspaper)") # Label for the graph
plt.ylabel("The Sales") # Label for the graph

reg3 = LinearRegression()
reg3.fit(newspaper.reshape(-1,1), sales)
y_pred3 = reg3.predict(newspaper.reshape(-1,1))
plt.scatter(newspaper, sales, color='red') #plot real values
plt.plot(newspaper, y_pred3, "g.")

### Ridge Regression ###############################################
ridge = Ridge()
parameters_dict = {'alpha':[1e-12, 1e-7, 1e-3, 1e-2, 1, 4, 15, 20]}
rg_regressor = GridSearchCV(ridge,parameters_dict,scoring='neg_mean_squared_error',cv=5) 
rg_regressor.fit(tv,sales)
y_pred4 = rg_regressor.predict(tv)
print('The best value for -Ridge- alpha is', rg_regressor.best_params_)
print('The minimuim MSE -Ridge-is',rg_regressor.best_score_)
plt.scatter(tv,sales,color='m') # Plot real values
plt.plot(tv, y_pred4, "k--")

### Lasso Regression ###############################################
lasso = Lasso()
parameters_dict_lasso = {'alpha':[1e-12, 1e-7, 1e-3, 1e-2, 1, 4, 15, 20]}
ls_regressor = GridSearchCV(lasso, parameters_dict_lasso,scoring='neg_mean_squared_error', cv=5) 
ls_regressor.fit(tv,sales)
y_pred5 = ls_regressor.predict(tv)
print('The best value for Lasso alpha is', ls_regressor.best_params_)
print('The minimuim MSE -Lasso-is', ls_regressor.best_score_)
plt.scatter(tv,sales,color='m') # Plot real values
plt.plot(tv, y_pred5, "g--") # -- is how the plot is displayed

### Linear Regression with optimuim MSE #############################
LR = LinearRegression()
MSEs=cross_val_score(LR,tv,sales,scoring='neg_mean_squared_error', cv=5)
avg_MSEs=np.mean(MSEs)
print('The average MSE-Linear', avg_MSEs)
LR.fit(tv, sales)
Pred = LR.predict(tv)
plt.scatter(tv,sales,color='m') # Plot real values
plt.plot(tv, Pred, "b--") # -- is how the plot is displayed
"""


def test_func1(x, b0, b1,b2,b3,b4,b5):
  return (b0 + b1*x + b2*x**2+ b3*x**3 + b4*x**4+ b5*x**5)

# Quick sanity check of test_func1: evaluate it at x = 1 with sample
# coefficients and print the result.
print(
    'call the function:',
    test_func1(x=1, b0=10, b1=5, b2=4, b3=3, b4=2, b5=1),
)



More information about the Tutor mailing list