Подскажите пожалуйста, как мне обучить модели полиноминальной регрессии (линейную, квадратическую и кубическую), если у меня несколько факторов/переменных (8). По заданию нужно на обучающем наборе (https://gist.github.com/vithu95/04f9ccf102e8ebde5d4119ca98aac7f2) обучить и представить их графики.
Нашел пример в интернете https://nagornyy.me/courses/data-science/regression/
Но там обучается модель используя 1-н фактор, а у меня их 8. Если я пытаюсь вместо 1-й переменной отправить 8, то выходит ошибка.
import pandas as pd from sklearn.neighbors import KNeighborsRegressor from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.svm import SVR from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score import numpy as np import matplotlib.pyplot as plt import seaborn as sns #Обучение Полиноминальной регрессии def learnPolynomialFeatures (dataset, X_,Y_): X = dataset[X_].values y = dataset[Y_].values regr = LinearRegression() # create quadratic features quadratic = PolynomialFeatures(degree=2) cubic = PolynomialFeatures(degree=3) X_quad = quadratic.fit_transform(X) X_cubic = cubic.fit_transform(X) # fit features X_fit = np.arange(X.min(), X.max(), 1)[:, np.newaxis] regr = regr.fit(X, y) y_lin_fit = regr.predict(X_fit) linear_r2 = r2_score(y, regr.predict(X)) regr = regr.fit(X_quad, y) y_quad_fit = regr.predict(quadratic.fit_transform(X_fit)) quadratic_r2 = r2_score(y, regr.predict(X_quad)) regr = regr.fit(X_cubic, y) y_cubic_fit = regr.predict(cubic.fit_transform(X_fit)) cubic_r2 = r2_score(y, regr.predict(X_cubic)) return X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y # plot results Polynom Regression def PlotPolynomRelationPredict (X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y): plt.scatter(X, y, label='training points', color='lightgray') plt.plot(X_fit, y_lin_fit, label='linear (d=1), $R^2={:.2f}$'.format(linear_r2), color='blue', lw=2, linestyle=':') plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2={:.2f}$'.format(quadratic_r2), color='red', lw=2, linestyle='-') plt.plot(X_fit, y_cubic_fit, label='cubic (d=3), $R^2={:.2f}$'.format(cubic_r2), color='green', lw=2, linestyle='--') plt.xlabel('% lower status of the population [LSTAT]') plt.ylabel('Price in $1000\'s [MEDV]') plt.legend(loc='upper right') dataset = pd.read_csv('J:/Exemple_Regression/ENB2012_data.csv', sep=',') X = dataset[['X1','X2','X3','X4','X5','X6','X7','X8']].values y = dataset['Y1'].values X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0) #Обучение Полиноминальной регрессии X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y=learnPolynomialFeatures (dataset, ['X1','X2','X3','X4','X5','X6','X7','X8'],'Y1') PlotPolynomRelationPredict (X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y)
ValueError Traceback (most recent call last)
<ipython-input-5-87d395e1675d> in <module>
163
164 #Обучение полиноминальной модели
–> 165 X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y=learnPolynomialFeatures (dataset, ,'Y1')
166
167 #Предсказание модели
<ipython-input-5-87d395e1675d> in learnPolynomialFeatures(dataset, X_, Y_)
78
79 regr = regr.fit(X, y)
—> 80 y_lin_fit = regr.predict(X_fit)
81 linear_r2 = r2_score(y, regr.predict(X))
82
~\Anaconda3\envs\krs\lib\site-packages\sklearn\linear_model\_base.py in predict(self, X)
223 Returns predicted values.
224 “”"
–> 225 return self._decision_function(X)
226
227 _preprocess_data = staticmethod(_preprocess_data)
~\Anaconda3\envs\krs\lib\site-packages\sklearn\linear_model\_base.py in _decision_function(self, X)
207 X = check_array(X, accept_sparse
208 return safe_sparse_dot(X, self.coef_.T,
–> 209 dense_output=True) + self.intercept_
210
211 def predict(self, X):
~\Anaconda3\envs\krs\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
149 ret = np.dot(a, b)
150 else:
–> 151 ret = a @ b
152
153 if (sparse.issparse(a) and sparse.issparse(b)
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 8 is different from 1)
Если комментирую линейную модель, то выходит ошибка на следующей.