python.su forum
Hello, everyone!
Please tell me how to train polynomial regression models (linear, quadratic and cubic) when there are several factors/variables (8 of them). The assignment is to train them on the training set (https://gist.github.com/vithu95/04f9ccf102e8ebde5d4119ca98aac7f2) and present their plots.
I found an example on the web: https://nagornyy.me/courses/data-science/regression/
But there the model is trained on a single factor, while I have 8. If I try to pass 8 variables instead of one, I get an error.
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Train the polynomial regression
def learnPolynomialFeatures(dataset, X_, Y_):
    X = dataset[X_].values
    y = dataset[Y_].values
    regr = LinearRegression()

    # create quadratic and cubic features
    quadratic = PolynomialFeatures(degree=2)
    cubic = PolynomialFeatures(degree=3)
    X_quad = quadratic.fit_transform(X)
    X_cubic = cubic.fit_transform(X)

    # fit features
    X_fit = np.arange(X.min(), X.max(), 1)[:, np.newaxis]

    regr = regr.fit(X, y)
    y_lin_fit = regr.predict(X_fit)
    linear_r2 = r2_score(y, regr.predict(X))

    regr = regr.fit(X_quad, y)
    y_quad_fit = regr.predict(quadratic.fit_transform(X_fit))
    quadratic_r2 = r2_score(y, regr.predict(X_quad))

    regr = regr.fit(X_cubic, y)
    y_cubic_fit = regr.predict(cubic.fit_transform(X_fit))
    cubic_r2 = r2_score(y, regr.predict(X_cubic))

    return X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y

# plot results of the polynomial regression
def PlotPolynomRelationPredict(X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y):
    plt.scatter(X, y, label='training points', color='lightgray')
    plt.plot(X_fit, y_lin_fit, label='linear (d=1), $R^2={:.2f}$'.format(linear_r2), color='blue', lw=2, linestyle=':')
    plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2={:.2f}$'.format(quadratic_r2), color='red', lw=2, linestyle='-')
    plt.plot(X_fit, y_cubic_fit, label='cubic (d=3), $R^2={:.2f}$'.format(cubic_r2), color='green', lw=2, linestyle='--')
    plt.xlabel('% lower status of the population [LSTAT]')
    plt.ylabel('Price in $1000\'s [MEDV]')
    plt.legend(loc='upper right')

dataset = pd.read_csv('J:/Exemple_Regression/ENB2012_data.csv', sep=',')

X = dataset[['X1','X2','X3','X4','X5','X6','X7','X8']].values
y = dataset['Y1'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Train the polynomial regression
X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y = learnPolynomialFeatures(dataset, ['X1','X2','X3','X4','X5','X6','X7','X8'], 'Y1')
PlotPolynomRelationPredict(X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y)
ValueError                                Traceback (most recent call last)
<ipython-input-5-87d395e1675d> in <module>
    163
    164 # Train the polynomial model
--> 165 X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y = learnPolynomialFeatures(dataset, ['X1','X2','X3','X4','X5','X6','X7','X8'], 'Y1')
    166
    167 # Model prediction

<ipython-input-5-87d395e1675d> in learnPolynomialFeatures(dataset, X_, Y_)
     78
     79     regr = regr.fit(X, y)
---> 80     y_lin_fit = regr.predict(X_fit)
     81     linear_r2 = r2_score(y, regr.predict(X))
     82

~\Anaconda3\envs\krs\lib\site-packages\sklearn\linear_model\_base.py in predict(self, X)
    223             Returns predicted values.
    224         """
--> 225         return self._decision_function(X)
    226
    227     _preprocess_data = staticmethod(_preprocess_data)

~\Anaconda3\envs\krs\lib\site-packages\sklearn\linear_model\_base.py in _decision_function(self, X)
    207         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
    208         return safe_sparse_dot(X, self.coef_.T,
--> 209                                dense_output=True) + self.intercept_
    210
    211     def predict(self, X):

~\Anaconda3\envs\krs\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
    149         ret = np.dot(a, b)
    150     else:
--> 151         ret = a @ b
    152
    153     if (sparse.issparse(a) and sparse.issparse(b)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 8 is different from 1)
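In other words, the model was fitted on eight feature columns, but X_fit, built with np.arange, has only one column, so the matrix product inside predict() cannot work. A minimal sketch that reproduces the same kind of failure on toy data (random numbers; only the shapes matter, and the exact error text depends on the scikit-learn version):

import numpy as np
from sklearn.linear_model import LinearRegression

# Toy stand-in for the real training data: 20 samples, 8 features.
X = np.random.rand(20, 8)
y = np.random.rand(20)

regr = LinearRegression().fit(X, y)   # coef_ now has length 8

# The plotting grid from the example has only ONE column ...
X_fit = np.arange(X.min(), X.max(), 0.1)[:, np.newaxis]

# ... so predict() multiplies an (m, 1) matrix with an 8-element
# coefficient vector and raises a ValueError like the one above.
regr.predict(X_fit)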
Edited by Volodya (March 7, 2020 17:09:25)
Attached file: ENB2012_data.csv (39.8 KB)
Is it possible at all to build/train a multiple polynomial regression model?
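It is: PolynomialFeatures accepts any number of input columns and expands them into powers and cross-terms, and LinearRegression is then fitted on the expanded matrix. A minimal sketch, assuming the column names X1..X8 and Y1 from the attached ENB2012_data.csv (the file path is a placeholder):

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

dataset = pd.read_csv('ENB2012_data.csv')   # placeholder path

X = dataset[['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8']].values
y = dataset['Y1'].values

# Expand all eight columns into cubic terms (powers and cross-terms),
# then fit an ordinary linear regression on the expanded matrix.
cubic = PolynomialFeatures(degree=3)
X_cubic = cubic.fit_transform(X)
regr = LinearRegression().fit(X_cubic, y)
print(r2_score(y, regr.predict(X_cubic)))

Plotting is the harder part with eight inputs, since there is no single x-axis; one common workaround is to plot predicted versus actual values.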
In the end, the problem was in the following line:

X_fit = np.arange(X.min(), X.max(), 1)[:, np.newaxis]

Here is the reworked code:

# Train the polynomial regression
def learnPolynomialFeatures(X_train, y_train, X_test, y_test):
    regr = LinearRegression()

    # create quadratic and cubic features
    quadratic = PolynomialFeatures(degree=2)
    cubic = PolynomialFeatures(degree=3)
    X_quad = quadratic.fit_transform(X_train)
    X_cubic = cubic.fit_transform(X_train)

    # fit features
    # X_fit = np.arange(X.min(), X.max(), 1)[:, np.newaxis]

    regr = regr.fit(X_train, y_train)
    y_lin_fit = regr.predict(X_test)
    linear_r2 = r2_score(y_train, regr.predict(X_train))

    regr = regr.fit(X_quad, y_train)
    y_quad_fit = regr.predict(quadratic.fit_transform(X_test))
    quadratic_r2 = r2_score(y_train, regr.predict(X_quad))

    regr = regr.fit(X_cubic, y_train)
    y_cubic_fit = regr.predict(cubic.fit_transform(X_test))
    cubic_r2 = r2_score(y_train, regr.predict(X_cubic))

    return X_train, X_test, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y_train

X = dataset[['X1','X2','X3','X4','X5','X6','X7','X8']].values
y = dataset['Y1'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

X, X_fit, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y = learnPolynomialFeatures(X_train, y_train, X_test, y_test)
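One small follow-up to the listing above: the R² values it returns are computed on the training split, while the predictions are made on X_test. If the hold-out score is also of interest, it can be computed from the values returned by the call above (names as in that call):

from sklearn.metrics import r2_score

# R^2 on the held-out test split for each of the three fits
print('linear    test R^2: {:.3f}'.format(r2_score(y_test, y_lin_fit)))
print('quadratic test R^2: {:.3f}'.format(r2_score(y_test, y_quad_fit)))
print('cubic     test R^2: {:.3f}'.format(r2_score(y_test, y_cubic_fit)))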
Edited by Volodya (March 12, 2020 13:16:37)
Volodya
If you have any remarks, please write them; I'm only learning.
return X_train, X_test, y_lin_fit, linear_r2, y_quad_fit, quadratic_r2, y_cubic_fit, cubic_r2, y_train
X = dataset[['X1','X2','X3','X4','X5','X6','X7','X8']].values
doza_and
Yes, thank you!
I'll keep that in mind :)
doza_and
Something like this?
# Train the model
def TrainingLineModel(X_train, y_train):
    model_line = LinearRegression()
    model_line.fit(X_train, y_train)
    linear_r2 = r2_score(y_train, model_line.predict(X_train))
    return model_line, linear_r2

# Train the polynomial regression
def TrainingPolynomialRegression(X_train, y_train, degree_):
    regr = LinearRegression()
    # create features
    func_degree = PolynomialFeatures(degree=degree_)
    model_regr_fit = regr.fit(func_degree.fit_transform(X_train), y_train)
    degree_r2 = r2_score(y_train, PredictionModel(model_regr_fit, X_train, degree_))
    return model_regr_fit, degree_r2

# Model prediction
def PredictionModel(model_fit, X_try, degree_):
    if degree_ > 1:
        func_degree = PolynomialFeatures(degree=degree_)
        y_degree_fit = model_fit.predict(func_degree.fit_transform(X_try))
        return y_degree_fit
    else:
        y_line_fit = model_fit.predict(X_try)
        return y_line_fit

dataset = pd.read_csv('F:Exemple_Regression/ENB2012_data.csv', sep=',')

# Split the data into training and test sets
X = dataset[['X1','X2','X3','X4','X5','X6','X7','X8']].values
y = dataset['Y1'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Train the linear model
model_line, linear_r2 = TrainingLineModel(X_train, y_train)

# Train the polynomial models
model_quad, quad_r2 = TrainingPolynomialRegression(X_train, y_train, 2)
model_cubic, cubic_r2 = TrainingPolynomialRegression(X_train, y_train, 3)

# Model predictions
y_line_predict = PredictionModel(model_line, X_test, 1)
y_quad_predict = PredictionModel(model_quad, X_test, 2)
y_cubic_predict = PredictionModel(model_cubic, X_test, 3)

PlotPolynomRelationPredict(X, X_test, y_line_predict, linear_r2, y_quad_predict, quad_r2, y_cubic_predict, cubic_r2, y)
X = dataset[0:7].values
regr=LinearRegression()
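For comparison, the same three models can also be expressed with scikit-learn pipelines, which bundle the PolynomialFeatures step with the regressor so a separate prediction helper is not needed. A hedged sketch, assuming the X_train/X_test/y_train/y_test split from the listing above:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

models = {}
for degree in (1, 2, 3):
    # degree=1 reduces to plain linear regression; 2 and 3 add the
    # polynomial and interaction terms of all eight features.
    model = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
    model.fit(X_train, y_train)
    models[degree] = model
    print('degree {}: train R^2 = {:.3f}, test R^2 = {:.3f}'.format(
        degree,
        r2_score(y_train, model.predict(X_train)),
        r2_score(y_test, model.predict(X_test))))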
Edited by Volodya (March 13, 2020 13:41:41)
Volodya
Something like this?

That looks much nicer.

Volodya
I just couldn't take a slice of the dataset, though: 'X1':'X8'

https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html
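Following that page: plain dataset['X1':'X8'] tries to slice rows by label, which is why it fails here; label-based column slices go through .loc. A minimal sketch with the column names of this dataset:

# .loc with an explicit row selector and a label slice of columns;
# both endpoints ('X1' and 'X8') are included.
X = dataset.loc[:, 'X1':'X8'].values
y = dataset['Y1'].values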