A working program
# From https://www.kaggle.com/code/freacle/part-1
# Original article: https://habr.com/ru/articles/869118/
# The article contains a lot of code fragments with explanations
import numpy as np

Parameter = None

def ParameterObj():
    class Parameter:
        layers = []
        calling = dict()

        def __init__(self, info):
            Parameter.layers.append(info[0])
            Parameter.calling[info[0]] = info[1:]

    return Parameter

class Module:
    def __init__(self):
        self._constructor_Parameter = ParameterObj()
        global Parameter
        Parameter = self._constructor_Parameter

    def forward(self):
        pass

    def __call__(self, x):
        return self.forward(x)

    def parameters(self):
        return self

class Linear:
    def __init__(self, input_channels: int, output_channels: int, bias=True):
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.bias = bias
        self.backward_list = []
        if bias:
            Parameter([self,
                       np.random.uniform(-0.5, 0.5, size=(self.input_channels, self.output_channels)),
                       np.random.uniform(-0.5, 0.5, size=self.output_channels)])
        else:
            Parameter([self,
                       np.random.uniform(-0.5, 0.5, size=(self.input_channels, self.output_channels)),
                       np.zeros(self.output_channels)])

    def __call__(self, x):
        self.x = np.array(x, copy=True)
        result = x @ Parameter.calling[self][0] + Parameter.calling[self][1]
        return result

    def backward(self, input_matrix):
        # Not used in this example: CrossEntropyLoss.backward computes the gradients directly
        x_gradient = input_matrix @ Parameter.calling[self][0].T
        self.weight_gradient = self.x.T @ input_matrix
        self.bias_gradient = input_matrix.mean(axis=0)
        return x_gradient

class Flatten:
    def __init__(self):
        pass

    def __call__(self, x):
        return x.reshape(1, -1)

class ReLU:
    def __init__(self):
        pass

    def __call__(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, input_matrix):
        return (self.x > 0) * input_matrix

class Softmax:
    def __init__(self):
        pass

    def __call__(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=1).reshape(-1, 1)

class CrossEntropyLoss:
    def __init__(self):
        self.predicted = None
        self.true = None

    def __call__(self, logits, true):
        predicted = np.exp(logits) / np.sum(np.exp(logits), axis=1).reshape(-1, 1)  # softmax
        self.predicted = np.array(predicted, copy=True)  # keep a copy of the input matrix for later computations
        self.true = np.array(true, copy=True)  # keep a copy of the input matrix for later computations
        number_of_classes = predicted.shape[1]  # number of classes, 2 in our case
        # compute the loss value directly from the formula
        self.loss = -1 * np.sum(true * np.log(predicted + 1e-5), axis=1)
        return self

    def backward(self):
        loss = self.predicted - self.true
        # Iterate over the layers in reverse order; this is possible because everything was saved in Parameter.layers
        for index, layer in enumerate(Parameter.layers[::-1]):
            if type(layer).__name__ == 'Linear':
                # dividing by loss.shape[0] is needed because the size of the update depends on the batch size
                changes_w = (layer.x.T @ loss) / loss.shape[0]
                if layer.bias:
                    changes_b = np.sum(loss) / loss.shape[0]
                else:
                    changes_b = 0
                layer.backward_list = [changes_w, changes_b]
                # Compute the gradient for the next layers
                loss = loss @ Parameter.calling[layer][0].T
            elif type(layer).__name__ == 'ReLU':
                loss = layer.backward(loss)

class SGD:
    def __init__(self, model, learning_rate):
        self.model = model
        self.lr = learning_rate

    def step(self):
        for index, layer in enumerate(self.model._constructor_Parameter.layers[::-1]):
            if type(layer).__name__ == 'Linear':
                weight, bias = self.model._constructor_Parameter.calling[layer]
                weight_gradient, bias_gradient = layer.backward_list[0], layer.backward_list[1]
                self.model._constructor_Parameter.calling[layer] = [weight - self.lr * weight_gradient,
                                                                    bias - self.lr * bias_gradient]

class SimpleNet(Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(input_channels=25, output_channels=10, bias=True)
        self.linear2 = Linear(input_channels=10, output_channels=2, bias=True)
        self.flatten = Flatten()
        self.relu = ReLU()
        self.softmax = Softmax()

    def forward(self, x):
        x_1 = self.flatten(x)
        x_2 = self.linear1(x_1)
        x_3 = self.relu(x_2)
        x_4 = self.linear2(x_3)
        return x_4

input_x = np.array([[ 0.99197708, -0.77980023, -0.8391331 , -0.41970686,  0.72636492],
                    [ 0.85901409, -0.22374584, -1.95850625, -0.81685145,  0.96359871],
                    [-0.42707937, -0.50053309,  0.34049477,  0.62106931, -0.76039365],
                    [ 0.34206742,  2.15131285,  0.80851759,  0.28673013,  0.84706839],
                    [-1.70231094,  0.36473216,  0.33631525, -0.92515589, -2.57602677]])
target_x = np.array([[1, 0]])

loss_fn = CrossEntropyLoss()
model = SimpleNet()
optim = SGD(model.parameters(), learning_rate=0.01)

for i in range(100):
    output = model(input_x)
    loss = loss_fn(output, target_x)
    loss.backward()
    optim.step()
    if (i % 20) == 0:
        print(loss.loss, i)
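The backward pass in CrossEntropyLoss above starts directly from predicted - true: for softmax followed by cross-entropy with a one-hot target, the gradient of the loss with respect to the logits collapses to exactly that difference. Below is a minimal finite-difference check of this identity; it is a standalone sketch, not part of the original listings, and every name in it is local to the sketch.

# Finite-difference check that d(cross-entropy)/d(logits) = softmax(logits) - true,
# the identity used as the starting point of CrossEntropyLoss.backward.
import numpy as np

def ce(logits, true):
    p = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
    return -np.sum(true * np.log(p))

logits = np.array([[0.3, -1.2]])
true = np.array([[1.0, 0.0]])

analytic = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True) - true

numeric = np.zeros_like(logits)
eps = 1e-6
for j in range(logits.shape[1]):
    shift = np.zeros_like(logits)
    shift[0, j] = eps
    # central difference approximation of the partial derivative w.r.t. logit j
    numeric[0, j] = (ce(logits + shift, true) - ce(logits - shift, true)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-6))  # True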
import numpy as np

class Parameter:
    layers = []
    calling = {}

    def __init__(self, info):
        self.layers.append(info[0])
        self.calling[info[0]] = info[1:]

class Module:
    def __init__(self):
        self.Parameter = Parameter

    def forward(self):
        pass

    def __call__(self, x):
        return self.forward(x)

    def parameters(self):
        return self

class Linear(Module):
    def __init__(self, input_channels: int, output_channels: int, bias=True):
        super().__init__()  # needed so that self.Parameter is set
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.bias = bias
        self.backward_list = []
        if bias:
            self.Parameter([self,
                            np.random.uniform(-0.5, 0.5, size=(input_channels, output_channels)),
                            np.random.uniform(-0.5, 0.5, size=output_channels)])
        else:
            self.Parameter([self,
                            np.random.uniform(-0.5, 0.5, size=(input_channels, output_channels)),
                            np.zeros(output_channels)])

    def __call__(self, x):
        self.x = np.array(x, copy=True)
        result = x @ self.Parameter.calling[self][0] + self.Parameter.calling[self][1]
        return result

    def backward(self, input_matrix):
        # Not used in this example: CrossEntropyLoss.backward computes the gradients directly
        x_gradient = input_matrix @ self.Parameter.calling[self][0].T
        self.backward_list = [self.x.T @ input_matrix, input_matrix.mean(axis=0)]
        return x_gradient

class Flatten(Module):
    def __call__(self, x):
        # the 5x5 input is treated as a single sample with 25 features
        return x.reshape(1, -1)

class ReLU(Module):
    def __call__(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, input_matrix):
        return (self.x > 0) * input_matrix

class Softmax(Module):
    def __call__(self, z):
        exp_z = np.exp(z)
        return exp_z / exp_z.sum(axis=1, keepdims=True)

class CrossEntropyLoss(Module):
    def __call__(self, logits, true):
        self.predicted = Softmax()(logits)
        self.true = np.array(true, copy=True)
        self.loss = -np.sum(true * np.log(self.predicted + 1e-5), axis=1)
        return self

    def backward(self):
        loss = self.predicted - self.true
        for layer in self.Parameter.layers[::-1]:
            if isinstance(layer, Linear):
                changes_w = (layer.x.T @ loss) / loss.shape[0]
                if layer.bias:
                    changes_b = np.sum(loss, axis=0) / loss.shape[0]
                else:
                    changes_b = 0
                layer.backward_list = [changes_w, changes_b]
                loss = loss @ layer.Parameter.calling[layer][0].T
            elif isinstance(layer, ReLU):
                loss = layer.backward(loss)

class SGD:
    def __init__(self, model, learning_rate):
        self.model = model
        self.lr = learning_rate

    def step(self):
        for layer in self.model.Parameter.layers[::-1]:
            if isinstance(layer, Linear):
                weight, bias = layer.Parameter.calling[layer]
                weight_gradient, bias_gradient = layer.backward_list
                layer.Parameter.calling[layer] = [weight - self.lr * weight_gradient,
                                                  bias - self.lr * bias_gradient]

class SimpleNet(Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(input_channels=25, output_channels=10, bias=True)
        self.linear2 = Linear(input_channels=10, output_channels=2, bias=True)
        self.flatten = Flatten()
        self.relu = ReLU()
        self.softmax = Softmax()

    def forward(self, x):
        x_1 = self.flatten(x)
        x_2 = self.linear1(x_1)
        x_3 = self.relu(x_2)
        x_4 = self.linear2(x_3)
        return x_4

input_x = np.array([[ 0.99197708, -0.77980023, -0.8391331 , -0.41970686,  0.72636492],
                    [ 0.85901409, -0.22374584, -1.95850625, -0.81685145,  0.96359871],
                    [-0.42707937, -0.50053309,  0.34049477,  0.62106931, -0.76039365],
                    [ 0.34206742,  2.15131285,  0.80851759,  0.28673013,  0.84706839],
                    [-1.70231094,  0.36473216,  0.33631525, -0.92515589, -2.57602677]])
target_x = np.array([[1, 0]])

loss_fn = CrossEntropyLoss()
model = SimpleNet()
optim = SGD(model.parameters(), learning_rate=0.01)

for i in range(100):
    output = model(input_x)
    loss = loss_fn(output, target_x)
    loss.backward()
    optim.step()
    if (i % 20) == 0:
        print(loss.loss, i)
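Once the training loop has finished, the Softmax layer that SimpleNet creates but never calls in forward() can be applied to the logits to read off class probabilities. A small usage sketch, assuming the listing above has just been run (model and input_x as defined there):

# Inference sketch: convert the trained model's logits into class probabilities.
# After 100 steps on the single sample with target [1, 0], the probability of
# class 0 should have moved upwards.
probabilities = model.softmax(model(input_x))   # shape (1, 2)
predicted_class = int(np.argmax(probabilities, axis=1)[0])
print(probabilities, predicted_class)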