A working program
# From https://www.kaggle.com/code/freacle/part-1
# The original article https://habr.com/ru/articles/869118/
# contains many code fragments with explanations
import numpy as np

Parameter = None

def ParameterObj():
    class Parameter:
        layers = []
        calling = dict()
        def __init__(self, info):
            Parameter.layers.append(info[0])
            Parameter.calling[info[0]] = info[1:]
    return Parameter
class Module:
    def __init__(self):
        self._constructor_Parameter = ParameterObj()
        global Parameter
        Parameter = self._constructor_Parameter
    def forward(self):
        pass
    def __call__(self, x):
        return self.forward(x)
    def parameters(self):
        return self
class Linear:
    def __init__(self, input_channels: int, output_channels: int, bias=True):
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.bias = bias
        self.backward_list = []
        if bias:
            Parameter([self, np.random.uniform(-0.5, 0.5, size=(self.input_channels, self.output_channels)), np.random.uniform(-0.5, 0.5, size=self.output_channels)])
        else:
            Parameter([self, np.random.uniform(-0.5, 0.5, size=(self.input_channels, self.output_channels)), np.zeros(self.output_channels)])
    def __call__(self, x):
        self.x = np.array(x, copy=True)
        result = x @ Parameter.calling[self][0] + Parameter.calling[self][1]
        return result
    def backward(self, input_matrix):
        # the weights live in Parameter.calling[self][0]; there is no self.weight attribute
        x_gradient = input_matrix @ Parameter.calling[self][0].T
        self.weight_gradient = self.x.T @ input_matrix
        self.bias_gradient = input_matrix.mean(axis=0)
        return x_gradient
class Flatten:
    def __init__(self):
        pass
    def __call__(self, x):
        return x.reshape(1, -1)

class ReLU:
    def __init__(self):
        pass
    def __call__(self, x):
        self.x = x
        return np.maximum(0, x)
    def backward(self, input_matrix):
        return (self.x > 0) * input_matrix

class Softmax():
    def __init__(self):
        pass
    def __call__(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=1).reshape(-1, 1)
class CrossEntropyLoss:
    def __init__(self):
        self.predicted = None
        self.true = None
    def __call__(self, logits, true):
        predicted = np.exp(logits) / np.sum(np.exp(logits), axis=1).reshape(-1, 1)  # softmax
        self.predicted = np.array(predicted, copy=True)  # keep a copy of the predictions for the backward pass
        self.true = np.array(true, copy=True)  # keep a copy of the targets for the backward pass
        number_of_classes = predicted.shape[1]  # number of classes, 2 in our case
        # compute the loss value directly from the formula
        self.loss = -1 * np.sum(true * np.log(predicted + 1e-5), axis=1)
        return self
    def backward(self):
        loss = self.predicted - self.true
        # Iterate over the layers in reverse order, which works because everything was saved in Parameter.layers
        for index, layer in enumerate(Parameter.layers[::-1]):
            if type(layer).__name__ == 'Linear':
                changes_w = (layer.x.T @ loss) / loss.shape[0]
                # dividing by loss.shape[0] is needed because the size of the update depends on the batch size
                if layer.bias:
                    changes_b = (np.sum(loss) / loss.shape[0])
                else:
                    changes_b = 0
                layer.backward_list = [changes_w, changes_b]
                # propagate the gradient to the layers that come earlier
                loss = loss @ Parameter.calling[layer][0].T
            elif type(layer).__name__ == 'ReLU':
                # note: only Linear layers register themselves in Parameter.layers, so for SimpleNet this branch is never reached
                loss = layer.backward(loss)
class SGD:
    def __init__(self, model, learning_rate):
        self.model = model
        self.lr = learning_rate
    def step(self):
        for index, layer in enumerate(self.model._constructor_Parameter.layers[::-1]):
            if type(layer).__name__ == 'Linear':
                weight, bias = self.model._constructor_Parameter.calling[layer]
                weight_gradient, bias_gradient = layer.backward_list[0], layer.backward_list[1]
                self.model._constructor_Parameter.calling[layer] = [weight - self.lr * weight_gradient, bias - self.lr * bias_gradient]
class SimpleNet(Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(input_channels=25, output_channels=10, bias=True)
        self.linear2 = Linear(input_channels=10, output_channels=2, bias=True)
        self.flatten = Flatten()
        self.relu = ReLU()
        self.softmax = Softmax()
    def forward(self, x):
        x_1 = self.flatten(x)
        x_2 = self.linear1(x_1)
        x_3 = self.relu(x_2)
        x_4 = self.linear2(x_3)
        return x_4
input_x = np.array([[ 0.99197708, -0.77980023, -0.8391331 , -0.41970686,  0.72636492],
                    [ 0.85901409, -0.22374584, -1.95850625, -0.81685145,  0.96359871],
                    [-0.42707937, -0.50053309,  0.34049477,  0.62106931, -0.76039365],
                    [ 0.34206742,  2.15131285,  0.80851759,  0.28673013,  0.84706839],
                    [-1.70231094,  0.36473216,  0.33631525, -0.92515589, -2.57602677]])
target_x = np.array([[1, 0]])

loss_fn = CrossEntropyLoss()
model = SimpleNet()
optim = SGD(model.parameters(), learning_rate=0.01)

for i in range(100):
    output = model(input_x)
    loss = loss_fn(output, target_x)
    loss.backward()
    optim.step()
    if (i % 20) == 0:
        print(loss.loss, i)
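The key step in CrossEntropyLoss.backward is loss = self.predicted - self.true: for softmax followed by cross-entropy, the gradient of the loss with respect to the logits is softmax(z) - y. The small standalone check below (added here for illustration; it is not part of the original program) compares that formula with a central-difference estimate:

import numpy as np

def ce(z, y, eps=1e-12):
    # cross-entropy of softmax(z) against one-hot targets y
    p = np.exp(z) / np.sum(np.exp(z), axis=1).reshape(-1, 1)
    return -np.sum(y * np.log(p + eps))

z = np.array([[0.3, -1.2]])   # logits for one sample with two classes
y = np.array([[1.0, 0.0]])    # one-hot target, same shape as target_x above

analytic = np.exp(z) / np.sum(np.exp(z), axis=1).reshape(-1, 1) - y   # softmax(z) - y
numeric = np.zeros_like(z)
h = 1e-6
for j in range(z.shape[1]):
    dz = np.zeros_like(z)
    dz[0, j] = h
    numeric[0, j] = (ce(z + dz, y) - ce(z - dz, y)) / (2 * h)

print(np.allclose(analytic, numeric, atol=1e-6))   # expected to print True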
To shorten the program above, it was handed over to an AI for optimization. After the AI threw out all the comments and made a few other changes, the text it returned was missing the ending that contains the usage example.
import numpy as np

class Parameter:
    layers = []
    calling = {}
    def __init__(self, info):
        self.layers.append(info[0])
        self.calling[info[0]] = info[1:]

class Module:
    def __init__(self):
        self.Parameter = Parameter
    def forward(self):
        pass
    def __call__(self, x):
        return self.forward(x)
    def parameters(self):
        return self

class Linear(Module):
    def __init__(self, input_channels: int, output_channels: int, bias=True):
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.bias = bias
        self.backward_list = []
        if bias:
            self.Parameter([self, np.random.uniform(-0.5, 0.5, size=(input_channels, output_channels)), np.random.uniform(-0.5, 0.5, size=output_channels)])
        else:
            self.Parameter([self, np.random.uniform(-0.5, 0.5, size=(input_channels, output_channels)), np.zeros(output_channels)])
    def __call__(self, x):
        self.x = np.array(x, copy=True)
        result = x @ self.Parameter.calling[self][0] + self.Parameter.calling[self][1]
        return result
    def backward(self, input_matrix):
        x_gradient = input_matrix @ self.Parameter.calling[self][0].T
        self.backward_list = [input_matrix.T @ self.x, input_matrix.mean(axis=0)]
        return x_gradient

class Flatten(Module):
    def __call__(self, x):
        return x.reshape(x.shape[0], -1)

class ReLU(Module):
    def __call__(self, x):
        self.x = x
        return np.maximum(0, x)
    def backward(self, input_matrix):
        return (self.x > 0) * input_matrix

class Softmax(Module):
    def __call__(self, z):
        exp_z = np.exp(z)
        return exp_z / exp_z.sum(axis=1, keepdims=True)

class CrossEntropyLoss(Module):
    def __call__(self, logits, true):
        self.predicted = self.Softmax()(logits)
        self.true = np.array(true, copy=True)
        self.loss = -np.sum(true * np.log(self.predicted + 1e-5), axis=1)
        return self
    def backward(self):
        loss = self.predicted - self.true
        for layer in self.Parameter.layers[::-1]:
            if isinstance(layer, Linear):
                layer.backward_list[0] = (layer.x.T @ loss) / loss.shape[0]
                if layer.bias:
                    layer.backward_list[1] = np.sum(loss, axis=0) / loss.shape[0]
                loss = loss @ layer.Parameter.calling[layer][0].T
            elif isinstance(layer, ReLU):
                loss = layer.backward(loss)

class SGD:
    def __init__(self, model, learning_rate):
        self.model = model
        self.lr = learning_rate
    def step(self):
        for layer in self.model.Parameter.layers[::-1]:
            if isinstance(layer, Linear):
                weight, bias = layer.Parameter.calling[layer]
                weight_gradient, bias_gradient = layer.backward_list
                layer.Parameter.calling[layer] = [weight - self.lr * weight_gradient, bias - self.lr * bias_gradient]

class SimpleNet(Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(input_channels=25, output_channels=10, bias=True)
        self.linear2 = Linear(input_channels=10, output_channels=2, bias=True)
        self.flatten = Flatten()
        self.relu = ReLU()
        self.softmax = Softmax()
    def forward(self, x):
        x_1 = self.flatten(x)
        x_2 = self.linear1(x_1)
        x_3 = self.relu(x_2)
        x_4 = self.linear2(x_3)
        return x_4
input_x = np.array([[ 0.99197708, -0.77980023, -0.8391331 , -0.41970686,  0.72636492],
                    [ 0.85901409, -0.22374584, -1.95850625, -0.81685145,  0.96359871],
                    [-0.42707937, -0.50053309,  0.34049477,  0.62106931, -0.76039365],
                    [ 0.34206742,  2.15131285,  0.80851759,  0.28673013,  0.84706839],
                    [-1.70231094,  0.36473216,  0.33631525, -0.92515589, -2.57602677]])
target_x = np.array([[1, 0]])

loss_fn = CrossEntropyLoss()
model = SimpleNet()
optim = SGD(model.parameters(), learning_rate=0.01)

for i in range(100):
    output = model(input_x)
    loss = loss_fn(output, target_x)
    loss.backward()
    optim.step()
    if (i % 20) == 0:
        print(loss.loss, i)
The text without the ending ran without errors, but it also produced no output. Adding the original ending back led to errors: the shortened Linear no longer calls Module.__init__, so self.Parameter does not exist when SimpleNet is constructed; even with that fixed, CrossEntropyLoss refers to a non-existent self.Softmax attribute, backward assigns into the empty backward_list, and Flatten now keeps the batch dimension, so the flattened 5x5 input no longer matches the 25 input channels of linear1.
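For reference, here is a minimal repair sketch (my own addition, not part of the experiment above). It assumes the shortened version's Parameter, Module, ReLU, Softmax, SGD and SimpleNet classes are already defined, and only re-defines the pieces that fail, restoring the behaviour of the first program:

import numpy as np

class Linear(Module):
    def __init__(self, input_channels: int, output_channels: int, bias=True):
        super().__init__()           # was missing: without it self.Parameter is never set
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.bias = bias
        self.backward_list = [0, 0]  # was []: assigning into an empty list by index raises IndexError
        weight = np.random.uniform(-0.5, 0.5, size=(input_channels, output_channels))
        bias_vector = np.random.uniform(-0.5, 0.5, size=output_channels) if bias else np.zeros(output_channels)
        self.Parameter([self, weight, bias_vector])
    def __call__(self, x):
        self.x = np.array(x, copy=True)
        return x @ self.Parameter.calling[self][0] + self.Parameter.calling[self][1]

class Flatten(Module):
    def __call__(self, x):
        return x.reshape(1, -1)      # the 5x5 input is a single sample, as in the first program

class CrossEntropyLoss(Module):
    def __call__(self, logits, true):
        self.predicted = Softmax()(logits)   # was self.Softmax(), an attribute that does not exist
        self.true = np.array(true, copy=True)
        self.loss = -np.sum(true * np.log(self.predicted + 1e-5), axis=1)
        return self
    def backward(self):
        loss = self.predicted - self.true
        for layer in self.Parameter.layers[::-1]:
            if isinstance(layer, Linear):
                layer.backward_list[0] = (layer.x.T @ loss) / loss.shape[0]
                if layer.bias:
                    layer.backward_list[1] = np.sum(loss, axis=0) / loss.shape[0]
                loss = loss @ layer.Parameter.calling[layer][0].T
            elif isinstance(layer, ReLU):
                loss = layer.backward(loss)

With these re-definitions in place, the ending from the first program (input_x, target_x, loss_fn, model, optim and the training loop) should run again and print the loss every 20 iterations.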