我构建一个具有numpy的MLP以近似余弦函数,当损失为平均错误时,它会收敛:
导入numpy作为NP
导入大熊猫作为pd
从matplotlib导入PHPLOT作为PLT
来自Sklearn。
i构建具有numpy的MLP以近似cosinus函数,当损失是平均错误时,它会收敛:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
np.random.seed(0)
X_min = -10
X_max = 10
X = np.linspace(X_min, X_max, 10_000).reshape(-1, 1)
y = np.cos(X).reshape(-1, 1)
X = (X - X_min) / (X_max - X_min) -0.5
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
class NeuralNetwork:
def __init__(self, layer_sizes):
self.layer_sizes = layer_sizes
self.num_layers = len(layer_sizes)
self.learning_rate = 0.01
# Initialize weights and biases for all layers except input layer
self.weights = [np.random.randn(layer_sizes[i-1], layer_sizes[i]) for i in range(1, self.num_layers)]
self.biases = [np.zeros((1, layer_sizes[i])) for i in range(1, self.num_layers)]
def forward(self, X):
self.activations = [X] # Store activations for all layers
output = X
for w, b in zip(self.weights, self.biases):
output = np.dot(output, w) + b
output = np.tanh(output) # Activation function (tanh in this case)
self.activations.append(output)
self.output = output
def backward(self, X, y):
m = X.shape[0]
# Compute gradients
delta = (self.output - y)**1
#print(delta.shape)
moyenne = np.mean(delta)
delta = np.repeat(moyenne, m).reshape(-1, 1)
for i in range(self.num_layers - 1, 0, -1):
# Compute gradients for weights and biases
self.weights[i-1] -= self.learning_rate * (1 / m) * np.dot(self.activations[i-1].T, delta)
self.biases[i-1] -= self.learning_rate * (1 / m) * np.sum(delta, axis=0)
# Compute delta for previous layer
delta = np.dot(delta, self.weights[i-1].T) * (1 - np.power(self.activations[i-1], 2))
def train(self, X, y, epochs, batch_size=30):
num_samples = X.shape[0]
for epoch in range(epochs):
for i in range(0, num_samples, batch_size):
X_batch = X[i:i+batch_size]
y_batch = y[i:i+batch_size]
# Forward pass
self.forward(X_batch)
# Backward pass
self.backward(X_batch, y_batch)
# Print loss every 100 epochs
if epoch % (N_epochs//10) == 0:
# Use entire dataset for loss calculation
self.forward(X)
loss = np.mean(np.square(self.output - y))
#loss = np.mean((self.output - y))
print(f'Epoch {epoch}, Loss: {loss}')
# Example usage:
layer_sizes = [1, 100,100,100, 1] # Define layer sizes including input and output layers
N_epochs = 100
model = NeuralNetwork(layer_sizes)
model.train(X_train, y_train, epochs=N_epochs, batch_size=32)
# Evaluate the model
model.forward(X_test)
test_loss = np.mean(np.square(model.output - y_test))
print(f'Test Loss: {test_loss}')
X_plot = np.linspace(X_min, X_max, 1000).reshape(-1, 1)
X_plot_2 = (X_plot - X_min) / (X_max - X_min) - 0.5
model.forward(X_plot_2)
plt.plot(np.cos(X_plot))
plt.plot(model.output)
但是,当delta =(self.output -y)** 2 aka损耗=平方英尺错误
时,它不会收敛
我们可以看到可以使用TensorFlow完成收敛:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
np.random.seed(0)
X_min = -10
X_max = 10
X = np.linspace(X_min, X_max, 10_000).reshape(-1, 1)
y = np.cos(X).reshape(-1, 1)
X = (X - X_min) / (X_max - X_min) - 0.5
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = tf.keras.Sequential([
tf.keras.layers.Dense(100, activation='relu', input_shape=(1,)),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(1)
])
model.compile(loss='mean_squared_error')
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)
你能解释我为什么吗?
是因为来自TensorFlow的优化器吗?
我试图找到一种不使用TensorFlow的优化器的方法:
# Define a dummy optimizer that does nothing
dummy_optimizer = lambda lr: tf.keras.optimizers.Optimizer()
# Compile the model with the dummy optimizer
model.compile(loss='mean_squared_error', optimizer=dummy_optimizer)
,但无法实现