
2.2.1 建立线性模型(两层网络间没有激活函数)
# Hyperparameters for the optimizer below.
learning_rate = 1e-3
lambda_l2 = 1e-5  # L2 regularization strength, passed to the optimizer as weight_decay

# Build the network: two stacked linear layers with no activation in between,
# so the composition is still a purely linear (affine) map.
model = nn.Sequential(
    nn.Linear(D, H),
    nn.Linear(H, C),
)
model.to(device)  # move the model to `device` (the GPU, per the original note)

# This is a regression problem, so use the mean-squared-error loss.
criterion = torch.nn.MSELoss()

# Plain SGD optimizer.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=lambda_l2)  # built-in L2

# Training loop: every iteration feeds X through the model once.
for t in range(1000):
    # Forward pass: feed the data to the model to get predictions.
    y_pred = model(X)

    # Compute the MSE loss against the targets.
    loss = criterion(y_pred, y)
    print("[EPOCH]: %i, [LOSS or MSE]: %.6f" % (t, loss.item()))
    # Presumably IPython's display module (confirm against the file's imports):
    # wait=True defers the clear until new output arrives, so the progress
    # line is replaced in place instead of flickering.
    display.clear_output(wait=True)

    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    # Backward pass: compute gradients of the loss w.r.t. the parameters.
    loss.backward()

    # Update the parameters.
    optimizer.step()
[EPOCH]: 999, [LOSS or MSE]: 0.029701

2.2.2 两层神经网络
# Two networks are defined here, relu_model and tanh_model. They share the
# same layer layout and differ only in the activation between the two
# linear layers.
relu_model = nn.Sequential(
    nn.Linear(D, H),
    nn.ReLU(),
    nn.Linear(H, C),
)
relu_model.to(device)

tanh_model = nn.Sequential(
    nn.Linear(D, H),
    nn.Tanh(),
    nn.Linear(H, C),
)
tanh_model.to(device)

# MSE loss; the same criterion object is used for both models.
criterion = torch.nn.MSELoss()

# One Adam optimizer per model. The original author notes that SGD gives
# noticeably worse results for these models.
optimizer_relumodel = torch.optim.Adam(relu_model.parameters(), lr=learning_rate, weight_decay=lambda_l2)
optimizer_tanhmodel = torch.optim.Adam(tanh_model.parameters(), lr=learning_rate, weight_decay=lambda_l2)

# Training loop: both models are trained side by side on the same X and y.
for t in range(1000):
    # Forward pass through each model.
    y_pred_relumodel = relu_model(X)
    y_pred_tanhmodel = tanh_model(X)

    # Compute the loss for each model (no accuracy is computed here).
    loss_relumodel = criterion(y_pred_relumodel, y)
    loss_tanhmodel = criterion(y_pred_tanhmodel, y)
    print(f"[MODEL]: relu_model, [EPOCH]: {t}, [LOSS]: {loss_relumodel.item():.6f}")
    print(f"[MODEL]: tanh_model, [EPOCH]: {t}, [LOSS]: {loss_tanhmodel.item():.6f}")
    # Presumably IPython's display module (confirm against the file's imports):
    # wait=True defers the clear until new output arrives, so the two progress
    # lines are replaced in place.
    display.clear_output(wait=True)

    # Reset accumulated gradients before backpropagation.
    optimizer_relumodel.zero_grad()
    optimizer_tanhmodel.zero_grad()

    # Each loss is computed from its own model's output, so each backward()
    # call fills in gradients for that model's parameters.
    loss_relumodel.backward()
    loss_tanhmodel.backward()

    # Apply the parameter updates.
    optimizer_relumodel.step()
    optimizer_tanhmodel.step()
[MODEL]: relu_model, [EPOCH]: 999, [LOSS]: 0.006584
[MODEL]: tanh_model, [EPOCH]: 999, [LOSS]: 0.014194
