Assignment 3: Gradient Descent with Minibatches

| Item | Content |
| --- | --- |
| Course this assignment belongs to | Artificial Intelligence Practice 2019 (Beihang University) |
| Assignment requirements | Assignment 3: Gradient descent with minibatches |
| My goals in this course | Learn algorithms, accumulate project experience, and sharpen coding skills |
| How this assignment helps me reach those goals | Understand the concepts of batch, iteration, and epoch; practice batch-based processing |
| Assignment body | See below |
| Other references | Microsoft sample code |
1. Assignment requirements

Perform gradient descent using minibatches.

Review the course material covered so far (link) and answer the following questions about the 2D contour plot of the loss function:

Why are the contours ellipses rather than circles? How could the plot be turned into circles?

Why is the center an elliptical region rather than a single point?
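
As a pointer for the first question, here is a rough sketch of the algebra, assuming the single-feature linear model $z = wx + b$ and the mean-squared-error loss that `CheckLoss` computes in the code below:

$$
J(w,b) = \frac{1}{2m}\sum_{i=1}^{m}\left(w x_i + b - y_i\right)^2
       = \frac{\overline{x^2}}{2}\,w^2 + \frac{1}{2}\,b^2 + \overline{x}\,wb - \overline{xy}\,w - \overline{y}\,b + \frac{\overline{y^2}}{2},
$$

where the bars denote sample means. A contour $J(w,b)=c$ is therefore a quadratic curve whose $w^2$ and $b^2$ coefficients ($\overline{x^2}/2$ versus $1/2$) generally differ, and whose cross term $\overline{x}\,wb$ tilts it, so the level sets come out as rotated ellipses; only if the inputs were rescaled so that $\overline{x}=0$ and $\overline{x^2}=1$ would the two coefficients match, the cross term vanish, and the contours become circles.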

2. Implementing minibatch gradient descent with randomly selected data

Sample code location: /B-教学案例与实践/B6-神经网络基本原理简明教程/微软-方案1/NeuralNetwork/ch04/level4-BatchGradientDescent.py

```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import savefig
from pathlib import Path

x_data_name = "TemperatureControlXData.dat"
y_data_name = "TemperatureControlYData.dat"

class CData(object):
    def __init__(self, loss, w, b, epoch, iteration):
        self.loss = loss
        self.w = w
        self.b = b
        self.epoch = epoch
        self.iteration = iteration

def ReadData():
    Xfile = Path(x_data_name)
    Yfile = Path(y_data_name)
    if Xfile.exists() & Yfile.exists():
        X = np.load(Xfile)
        Y = np.load(Yfile)
        return X.reshape(1,-1), Y.reshape(1,-1)
    else:
        return None, None

def ForwardCalculationBatch(W, B, batch_x):
    Z = np.dot(W, batch_x) + B
    return Z

def BackPropagationBatch(batch_x, batch_y, batch_z):
    m = batch_x.shape[1]
    dZ = batch_z - batch_y
    dB = dZ.sum(axis=1, keepdims=True)/m
    dW = np.dot(dZ, batch_x.T)/m
    return dW, dB

def UpdateWeights(w, b, dW, dB, eta):
    w = w - eta*dW
    b = b - eta*dB
    return w, b

def InitialWeights(num_input, num_output, flag):
    if flag == 0:
        # zero
        W = np.zeros((num_output, num_input))
    elif flag == 1:
        # normalize
        W = np.random.normal(size=(num_output, num_input))
    elif flag == 2:
        # xavier
        W = np.random.uniform(
            -np.sqrt(6/(num_input+num_output)),
            np.sqrt(6/(num_input+num_output)),
            size=(num_output, num_input))
    B = np.zeros((num_output, 1))
    return W, B

def CheckLoss(W, B, X, Y):
    m = X.shape[1]
    Z = np.dot(W, X) + B
    LOSS = (Z - Y)**2
    loss = LOSS.sum()/m/2
    return loss

def shuffle(X, Y):
    num_example = X.shape[1]
    rank = np.arange(0, num_example)
    np.random.shuffle(rank)
    X_shuffle = []
    Y_shuffle = []
    for i in rank:
        X_shuffle.append(X[:,i])
        Y_shuffle.append(Y[:,i])
    X_shuffle = np.transpose(X_shuffle)
    Y_shuffle = np.transpose(Y_shuffle)
    return X_shuffle, Y_shuffle

def GetBatchSamples(X, Y, batch_size, iteration):
    num_feature = X.shape[0]
    start = iteration * batch_size
    end = start + batch_size
    batch_x = X[0:num_feature, start:end].reshape(num_feature, batch_size)
    batch_y = Y[0, start:end].reshape(1, batch_size)
    return batch_x, batch_y

def GetMinimalLossData(dict_loss):
    key = sorted(dict_loss.keys())[0]
    w = dict_loss[key].w
    b = dict_loss[key].b
    return w, b, dict_loss[key]

def ShowIterLossHistory(dict_loss, batch_size):
    loss = []
    for key in dict_loss:
        loss.append(key)
    plt.title("batch size :" + str(batch_size))
    plt.xlabel("iteration")
    plt.plot(loss[30:800])
    plt.ylabel("loss")
    savefig("/Users/souchiguu/Desktop/" + str(batch_size) + ".png")
    plt.show()

def ShowEpochLossHistory(list_epoch, Batchsize):
    color = ['b', 'g', 'y']
    for num_batch in range(len(Batchsize)):
        loss = []
        for key in list_epoch[num_batch]:
            loss.append(key)
        plt.plot(loss, color[num_batch], label='batchsize=' + str(Batchsize[num_batch]))
    plt.title("learning rate = 0.01")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    savefig("/Users/souchiguu/Desktop/" + "0.1" + ".png")
    plt.show()

if __name__ == '__main__':
    # method = "MiniBatch"
    eta, max_epoch = 0.01, 50
    Batchsize = [5, 10, 15]
    list_epoch = []
    # read data
    X_origin, Y_origin = ReadData()
    # count of samples
    num_example = X_origin.shape[1]
    num_feature = X_origin.shape[0]
    for batch_size in Batchsize:
        W, B = InitialWeights(1, 1, 0)
        # calculate loss to decide the stop condition
        # loss = 5
        dict_epoch_loss = {}
        dict_iter_loss = {}
        for epoch in range(max_epoch):
            # random shuffle
            X, Y = shuffle(X_origin, Y_origin)
            # if num_example=200, batch_size=10, then iteration=200/10=20
            max_iteration = (int)(num_example / batch_size)
            sum_loss = 0
            for iteration in range(max_iteration):
                # get x and y value for one sample
                batch_x, batch_y = GetBatchSamples(X, Y, batch_size, iteration)
                # get z from x,y
                batch_z = ForwardCalculationBatch(W, B, batch_x)
                # calculate gradient of w and b
                dW, dB = BackPropagationBatch(batch_x, batch_y, batch_z)
                # update w,b
                W, B = UpdateWeights(W, B, dW, dB, eta)
                # calculate loss for this batch
                loss = CheckLoss(W, B, X, Y)
                # print("batchsize=%d, epoch=%d, iteration=%d, loss=%f" % (batch_size, epoch, iteration, loss))
                dict_iter_loss[loss] = CData(loss, W, B, epoch, iteration)
                sum_loss += loss
            # end for
            dict_epoch_loss[sum_loss] = CData(sum_loss, W, B, epoch, max_iteration)
        # end for
        list_epoch.append(dict_epoch_loss)
        ShowIterLossHistory(dict_iter_loss, batch_size)
        w, b, cdata = GetMinimalLossData(dict_epoch_loss)
        print("w:", cdata.w, "b:", cdata.b)
        print("batchsize=%d, epoch=%d, iteration=%d, loss=%f" % (batch_size, cdata.epoch, cdata.iteration, cdata.loss))
    ShowEpochLossHistory(list_epoch, Batchsize)
```
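
The script above depends on the TemperatureControlXData.dat / TemperatureControlYData.dat files from the course repository. As a self-contained illustration of the same shuffle-then-slice minibatch loop, here is a minimal sketch on synthetic data; the function name `run_minibatch_gd` and the synthetic line y = 2x + 1 are made up for this example and are not part of the course code:

```python
import numpy as np

def run_minibatch_gd(X, Y, batch_size, eta=0.01, max_epoch=50):
    """Minibatch gradient descent for the 1-feature model z = w*x + b.
    X and Y both have shape (1, m)."""
    m = X.shape[1]
    w, b = 0.0, 0.0
    for epoch in range(max_epoch):
        # reshuffle the columns once per epoch, then slice fixed-size batches
        perm = np.random.permutation(m)
        Xs, Ys = X[:, perm], Y[:, perm]
        for it in range(m // batch_size):               # iterations per epoch
            s = it * batch_size
            bx = Xs[:, s:s + batch_size]                # one minibatch of inputs
            by = Ys[:, s:s + batch_size]                # matching labels
            z = w * bx + b                              # forward pass
            dz = z - by
            dw = np.dot(dz, bx.T).item() / batch_size   # gradient w.r.t. w
            db = dz.sum() / batch_size                   # gradient w.r.t. b
            w, b = w - eta * dw, b - eta * db
    return w, b

if __name__ == '__main__':
    # synthetic stand-in for the temperature-control data: y = 2x + 1 plus noise
    rng = np.random.RandomState(0)
    X = rng.uniform(0, 1, size=(1, 200))
    Y = 2 * X + 1 + rng.normal(0, 0.05, size=(1, 200))
    for batch_size in [5, 10, 15]:
        w, b = run_minibatch_gd(X, Y, batch_size)
        print("batch_size=%d, w=%.3f, b=%.3f" % (batch_size, w, b))
```

Indexing the shuffled columns with `np.random.permutation` is behaviourally equivalent to the `shuffle()` helper above, which rebuilds the arrays column by column; both reshuffle once per epoch so that each fixed-size slice becomes a different random minibatch.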
