利用logistic回归解决多分类问题

利用 logistic 回归(one-vs-all 多分类)解决手写数字识别问题。所用数据集为 ex3data1.mat,如需获取请私聊。

from scipy.io import loadmat
import numpy as np
import pandas as pd
from scipy.optimize import minimize


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise."""
    return 1 / (1 + np.exp(-z))


def _add_intercept(X):
    """Return a copy of 2-D ``X`` with a leading column of ones (bias term)."""
    X = np.asarray(X)
    return np.hstack((np.ones((X.shape[0], 1)), X))


def cost(theta, X, y, learningrate):
    """Return the L2-regularized logistic-regression cost.

    Args:
        theta: Parameter vector; flattened to 1-D before use.
        X: 2-D design matrix, one sample per row. Column 0 is treated as the
            intercept column (its parameter is excluded from the penalty).
        y: 0/1 targets, one per row of ``X``; a 1-D vector or an (m, 1)
            column are both accepted.
        learningrate: Despite the name, this is the regularization strength
            (it multiplies the squared-parameter penalty).

    Returns:
        The mean cross-entropy plus ``learningrate / (2 m) * sum(theta[1:]**2)``.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    # Flatten so an (m, 1) column and an (m,) vector behave the same and can
    # never broadcast against the (m,) scores into an m-by-m array.
    y = np.asarray(y, dtype=float).ravel()
    m = len(X)
    z = X @ theta
    # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) ==
    # logaddexp(0, z); this form stays finite when the sigmoid saturates,
    # where log(sigmoid(z)) would produce 0 * -inf = nan.
    data_term = np.sum(y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)) / m
    reg = (learningrate / (2 * m)) * np.sum(theta[1:] ** 2)
    return data_term + reg


def gradientReg(theta, X, y, learningRate):
    """Return the gradient of :func:`cost` with respect to ``theta``.

    Args:
        theta: Parameter vector; flattened to 1-D before use.
        X: 2-D design matrix whose column 0 is the intercept column.
        y: 0/1 targets as a 1-D vector or an (m, 1) column.
        learningRate: Regularization strength (same role as ``learningrate``
            in :func:`cost`).

    Returns:
        1-D ``numpy.ndarray`` with one entry per parameter.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    m = len(X)
    error = sigmoid(X @ theta) - y
    grad = X.T @ error / m
    # The intercept (index 0) is not penalized, so only the remaining
    # entries receive the regularization term.
    grad[1:] += (learningRate / m) * theta[1:]
    return grad


def one_vs_all(X, y, num_labels, learning_rate):
    """Train one regularized logistic classifier per label.

    Args:
        X: 2-D feature matrix WITHOUT an intercept column (one is added here).
        y: Labels in ``1..num_labels``, as a 1-D vector or an (m, 1) column.
        num_labels: Number of classes; classifier ``i`` is stored in row
            ``i - 1`` of the result.
        learning_rate: Regularization strength forwarded to :func:`cost` and
            :func:`gradientReg`.

    Returns:
        Array of shape ``(num_labels, n_features + 1)``; each row holds the
        optimized parameters (intercept first) of one classifier.
    """
    X = np.asarray(X)
    params = X.shape[1]
    # Prepend a column of ones to X for the intercept term.
    X = _add_intercept(X)
    labels = np.asarray(y).ravel()
    all_theta = np.zeros((num_labels, params + 1))
    for i in range(1, num_labels + 1):
        theta = np.zeros(params + 1)
        # Binary target for this classifier: 1 where the label is i, else 0.
        y_i = (labels == i).astype(int)
        # Find the optimal parameters for this classifier.
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, learning_rate),
                        method='TNC', jac=gradientReg)
        all_theta[i - 1, :] = fmin.x
    return all_theta


def predict_all(X, all_theta):
    """Predict a label in ``1..num_labels`` for every row of ``X``.

    Args:
        X: 2-D feature matrix WITHOUT an intercept column (one is added here).
        all_theta: Parameter matrix as returned by :func:`one_vs_all`.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(m, 1)`` holding, per sample, the
        1-based index of the classifier with the highest probability.
    """
    # Prepend a column of ones to X; the row count comes from X itself so the
    # function works for any number of samples (not only the training set).
    X = _add_intercept(X)
    all_theta = np.asarray(all_theta)
    h = sigmoid(X @ all_theta.T)
    # Index of the largest probability within each row (i.e. per sample);
    # +1 because labels are 1-based while argmax is 0-based.
    return np.argmax(h, axis=1).reshape(-1, 1) + 1


if __name__ == "__main__":
    # Kept from the original listing but unused below; imported here rather
    # than at the top so importing the functions does not need matplotlib.
    import matplotlib.pyplot as plt

    data = loadmat('ex3data1.mat')
    # NOTE(review): the original comment described data['X'] as "5000 matrices
    # of 200*200". The code only treats it as a 2-D (samples, features) array;
    # confirm the real image size against the dataset.
    data_row = data['X'].shape
    data_cols = data['y'].shape  # one label per sample

    num_labels = 10
    learningRate = 1
    rows = data['X'].shape[0]
    params = data['X'].shape[1]
    # X (with intercept column) and theta are not used by the pipeline below;
    # they are handy for calling cost()/gradientReg() by hand.
    X = np.insert(data['X'], 0, values=np.ones(rows), axis=1)
    theta = np.zeros(params + 1)

    all_theta = one_vs_all(data['X'], data['y'], num_labels, learningRate)
    y_predict = predict_all(data['X'], all_theta)
    correct = y_predict.ravel() == data['y'].ravel()
    accuracy = float(np.mean(correct))
    print('accuracy = {0}%'.format(accuracy * 100))

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/zgdgxj.html