以上是我训练这个网络时得到的输出。由于加密噪声和精度较低,模型调优会更困难一些;同时,加密和解密操作的开销也使模型的训练变得很慢。虽然我也想做一些更简单的样例,但是从我们的主题和概念出发,首先要保证的是方案的安全性。
重点:网络的权重全部是加密的。
训练数据是未加密的。
在训练之后,网络可以被解密以提高性能或二次训练(或切换到不同的加密密钥)。
训练损失和输出的预测都是加密的值。我们必须先对它们解密,才能解释网络的行为。
第9部分:情感分类

为了提供一个更真实的场景,这里给出一个情感分类网络:它基于优达学城(Udacity)纳米学位课程中的网络实现,并在 IMDB 的影评数据上训练。你可以在这里找到完整的代码。
import time
import sys
import numpy as np


# Let's adapt the previous network to fit this problem.
class SentimentNetwork:
    """Two-layer sentiment classifier whose weights are stored encrypted.

    The vocabulary is built from the training reviews, and every weight row is
    passed through ``one_way_encrypt_vector`` before it is stored.

    The class relies on names that are NOT defined in this listing and must
    already exist at module scope: ``one_way_encrypt_vector``,
    ``scaling_factor``, ``transpose``, ``sigmoid``, ``innerProd``,
    ``M_onehot``, ``l``, ``c_ones``, ``mat_mul_forward``, ``outer_product``
    and ``s_decrypt``.
    """

    def __init__(self, reviews, labels, min_count=10, polarity_cutoff=0.1,
                 hidden_nodes=8, learning_rate=0.1):
        """Build the vocabulary from ``reviews``/``labels`` and create the network.

        Args:
            reviews: sequence of review strings (words separated by single spaces).
            labels: sequence of labels aligned with ``reviews``; the string
                ``'POSITIVE'`` marks a positive review, anything else counts
                as negative.
            min_count: a word must occur more than this many times to enter
                the vocabulary.
            polarity_cutoff: minimum absolute log-ratio for words that have a
                positive/negative ratio.
            hidden_nodes: size of the hidden layer.
            learning_rate: step size used by ``train``.
        """
        np.random.seed(1234)
        # Pass the labels explicitly; the method used to read a module-level
        # ``labels`` name and ignored the constructor argument.
        self.pre_process_data(reviews, polarity_cutoff, min_count, labels)
        self.init_network(len(self.review_vocab), hidden_nodes, 1, learning_rate)

    def pre_process_data(self, reviews, polarity_cutoff, min_count, labels=None):
        """Build ``review_vocab``, ``label_vocab`` and their index lookups.

        Args:
            reviews: sequence of review strings.
            polarity_cutoff: minimum absolute log-ratio for a word that has a
                positive/negative ratio to be kept.
            min_count: a word must occur more than this many times to be kept.
            labels: labels aligned with ``reviews``. When omitted, the
                module-level ``labels`` name is used, which is what this
                method always did before the parameter existed.

        Raises:
            ValueError: if no labels are passed and no module-level ``labels``
                exists.
        """
        # Local import: the listing's import block does not bring Counter in.
        from collections import Counter

        if labels is None:
            labels = globals().get("labels")
            if labels is None:
                raise ValueError("labels must be provided to pre_process_data")

        print("Pre-processing data...")

        positive_counts = Counter()
        negative_counts = Counter()
        total_counts = Counter()
        for i, review in enumerate(reviews):
            if labels[i] == 'POSITIVE':
                polarity_counts = positive_counts
            else:
                polarity_counts = negative_counts
            for word in review.split(" "):
                polarity_counts[word] += 1
                total_counts[word] += 1

        # Only words seen at least 50 times get a positive/negative ratio.
        pos_neg_ratios = Counter()
        for term, cnt in total_counts.most_common():
            if cnt >= 50:
                pos_neg_ratios[term] = positive_counts[term] / float(negative_counts[term] + 1)

        # Convert the raw ratios to signed log-ratios.
        for word, ratio in pos_neg_ratios.most_common():
            if ratio > 1:
                pos_neg_ratios[word] = np.log(ratio)
            else:
                pos_neg_ratios[word] = -np.log(1 / (ratio + 0.01))

        review_vocab = set()
        for review in reviews:
            for word in review.split(" "):
                if total_counts[word] <= min_count:
                    continue
                if word in pos_neg_ratios:
                    polarity = pos_neg_ratios[word]
                    if polarity >= polarity_cutoff or polarity <= -polarity_cutoff:
                        review_vocab.add(word)
                else:
                    # Words without a ratio are kept on frequency alone.
                    review_vocab.add(word)
        self.review_vocab = list(review_vocab)

        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        self.label_vocab = list(label_vocab)

        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)

        self.word2index = {word: i for i, word in enumerate(self.review_vocab)}
        self.label2index = {label: i for i, label in enumerate(self.label_vocab)}

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Create the plaintext weights, then store an encrypted copy of each row.

        Args:
            input_nodes: number of input units (the vocabulary size).
            hidden_nodes: number of hidden units.
            output_nodes: number of output units.
            learning_rate: step size used by ``train``.
        """
        # Set the number of nodes in the input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        print("Initializing Weights...")
        self.weights_0_1_t = np.zeros((self.input_nodes, self.hidden_nodes))
        self.weights_1_2_t = np.random.normal(0.0, self.output_nodes**-0.5,
                                              (self.hidden_nodes, self.output_nodes))

        print("Encrypting Weights...")
        self.weights_0_1 = list()
        for i, row in enumerate(self.weights_0_1_t):
            sys.stdout.write("\rEncrypting Weights from Layer 0 to Layer 1:"
                             + str(float((i+1) * 100) / len(self.weights_0_1_t))[0:4]
                             + "% done")
            self.weights_0_1.append(one_way_encrypt_vector(row, scaling_factor).astype('int64'))
        print("")

        self.weights_1_2 = list()
        for i, row in enumerate(self.weights_1_2_t):
            sys.stdout.write("\rEncrypting Weights from Layer 1 to Layer 2:"
                             + str(float((i+1) * 100) / len(self.weights_1_2_t))[0:4]
                             + "% done")
            self.weights_1_2.append(one_way_encrypt_vector(row, scaling_factor).astype('int64'))
        self.weights_1_2 = transpose(self.weights_1_2)

        self.learning_rate = learning_rate

        self.layer_0 = np.zeros((1, input_nodes))
        self.layer_1 = np.zeros((1, hidden_nodes))

    def sigmoid(self, x):
        """Return the plaintext logistic function of ``x``."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_output_2_derivative(self, output):
        """Return the sigmoid derivative expressed via the sigmoid's output."""
        return output * (1 - output)

    def update_input_layer(self, review):
        """Set ``layer_0`` to the bag-of-words indicator vector of ``review``."""
        # Clear the previous state: reset every value in the layer to 0.
        self.layer_0 *= 0
        for word in review.split(" "):
            index = self.word2index.get(word)
            # Words pruned from the vocabulary have no input unit; skip them
            # instead of raising KeyError.
            if index is not None:
                self.layer_0[0][index] = 1

    def get_target_for_label(self, label):
        """Return 1 for ``'POSITIVE'`` and 0 for any other label."""
        if label == 'POSITIVE':
            return 1
        return 0

    def train(self, training_reviews_raw, training_labels):
        """Train on the given reviews, one pass, updating the encrypted weights.

        Progress is written to stdout after every review. The prediction is
        decrypted with ``s_decrypt`` only to report the running accuracy.

        Args:
            training_reviews_raw: sequence of review strings.
            training_labels: labels aligned with ``training_reviews_raw``
                (``'POSITIVE'`` / ``'NEGATIVE'``).
        """
        # Pre-compute, per review, the unique vocabulary indices it contains.
        training_reviews = list()
        for review in training_reviews_raw:
            indices = set()
            for word in review.split(" "):
                if word in self.word2index:
                    indices.add(self.word2index[word])
            training_reviews.append(list(indices))

        layer_1 = np.zeros_like(self.weights_0_1[0])

        start = time.time()
        correct_so_far = 0
        total_pred = 0.5
        for i in range(len(training_reviews_raw)):
            review_indices = training_reviews[i]
            label = training_labels[i]

            # Hidden layer: average of the encrypted weight rows of the words.
            layer_1 *= 0
            for index in review_indices:
                layer_1 += self.weights_0_1[index]
            # A review with no known words has no rows to average; divide by 1
            # so the zero vector stays zero instead of becoming NaN.
            layer_1 = layer_1 / float(max(len(review_indices), 1))
            # NOTE: astype('int64') truncates toward zero; it does not round.
            layer_1 = layer_1.astype('int64')

            # ``sigmoid`` here is the module-level function, not self.sigmoid.
            layer_2 = sigmoid(innerProd(layer_1, self.weights_1_2[0],
                                        M_onehot[len(layer_1) - 2][1], l)
                              / float(scaling_factor))[0:2]

            if label == 'POSITIVE':
                layer_2_delta = layer_2 - (c_ones[len(layer_2) - 2] * scaling_factor)
            else:
                layer_2_delta = layer_2

            weights_1_2_trans = transpose(self.weights_1_2)
            layer_1_delta = mat_mul_forward(layer_2_delta, weights_1_2_trans,
                                            scaling_factor).astype('int64')

            self.weights_1_2 -= np.array(outer_product(layer_2_delta, layer_1)) * self.learning_rate

            for index in review_indices:
                self.weights_0_1[index] -= (layer_1_delta * self.learning_rate).astype('int64')

            # Decrypt the prediction here so the training progress can be watched.
            # Decrypted once and reused; assumes s_decrypt has no side effects
            # -- TODO confirm.
            prediction = s_decrypt(layer_2)[0] / scaling_factor
            total_pred += prediction
            # A prediction counts as positive when it is at or above the
            # running mean of all predictions so far.
            running_mean = total_pred / float(i+2)
            if prediction >= running_mean and label == 'POSITIVE':
                correct_so_far += 1
            if prediction < running_mean and label == 'NEGATIVE':
                correct_so_far += 1

            elapsed = time.time() - start
            # The clock may not have advanced yet; report 0.0 rather than
            # dividing by zero.
            reviews_per_second = i / float(elapsed) if elapsed > 0 else 0.0

            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews_raw)))[:4]
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5]
                             + " #Correct:" + str(correct_so_far)
                             + " #Trained:" + str(i+1)
                             + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            if i % 100 == 0:
                print(i)

    def test(self, testing_reviews, testing_labels):
        """Classify each review with ``run`` and print the running accuracy.

        Args:
            testing_reviews: sequence of review strings.
            testing_labels: labels aligned with ``testing_reviews``.
        """
        correct = 0
        start = time.time()

        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            if pred == testing_labels[i]:
                correct += 1

            elapsed = time.time() - start
            # ``run`` can return before the clock advances; avoid dividing by zero.
            reviews_per_second = i / float(elapsed) if elapsed > 0 else 0.0

            # The stray "%" that used to follow the speed value is removed so
            # the line matches the format printed by ``train``.
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4]
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5]
                             + " #Correct:" + str(correct)
                             + " #Tested:" + str(i+1)
                             + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")

    def run(self, review):
        """Return ``"POSITIVE"`` or ``"NEGATIVE"`` for a single review string.

        NOTE(review): this method sums rows of ``self.weights_0_1`` into the
        plaintext ``self.layer_1`` buffer and applies the plaintext
        ``self.sigmoid``, while ``init_network`` stores those weights as
        ``one_way_encrypt_vector`` output. It looks like it expects decrypted
        weights -- confirm before relying on it.
        """
        # Input layer

        # Hidden layer
        self.layer_1 *= 0
        unique_indices = set()
        for word in review.lower().split(" "):
            if word in self.word2index:
                unique_indices.add(self.word2index[word])
        for index in unique_indices:
            self.layer_1 += self.weights_0_1[index]

        # Output layer
        layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))

        if layer_2[0] >= 0.5:
            return "POSITIVE"
        return "NEGATIVE"


# Sample output printed while training (kept verbatim from the article):
# Progress:0.0% Speed(reviews/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%0
# Progress:0.41% Speed(reviews/sec):1.978 #Correct:66 #Trained:101 Training Accuracy:65.3%100
# Progress:0.83% Speed(reviews/sec):2.014 #Correct:131 #Trained:201 Training Accuracy:65.1%200
# Progress:1.25% Speed(reviews/sec):2.011 #Correct:203 #Trained:301 Training Accuracy:67.4%300
# Progress:1.66% Speed(reviews/sec):2.003 #Correct:276 #Trained:401 Training Accuracy:68.8%400
# Progress:2.08% Speed(reviews/sec):2.007 #Correct:348 #Trained:501 Training Accuracy:69.4%500
# Progress:2.5% Speed(reviews/sec):2.015 #Correct:420 #Trained:601 Training Accuracy:69.8%600
# Progress:2.91% Speed(reviews/sec):1.974 #Correct:497 #Trained:701 Training Accuracy:70.8%700
# Progress:3.33% Speed(reviews/sec):1.973 #Correct:581 #Trained:801 Training Accuracy:72.5%800
# Progress:3.75% Speed(reviews/sec):1.976 #Correct:666 #Trained:901 Training Accuracy:73.9%900
# Progress:4.16% Speed(reviews/sec):1.983 #Correct:751 #Trained:1001 Training Accuracy:75.0%1000
# Progress:4.33% Speed(reviews/sec):1.940 #Correct:788 #Trained:1042 Training Accuracy:75.6%

# Next section heading of the article (kept verbatim):
# 第10部分:数据加密的优势