"""
Deep Belief Network
author: Ye Hu
2016/12/20
"""
import timeit
import numpy as np
import tensorflow as tf
import input_data
from logisticRegression import LogisticRegression
from mlp import HiddenLayer
from rbm import RBM

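# Note: `input_data` is assumed to be the MNIST loader used by the TensorFlow
# tutorials (same interface as tensorflow.examples.tutorials.mnist.input_data);
# it provides read_data_sets() and the train/validation/test splits used below.
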
class DBN(object):
    """
    An implementation of a Deep Belief Network (DBN).
    The hidden layers are first pretrained as RBMs; the DBN is then treated as a
    normal MLP by adding an output layer.
    """
    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
        """
        :param n_in: int, the dimension of the input
        :param n_out: int, the dimension of the output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        """
        # Number of layers
        assert len(hidden_layers_sizes) > 0
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # normal sigmoid layers
        self.rbm_layers = []  # RBM layers
        self.params = []      # keep track of params for training

        # Define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])

        # Construct the layers of the DBN
        for i in range(self.n_layers):
            if i == 0:
                layer_input = self.x
                input_size = n_in
            else:
                layer_input = self.layers[i - 1].output
                input_size = hidden_layers_sizes[i - 1]
            # Sigmoid layer
            sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=tf.nn.sigmoid)
            self.layers.append(sigmoid_layer)
            # Add the parameters for finetuning
            self.params.extend(sigmoid_layer.params)
            # Create the RBM layer
            self.rbm_layers.append(RBM(inpt=layer_input, n_visiable=input_size,
                                       n_hidden=hidden_layers_sizes[i],
                                       W=sigmoid_layer.W, hbias=sigmoid_layer.b))
        # We use the LogisticRegression layer as the output layer
        self.output_layer = LogisticRegression(inpt=self.layers[-1].output,
                                               n_in=hidden_layers_sizes[-1],
                                               n_out=n_out)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        self.cost = self.output_layer.cost(self.y)
        # The accuracy ("accuarcy" is the method name defined in logisticRegression)
        self.accuracy = self.output_layer.accuarcy(self.y)

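    # Note: each RBM shares its weights (W) and hidden biases with the sigmoid
    # layer built on the same input, so greedy layer-wise pretraining of the
    # RBMs directly initializes the weights that finetuning later adjusts.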
    def pretrain(self, sess, X_train, batch_size=50, pretraining_epochs=10, lr=0.1, k=1,
                 display_step=1):
        """
        Pretrain the layers (just train the RBM layers)
        :param sess: tf.Session
        :param X_train: the training set input (you may need to modify this function
                        if you are not using the provided MNIST dataset)
        :param batch_size: int
        :param lr: float, learning rate
        :param k: int, use CD-k
        :param pretraining_epochs: int
        :param display_step: int
        """
        print('Starting pretraining...\n')
        start_time = timeit.default_timer()
        batch_num = int(X_train.train.num_examples / batch_size)
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.rbm_layers[i].get_reconstruction_cost()
            train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr, k=k, persistent=None)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for j in range(batch_num):
                    x_batch, _ = X_train.train.next_batch(batch_size)
                    # Train
                    sess.run(train_ops, feed_dict={self.x: x_batch})
                    # Accumulate the average reconstruction cost
                    avg_cost += sess.run(cost, feed_dict={self.x: x_batch}) / batch_num
                # Print progress
                if epoch % display_step == 0:
                    print("\tPretraining layer {0} Epoch {1} cost: {2}".format(i, epoch, avg_cost))

        end_time = timeit.default_timer()
        print("\nThe pretraining process ran for {0} minutes".format((end_time - start_time) / 60))

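    # Note: finetuning treats the pretrained stack plus the logistic regression
    # output layer as one ordinary MLP and updates every parameter in self.params
    # with plain SGD on the supervised cost defined by the output layer.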
    def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100, lr=0.1,
                   display_step=1):
        """
        Finetune the network
        """
        print("\nStart finetuning...\n")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(
            self.cost, var_list=self.params)
        for epoch in range(training_epochs):
            avg_cost = 0.0
            batch_num = int(trainSet.train.num_examples / batch_size)
            for i in range(batch_num):
                x_batch, y_batch = trainSet.train.next_batch(batch_size)
                # Train
                sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch})
                # Accumulate the average cost
                avg_cost += sess.run(self.cost, feed_dict={self.x: x_batch,
                                                           self.y: y_batch}) / batch_num
            # Print progress
            if epoch % display_step == 0:
                val_acc = sess.run(self.accuracy, feed_dict={self.x: trainSet.validation.images,
                                                             self.y: trainSet.validation.labels})
                print("\tEpoch {0} cost: {1}, validation accuracy: {2}".format(epoch, avg_cost, val_acc))

        end_time = timeit.default_timer()
        print("\nThe finetuning process ran for {0} minutes".format((end_time - start_time) / 60))

if __name__ == "__main__":
    # MNIST example
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # Set the random seed before building the graph so that it actually takes effect
    tf.set_random_seed(seed=1111)
    dbn = DBN(n_in=784, n_out=10, hidden_layers_sizes=[500, 500, 500])
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    dbn.pretrain(sess, X_train=mnist)
    dbn.finetuning(sess, trainSet=mnist)
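    # Optional: a minimal evaluation sketch, assuming the standard `mnist.test`
    # split provided by input_data.read_data_sets; it reuses the DBN's accuracy op.
    test_acc = sess.run(dbn.accuracy, feed_dict={dbn.x: mnist.test.images,
                                                 dbn.y: mnist.test.labels})
    print("Test accuracy: {0}".format(test_acc))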