Autoencoder自编码

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 804 | 阅读时长 ≈ 3

Autoencoder自编码

#author:victor
#什么是自编码（Autoencoder）
#What is an Autoencoder
#神经网络的非监督学习
#因为有时候训练的样本数据很大，直接训练会很耗时的，所以把数据的feature压缩一下，然后再解压一下
#Autoencoder是一种数据的压缩算法，其中数据的压缩和解压函数
#数据相关的，有损的，从样本中自动学习的，压缩和解压缩的函数是通过神经网络实现的
#因为自编码不用到训练样本的分类标签，所以是非监督学习的
#比如PCA（Principal Component Analysis）：主成分分析方法。一种使用最广泛的数据压缩算法。
#PCA一种常用的数据降维方法。通过线性变换将原始数据变换成为一组各维度线性无关的表示来提取数据的主要线性分量
#比如聚类学习，也是非监督学习的

#import module
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

#import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
#load the MNIST data set from the 'MNIST_data' directory
mnist=input_data.read_data_sets('MNIST_data',one_hot=False)

#Visualize decoder setting
#Parameters
learning_rate=0.01
training_epochs=20
batch_size=256
display_step=1
examples_to_show=10

#Network Parameters
n_input=784#MNIST data input (img shape: 28*28), i.e. 784 features

#tf.Graph input(only pictures)
X=tf.placeholder('float',[None,n_input])

#hidden layer settings
n_hidden_1=256#first num features (2^8): the first hidden layer compresses the input to 256 features
n_hidden_2=128#second num features (2^7): the second hidden layer compresses further to 128 features
#define the weights
weights={
         'encoder_h1':tf.Variable(tf.random_normal([n_input,n_hidden_1])),
         'encoder_h2':tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2])),
          #first decoding layer: expands the 128 features back to 256 features
         'decoder_h1':tf.Variable(tf.random_normal([n_hidden_2,n_hidden_1])),
         #second decoding layer: expands the 256 features back to the original 784 features
         'decoder_h2':tf.Variable(tf.random_normal([n_hidden_1,n_input])),
         }
#define the biases
biases={
        'encoder_b1':tf.Variable(tf.random_normal([n_hidden_1])),
        'encoder_b2':tf.Variable(tf.random_normal([n_hidden_2])),
        'decoder_b1':tf.Variable(tf.random_normal([n_hidden_1])),
        'decoder_b2':tf.Variable(tf.random_normal([n_input])),
        }

#building the encoder
def encoder(x):
    """Compress a batch of inputs through the two sigmoid encoder layers.

    Each layer multiplies by its weight matrix, adds its bias and applies a
    sigmoid: first weights['encoder_h1'] / biases['encoder_b1'], then
    weights['encoder_h2'] / biases['encoder_b2'].
    Returns the activations of the second encoder layer.
    """
    #first encoder hidden layer with sigmoid activation
    pre_act_1=tf.add(tf.matmul(x,weights['encoder_h1']),biases['encoder_b1'])
    hidden_1=tf.nn.sigmoid(pre_act_1)
    #second encoder hidden layer with sigmoid activation
    pre_act_2=tf.add(tf.matmul(hidden_1,weights['encoder_h2']),biases['encoder_b2'])
    return tf.nn.sigmoid(pre_act_2)
    
#building the decoder
def decoder(x):
    """Expand an encoded batch back through the two sigmoid decoder layers.

    Mirrors the encoder: weights['decoder_h1'] / biases['decoder_b1'] first,
    then weights['decoder_h2'] / biases['decoder_b2'], each followed by a
    sigmoid. Returns the activations of the second decoder layer.
    """
    #first decoder hidden layer with sigmoid activation
    expanded=tf.nn.sigmoid(
        tf.add(tf.matmul(x,weights['decoder_h1']),biases['decoder_b1']))
    #second decoder hidden layer with sigmoid activation
    reconstruction=tf.nn.sigmoid(
        tf.add(tf.matmul(expanded,weights['decoder_h2']),biases['decoder_b2']))
    return reconstruction
    
#Construct model
encoder_op=encoder(X)
decoder_op=decoder(encoder_op)

#Prediction
y_pred=decoder_op
#Targets(Labels) are the input data
y_true=X

#Define loss and optimizer, minimize the squared error
cost=tf.reduce_mean(tf.pow(y_true-y_pred,2))
optimizer=tf.train.AdamOptimizer(learning_rate).minimize(cost)

#Initializing the variables
#tf.initialize_all_variables() is deprecated since TensorFlow 0.12; use the
#replacement when it exists and fall back to the old name only on older releases
init=(getattr(tf,'global_variables_initializer',None) or tf.initialize_all_variables)()

#Launch the graph
with tf.Session() as sess:
    sess.run(init)
    total_batch=mnist.train.num_examples//batch_size
    #Train cycle
    for epoch in range(training_epochs):
        #Loop over all batches
        for i in range(total_batch):
            #the labels are discarded: the autoencoder reconstructs its own input
            batch_xs,_=mnist.train.next_batch(batch_size)
            #Run optimization op (backprop) and cost op (to get loss value)
            _,c=sess.run([optimizer,cost],feed_dict={X:batch_xs})
        #Display logs once per epoch (cost of the last batch of that epoch),
        #not once per batch
        if epoch%display_step==0:
            print("Epoch",'%04d'%(epoch+1),
                  "cost=","{:9f}".format(c))

    print("Optimization Finished!")

    #Applying encode and decode over test set
    encode_decode=sess.run(
            y_pred,feed_dict={X:mnist.test.images[:examples_to_show]})
    #Compare original images with their reconstructions
    #squeeze=False keeps `a` two-dimensional even when examples_to_show is 1
    f,a=plt.subplots(2,examples_to_show,figsize=(examples_to_show,2),squeeze=False)
    for i in range(examples_to_show):
        #real data (top row)
        a[0][i].imshow(np.reshape(mnist.test.images[i],(28,28)))
        #reconstructed data (bottom row)
        a[1][i].imshow(np.reshape(encode_decode[i],(28,28)))
    plt.show()

运行结果：

autoencoder

总结：发现经过压缩过后的MNIST data，在训练的时候明显速度加快了。说明在进行大量数据训练的时候，使用自编码进行encoder-decoder不失为一个好办法。

rnn_use_variable

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 499 | 阅读时长 ≈ 3

在RNN中use variable

#import module
from __future__ import print_function
import tensorflow as tf

#define class
class TrainConfig:
    """Hyper-parameters read by RNN.__init__ when building the training graph."""

    # tensor dimensions
    batch_size = 20     # first dimension of the xs/ys placeholders
    time_steps = 20     # number of steps the cell is unrolled for
    input_size = 10     # last dimension of xs
    output_size = 2     # last dimension of ys
    cell_size = 11      # size passed to BasicLSTMCell

    # optimisation
    learning_rate = 0.01


class TestConfig(TrainConfig):
    """Same settings as TrainConfig, except that only one time step is used."""

    time_steps = 1  # overrides the inherited value of 20

#define RNN class
class RNN(object):
    """LSTM network whose graph is built entirely in the constructor.

    All trainable tensors are created through tf.get_variable, so the
    surrounding tf.variable_scope decides whether a new instance creates
    fresh variables or reuses existing ones.
    """

    #define the init method
    def __init__(self, config):
        """Copy the hyper-parameters from `config` and build the graph.

        `config` must expose batch_size, time_steps, input_size, output_size,
        cell_size and learning_rate attributes.
        """
        self._batch_size = config.batch_size
        self._time_steps = config.time_steps
        self._input_size = config.input_size
        self._output_size = config.output_size
        self._cell_size = config.cell_size
        self._lr = config.learning_rate
        self._built_RNN()

    #build the rnn network    
    def _built_RNN(self):
        """Create placeholders, input layer, unrolled LSTM, output layer, cost and train op."""
        with tf.variable_scope('inputs'):
            self._xs = tf.placeholder(tf.float32, [self._batch_size, self._time_steps, self._input_size], name='xs')
            self._ys = tf.placeholder(tf.float32, [self._batch_size, self._time_steps, self._output_size], name='ys')
        with tf.name_scope('RNN'):
            with tf.variable_scope('input_layer'):
                l_in_x = tf.reshape(self._xs, [-1, self._input_size], name='2_2D')  # (batch*n_step, in_size)
                # Ws (in_size, cell_size)
                Wi = self._weight_variable([self._input_size, self._cell_size])
                # printed so the effect of the enclosing variable scope on the name is visible
                print(Wi.name)
                # bs (cell_size, )
                bi = self._bias_variable([self._cell_size, ])
                # l_in_y = (batch * n_steps, cell_size)
                with tf.name_scope('Wx_plus_b'):
                    l_in_y = tf.matmul(l_in_x, Wi) + bi
                l_in_y = tf.reshape(l_in_y, [-1, self._time_steps, self._cell_size], name='2_3D')

            with tf.variable_scope('cell'):
                cell = tf.contrib.rnn.BasicLSTMCell(self._cell_size)
                with tf.name_scope('initial_state'):
                    self._cell_initial_state = cell.zero_state(self._batch_size, dtype=tf.float32)

                # unroll the cell manually, one call per time step
                self.cell_outputs = []
                cell_state = self._cell_initial_state
                for t in range(self._time_steps):
                    # after the first step the cell variables already exist, so switch the scope to reuse
                    if t > 0: tf.get_variable_scope().reuse_variables()
                    cell_output, cell_state = cell(l_in_y[:, t, :], cell_state)
                    self.cell_outputs.append(cell_output)
                self._cell_final_state = cell_state

            with tf.variable_scope('output_layer'):
                # cell_outputs_reshaped (BATCH*TIME_STEP, CELL_SIZE)
                cell_outputs_reshaped = tf.reshape(tf.concat(self.cell_outputs, 1), [-1, self._cell_size])
                Wo = self._weight_variable((self._cell_size, self._output_size))
                bo = self._bias_variable((self._output_size,))
                product = tf.matmul(cell_outputs_reshaped, Wo) + bo
                # _pred shape (batch*time_step, output_size)
                self._pred = tf.nn.relu(product)    # for displacement

        with tf.name_scope('cost'):
            _pred = tf.reshape(self._pred, [self._batch_size, self._time_steps, self._output_size])
            # NOTE: arguments are passed as (prediction, target); the squared difference is symmetric
            mse = self.ms_error(_pred, self._ys)
            # mean over axis 0 (batch), then sum over axis 0 of the result (time)
            mse_ave_across_batch = tf.reduce_mean(mse, 0)
            mse_sum_across_time = tf.reduce_sum(mse_ave_across_batch, 0)
            self._cost = mse_sum_across_time
            self._cost_ave_time = self._cost / self._time_steps

        # NOTE(review): scope name 'trian' looks like a typo for 'train'; it is a runtime graph name, so left unchanged
        with tf.variable_scope('trian'):
            self._lr = tf.convert_to_tensor(self._lr)
            self.train_op = tf.train.AdamOptimizer(self._lr).minimize(self._cost)

    @staticmethod
    def ms_error(y_target, y_pre):
        """Return the element-wise squared difference of the two tensors."""
        return tf.square(tf.subtract(y_target, y_pre))

    @staticmethod
    def _weight_variable(shape, name='weights'):
        """Get or create a variable in the current scope, initialised from N(0, 0.5)."""
        initializer = tf.random_normal_initializer(mean=0., stddev=0.5, )
        return tf.get_variable(shape=shape, initializer=initializer, name=name)

    @staticmethod
    def _bias_variable(shape, name='biases'):
        """Get or create a variable in the current scope, initialised to the constant 0.1."""
        initializer = tf.constant_initializer(0.1)
        return tf.get_variable(name=name, shape=shape, initializer=initializer)


if __name__ == '__main__':
    train_config = TrainConfig()
    test_config = TestConfig()

    # Not shared: two differently named scopes give each network its own
    # set of variables, so the test network does not see the trained weights.
    with tf.variable_scope('train_rnn'):
        train_rnn1 = RNN(train_config)
    with tf.variable_scope('test_rnn'):
        test_rnn1 = RNN(test_config)

    # Shared: build both networks inside one scope and switch the scope to
    # reuse mode before building the second one.
    with tf.variable_scope('rnn') as scope:
        sess = tf.Session()
        train_rnn2 = RNN(train_config)
        scope.reuse_variables()
        test_rnn2 = RNN(test_config)
        # tf.initialize_all_variables() is no longer valid from 2017-03-02
        # when using tensorflow >= 0.12, so pick the initializer by version
        version_parts = (tf.__version__).split('.')
        is_pre_0_12 = int(version_parts[1]) < 12 and int(version_parts[0]) < 1
        init = tf.initialize_all_variables() if is_pre_0_12 else tf.global_variables_initializer()
        sess.run(init)

使用variable_scope的效果：

rnn

name_scope的用法

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 169 | 阅读时长 ≈ 1

name_scope的用法

#author:victor
#name_scope和variable_scope的区别
#from _future_ import print_function

#import module
import tensorflow as tf

tf.set_random_seed(1)#reproducible

with tf.name_scope('a_name_scope'):
    #constant initializer
    initializer=tf.constant_initializer(value=1)
    var1=tf.get_variable(name='var1',shape=[1],dtype=tf.float32,initializer=initializer)
    #three tf.Variable objects requested with the same name 'var2'
    var2=tf.Variable(name='var2',initial_value=[2],dtype=tf.float32)
    var21=tf.Variable(name='var2',initial_value=[2,1],dtype=tf.float32)
    var22=tf.Variable(name='var2',initial_value=[2,2],dtype=tf.float32)

with tf.Session() as sess:
    #tf.initialize_all_variables() is deprecated since TensorFlow 0.12; fall back to it only if the replacement is missing
    sess.run((getattr(tf,'global_variables_initializer',None) or tf.initialize_all_variables)())
    print(var1.name)#var1:0 - variables created with tf.get_variable ignore name_scope
    print(sess.run(var1))
    print(var2.name)#a_name_scope/var2:0 - tf.Variable picks up the enclosing name_scope
    print(sess.run(var2))
    #print each variable's own name next to its value (the original printed var1/var2 again)
    print(var21.name)#a_name_scope/var2_1:0 - duplicate names get a numeric suffix
    print(sess.run(var21))
    print(var22.name)#a_name_scope/var2_2:0
    print(sess.run(var22))

使用name_scope的效果：

name_scope

variable_scope的用法

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 353 | 阅读时长 ≈ 1

variable_scope的用法

#author:victor
#import module
import tensorflow as tf
tf.reset_default_graph()

tf.set_random_seed(1)#reproducible

with tf.variable_scope('a_variable_scope') as scope:
    #constant initializer
    initializer=tf.constant_initializer(value=3)
    var3=tf.get_variable(name='var3',shape=[1],dtype=tf.float32,initializer=initializer)
    var4=tf.Variable(name='var4',initial_value=[4],dtype=tf.float32)
    var4_reuse=tf.Variable(name='var4',initial_value=[4],dtype=tf.float32)
    #To fetch an existing variable again the scope must be switched to reuse mode first.
    #Without this call tf.get_variable raises: "Variable a_variable_scope/var3 already exists,
    #disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope"
    scope.reuse_variables()
    var3_reuse=tf.get_variable(name='var3')
with tf.Session() as sess:
    #tf.initialize_all_variables() is deprecated since TensorFlow 0.12; fall back to it only if the replacement is missing
    sess.run((getattr(tf,'global_variables_initializer',None) or tf.initialize_all_variables)())
    print(var3.name)#a_variable_scope/var3:0 - variable_scope is applied
    print(sess.run(var3))#value of var3 (the original printed var4 here by mistake)

    print(var4.name)#a_variable_scope/var4:0 - variable_scope is applied
    print(sess.run(var4))

    print(var4_reuse.name)#a_variable_scope/var4_1:0 - tf.Variable never reuses: a new var4_1 is created
    print(sess.run(var4_reuse))

    print(var3_reuse.name)#a_variable_scope/var3:0 - identical to var3, i.e. the variable was reused
    #Why reuse? The same variable may be needed in several places. A typical case is an RNN:
    #the training and test networks can differ in structure (e.g. number of time steps)
    #while sharing the same parameters, which is what reuse_variables provides.
    print(sess.run(var3_reuse))
    
    
    
"""
比如RNN结构
"""
class TrainConfig:
    """Example hyper-parameter set for a training RNN."""

    # tensor dimensions
    batch_size = 20
    time_steps = 20
    input_size = 10
    output_size = 2
    cell_size = 11

    # optimisation
    learning_rate = 0.01
 #但是RNN的test，有可能的time_steps改成1，但是不会用Train里的time_steps
#所以就会用到reuse variable的time_steps   
class TestConfig(TrainConfig):
    """TrainConfig with time_steps overridden to 1; everything else is inherited."""

    time_steps = 1

使用variable_scope的效果：

variable_scope

用RNN来预测学习Sinx曲线

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 995 | 阅读时长 ≈ 5

用RNN来预测学习sinx曲线

#author:victor
#使用RNN进行回归训练，会用到自己创建的sin曲线，来预测一条cos曲线，
#设置RNN各种参数

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
#清除图形推展并重置全局默认图形
tf.reset_default_graph() 
BATCH_START = 0 #running start index used when building batch data
TIME_STEPS = 20 #time_steps for backpropagation through time
BATCH_SIZE = 50
INPUT_SIZE = 1 #size of the sin input data
OUTPUT_SIZE = 1 #size of the cos output data
CELL_SIZE = 10 #hidden unit size of the RNN
LR = 0.006
#NOTE(review): this variable is rebound to the fetched final cell state in the training loop before it is ever fed
state = tf.Variable(0.0,dtype=tf.float32)
#生成数据的get_batch function:
def get_batch():
    """Generate the next batch of sin inputs and cos targets.

    Reads the module-level BATCH_START, TIME_STEPS and BATCH_SIZE, and
    advances BATCH_START by TIME_STEPS on every call.

    Returns a list [seq, res, xs] where xs has shape (BATCH_SIZE, TIME_STEPS)
    and seq = sin(xs), res = cos(xs) carry an extra trailing axis of size 1.
    """
    global BATCH_START, TIME_STEPS
    first = BATCH_START
    last = first + TIME_STEPS*BATCH_SIZE
    # consecutive integers laid out row by row, scaled down by 10*pi
    xs = np.arange(first, last).reshape((BATCH_SIZE, TIME_STEPS)) / (10*np.pi)
    BATCH_START = first + TIME_STEPS
    # shape (batch, step, input) for the two signals
    inputs = np.sin(xs)[:, :, np.newaxis]
    targets = np.cos(xs)[:, :, np.newaxis]
    return [inputs, targets, xs]

#定义LSTMRNN的主体结构
#使用一个 class 来定义这次的 LSTMRNN 会更加方便. 第一步定义 class 中的 __init__ 传入各种参数:
class LSTMRNN(object):
    """LSTM regression model: input layer -> LSTM cell -> output layer -> cost -> train op.

    The whole graph is assembled in the constructor, each stage in its own scope.
    """
    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        """Store the dimensions and build every stage of the graph in order."""
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        with tf.name_scope('inputs'):
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')
        with tf.variable_scope('in_hidden'):
            self.add_input_layer()
        with tf.variable_scope('LSTM_cell'):
            self.add_cell()
        with tf.variable_scope('out_hidden'):
            self.add_output_layer()
        with tf.name_scope('cost'):
            self.compute_cost()
        with tf.name_scope('train'):
            # LR is the module-level learning rate
            self.train_op = tf.train.AdamOptimizer(LR).minimize(self.cost)


    #add_input_layer(): builds the input layer
    def add_input_layer(self,):
        """Project self.xs to cell_size features and store the result in self.l_in_y."""
        l_in_x = tf.reshape(self.xs, [-1, self.input_size], name = '2_2D') #(batch*n_step, in_size)
        #Ws (in_size, cell_size)
        Ws_in = self._weight_variable([self.input_size, self.cell_size])
        #bs (cell_size)
        bs_in = self._bias_variable([self.cell_size,])
        #l_in_y = (batch * n_steps, cell_size)
        with tf.name_scope('Wx_plus_b'):
            l_in_y = tf.matmul(l_in_x, Ws_in) +bs_in
        #reshape l_in_y ==> (batch, n_steps, cell_size)
        self.l_in_y = tf.reshape(l_in_y, [-1, self.n_steps, self.cell_size], name='2_3D')

    #add_cell(): builds the LSTM cell. self.cell_init_state is exposed because the
    #training loop feeds the previous final state back in through it.
    def add_cell(self):
        """Create the LSTM cell and run it over self.l_in_y with tf.nn.dynamic_rnn."""
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size, forget_bias = 1.0, state_is_tuple = True)
        with tf.name_scope('initial_state'):
            self.cell_init_state = lstm_cell.zero_state(self.batch_size, dtype= tf.float32)
        self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(lstm_cell, self.l_in_y, initial_state=self.cell_init_state, time_major=False)

    #add_output_layer(): builds the output layer
    def add_output_layer(self):
        """Project the cell outputs to output_size features and store them in self.pred."""
        # shape= (batch * steps, cell_size)
        l_out_x = tf.reshape(self.cell_outputs, [-1, self.cell_size], name= '2_2D')
        Ws_out = self._weight_variable([self.cell_size, self.output_size])
        bs_out = self._bias_variable([self.output_size, ])
        # shape = (batch * steps, output_size)
        with tf.name_scope('Wx_plus_b'):
            self.pred = tf.matmul(l_out_x, Ws_out) + bs_out

    #remaining part of the RNN: the cost
    def compute_cost(self):
        """Compute self.cost from the flattened predictions and targets.

        ms_error is passed as the loss function, every element gets weight 1,
        and the summed losses are divided by batch_size. The cost is also
        registered as the scalar summary 'cost'.
        """
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [tf.reshape(self.pred, [-1], name='reshape_pred')],
            [tf.reshape(self.ys, [-1], name= 'reshape_target')],
            [tf.ones([self.batch_size * self.n_steps], dtype = tf.float32)],
            average_across_timesteps = True,
            softmax_loss_function = self.ms_error,
            name= 'losses'
        )
        with tf.name_scope('average_cost'):
            self.cost = tf.div(
                tf.reduce_sum(losses, name='losses_sum'),
                tf.cast(self.batch_size, tf.float32),
                name = 'average_cost')
            tf.summary.scalar('cost', self.cost)

    @staticmethod
    def ms_error(labels, logits):
        """Return the element-wise squared difference between labels and logits."""
        return tf.square(tf.subtract(labels, logits))
    #Without @staticmethod this raised: TypeError: ms_error() got multiple values for argument 'labels'
    #Explanation: https://stackoverflow.com/questions/18950054/class-method-generates-typeerror-got-multiple-values-for-keyword-argument
    
    #define weight
    def _weight_variable(self, shape, name='weights'):
        """Get or create a variable in the current scope, initialised from N(0, 1)."""
        initializer = tf.random_normal_initializer(mean=0., stddev=1., )
        return tf.get_variable(shape=shape, initializer=initializer, name=name)

    #define biases
    def _bias_variable(self, shape, name='biases'):
        """Get or create a variable in the current scope, initialised to the constant 0.1."""
        initializer = tf.constant_initializer(0.1)
        return tf.get_variable(name=name, shape = shape, initializer=initializer)


#训练LSTMRNN
if __name__ == '__main__':
    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    sess = tf.Session()
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("logs", sess.graph)

    # variable initialisation: TensorFlow releases older than 0.12 only have
    # the old initializer name
    version_parts = (tf.__version__).split('.')
    is_pre_0_12 = int(version_parts[1]) < 12 and int(version_parts[0]) < 1
    init = tf.initialize_all_variables() if is_pre_0_12 else tf.global_variables_initializer()
    sess.run(init)
    # relocate to the local dir and run this line to view it on Chrome (http://0.0.0.0:6006/):
    # $ tensorboard --logdir='logs'

    # matplotlib blocks on plt.show() by default; plt.ion() switches it to
    # interactive mode so the figure can be redrawn while training continues
    plt.ion()
    plt.show()
    for i in range(200):
        seq, res, xs = get_batch()
        feed_dict = {
            model.xs: seq,
            model.ys: res,
        }
        if i != 0:
            # every run after the first starts from the previous final state
            feed_dict[model.cell_init_state] = state

        fetches = [model.train_op, model.cost, model.cell_final_state, model.pred]
        _, cost, state, pred = sess.run(fetches, feed_dict=feed_dict)

        # plotting: target of the first sequence in red, its prediction dashed blue
        first_xs = xs[0, :]
        plt.plot(first_xs, res[0].flatten(), 'r', first_xs, pred.flatten()[:TIME_STEPS], 'b--')
        plt.ylim((-1.2, 1.2))
        plt.draw()
        plt.pause(0.3)

        if i % 20 != 0:
            continue
        print('cost: ', round(cost, 4))
        result = sess.run(merged, feed_dict)
        writer.add_summary(result, i)

运行效果

rnn predict sinx

发现刚开始RNN的蓝色曲线（预测曲线）与实际曲线并不是很重合，随着RNN的训练慢慢进行，到60步以后，预测曲线和实际曲线基本吻合。

用Tensorboard查看loss和graph

使用tensorboard
查看loss和graph

tensorboard

RNN入门demo

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 715 | 阅读时长 ≈ 3

RNN入门Demo

耗费了大量时间来讲解RNN和LSTM的原理，并且这一块确实有点难以理解。实践是检验真理的唯一标准，废话不多说，直接上基于TensorFlow平台的RNN加上LSTM优化后的代码和运行效果。


#author:victor
#什么是循环神经网络RNN
#What is Recurrent Neural Networks?(RNN)

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.reset_default_graph() 

#Mnist data
mnist=input_data.read_data_sets('MNIST_data',one_hot=True)

#networks parameters
lr=0.001#learning rate
training_iters=1000000#upper bound on processed samples: the loop runs while step*batch_size<training_iters
batch_size=128
display_step=10

n_inputs=28#MNIST data input(image shape:28*28)
n_steps=28#time steps: the 784 pixels are reshaped to n_steps x n_inputs, one image row per step
n_hidden_units=128#neurons in hidden layer
n_classes=10#MNIST classes(0-9digits)

#tf.Graph input
x=tf.placeholder(tf.float32,[None,n_steps,n_inputs])
y=tf.placeholder(tf.float32,[None,n_classes])

#Define weights
weights={
#(28,128)
'in':tf.Variable(tf.random_normal([n_inputs,n_hidden_units])),
#(128,10)
'out':tf.Variable(tf.random_normal([n_hidden_units,n_classes]))  
}
 
#Define biases
biases={
#(128,)
'in':tf.Variable(tf.constant(0.1,shape=[n_hidden_units,])),
#(10,)
'out':tf.Variable(tf.constant(0.1,shape=[n_classes,]))        
}


#Define RNN
def RNN(X,weights,biases):
    """Build the LSTM classifier graph and return the class logits.

    X: float tensor of shape (batch, n_steps, n_inputs).
    weights: dict with 'in' (n_inputs, n_hidden_units) and
        'out' (n_hidden_units, n_classes) matrices.
    biases: dict with 'in' (n_hidden_units,) and 'out' (n_classes,) vectors.

    Returns a (batch, n_classes) tensor of unnormalised scores.
    The initial state is built with the module-level batch_size, so the
    graph must be fed batches of exactly that size.
    """
    #hidden layer for input to cell
    #X: (128 batch, 28 steps, 28 inputs) ==> (128*28, 28 inputs)
    X=tf.reshape(X,[-1,n_inputs])
    #X_in ==> (128*28, 128 hidden)
    #The bias is added to the matmul result. The original added it to the weight
    #matrix inside the matmul, which broadcast silently but computed X*(W+b).
    X_in=tf.matmul(X,weights['in'])+biases['in']
    #X_in ==> (128 batch, 28 steps, 128 hidden)
    X_in=tf.reshape(X_in,[-1,n_steps,n_hidden_units])

    #cell
    #An LSTM (Long Short-Term Memory) cell is used because a plain RNN suffers
    #from exploding/vanishing gradients: gradients shrink towards 0 as they are
    #propagated back through many steps (the "vanishing gradient" problem).
    lstm_cell=tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units,
                                           forget_bias=1.0,
                                           state_is_tuple=True)#state is returned as a tuple
    #the lstm state has two parts (c_state, m_state): c_state is the main line, m_state the side line
    _init_state=lstm_cell.zero_state(batch_size,dtype=tf.float32)
    #time_major=False: the time axis (the 28 steps) is the second dimension of X_in
    outputs,states=tf.nn.dynamic_rnn(lstm_cell,X_in,initial_state=_init_state,time_major=False)

    #hidden layer for output as the final results
    #method 1: project states[1], the second element of the final state tuple
    results=tf.matmul(states[1],weights['out'])+biases['out']
    #method 2 (alternative): unstack the outputs into a per-step list and
    #project the last one:
    #outputs=tf.unstack(tf.transpose(outputs,[1,0,2]))
    #results=tf.matmul(outputs[-1],weights['out'])+biases['out']
    return results
    
#prediction
pred=RNN(x,weights,biases)
#cost
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred,labels=y))
#train_op
train_op=tf.train.AdamOptimizer(lr).minimize(cost)

#accuracy: fraction of samples whose arg-max prediction matches the one-hot label
correct_pred=tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
accuracy=tf.reduce_mean(tf.cast(correct_pred,tf.float32))

#important step: initialise all variables
#tf.initialize_all_variables() is deprecated since TensorFlow 0.12; use the
#replacement when it exists and fall back to the old name only on older releases
init=(getattr(tf,'global_variables_initializer',None) or tf.initialize_all_variables)()
with tf.Session() as sess:
    sess.run(init)
    step=0
    while step*batch_size<training_iters:
        batch_xs,batch_ys=mnist.train.next_batch(batch_size)
        #reshape each flat image to 28 rows x 28 columns, keeping the batch dimension
        batch_xs=batch_xs.reshape([batch_size,n_steps,n_inputs])
        feed={x:batch_xs,y:batch_ys}
        sess.run([train_op],feed_dict=feed)
        #report the accuracy on the current training batch every 20 steps
        if step%20==0:
            print(sess.run(accuracy,feed_dict=feed))
        step+=1

运行效果

由于自己是CPU版本的TensorFlow，运行起来比较慢，只能慢慢等待咯

RNN

随着训练次数的增加，精确度也渐渐上升
设置的训练100w次，每20步输出一次结果，由于时间太久，我就不一一截图了。训练结束后，精确度99%

LSTM(Long-Short Term Memory)长短期记忆网络

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 897 | 阅读时长 ≈ 3

LSTM(Long-Short Term Memory)长短期记忆网络

一、LSTM结构

LSTM(Long-Short Term Memory)，长短期记忆网络是RNN的一种变形结构。RNN因为梯度消失的原因只有短期记忆，LSTM网络通过精妙的Cell门控制将短期记忆与长期记忆结合起来，并在一定程度上解决了梯度消失的问题。

所有RNN都具有一种重复神经网络模块的链式形式。在标准的RNN中，重复的模块只有一个非常简单的结构，比如一个tanh层。

标准的RNN：

LSTM的结构：

LSTM同样的结构，但是重复的模块拥有一个不同的结构，不同于单一层，这里是由四个，以一种非常特殊的方式进行交互

LSTM解析图标：

橙黄色矩形：学习得到的神经网络层
粉色圆形：代表一些运算，比如加法，乘法
黑色单箭头：向量的传输
两个箭头合并：表示向量的连接
一个箭头分开：表示向量的复制

二、LSTM的核心思想

LSTM的关键就是细胞Cell状态，水平线在图上方贯穿运行。
Cell细胞状态像传送带一样，直接在整个链上运行，只有一些少量的线性交互，信息在上面流传并保持不变是很容易的。

LSTM有通过精心设计的称作为“门”的结构来去除或者增加信息到细胞状态的能力。门是一种让信息选择通过的方法。包含一个sigmoid神经网络层和一个按位的乘法操作

sigmoid层输出0~1之间的数值，描述每个部分有多少量可以通过。0代表不允许

任何量通过，1代表允许任意量通过。LSTM拥有三个门，来保护和控制细胞状态。

三、LSTM的推理

LSTM中的第一步是决定我们会从细胞状态中丢弃什么信息。这个决定是通过一个叫做“忘记门层”的结构完成的。忘记门层会读取h_{t-1}和x_t，输出一个0~1之间的数值给细胞状态C_{t-1}中的每个数字，1表示完全保留，0表示完全舍弃。
下一步是确定什么样的新信息被存放在细胞状态中，这里包含两个部分：

sigmoid层叫做输入门层，决定将要更新的值

tanh层创建一个新的候选值向量C̃_t，会被加入到状态中。

更新旧细胞状态，把C_{t-1}更新为C_t。就是把旧状态C_{t-1}与f_t相乘，丢弃掉确定需要丢弃的信息，然后加上i_t*C̃_t（按更新程度缩放后的候选值），得到新的细胞状态C_t。

最后一步，确定需要输出什么值。这个输出将会基于我们的细胞状态，但是也是一个过滤后的版本。首先是，运行一个sigmoid层来确定细胞状态的哪个部分将被输出。然后，把细胞状态通过tanh进行处理，得到一个-1~1之间的值，将它和sigmoid门的输出相乘。

四、LSTM的变体

让门层也接受细胞状态的输入
使用coupled忘记和输入门

区别于标准LSTM的分开确定忘记什么和需要添加什么新的信息，变体的LSTM是两者一同做出决定。

Gated Recurrent Unit(GRU)变体，是将忘记门和输入门合成了一个单一的更新门。还混合了细胞状态和隐藏状态的，也是非常流行的变体

RNN(Recurrent Neural Network)循环神经网络

发表于 2018-12-16 | 分类于 TensorFlow

字数统计: 1.4k | 阅读时长 ≈ 4

RNN(Recurrent Neural Network)循环神经网络

RNN(Recurrent Neural Network)是一类用于处理序列数据的神经网络。序列数据：时间序列数据，也就是在不同时间点上收集到的数据，这类数据反映了某一事物、现象等随着时间变化的状态或者程度。也不一定是时间序列，也可以是文本序列。总之：后面的数据跟前面的数据是有关系，可以将RNN看做全连接网络

一、RNN的结构

普通神经网络包含input layer，hidden layer，output layer，通过Activation Function来控制输出。layer与layer通过weights连接。

RNN的标准结构：

普通神经网络结构：

两者区别：

基础的神经网络只在层与层之间建立了权Weights连接，RNN在层之间的神经元之间也建立了权连接
实际中，RNN标准结构并不能解决所有问题：

输入为一串文字，输出为分类类别，那么输出就不需要一个序列，只要单个输出：

有时候需要单个输入但是输出为序列的情况的RNN结构：

有时候输入是序列，但是不随着序列变化：
实际中，大部分问题序列都是不等长的。比如：自然语言处理中，源语言和目标语言的句子往往长度是不同的。就需要Encoder-Decoder模型，也叫Seq2Seq模型。

Encoder-Decoder模型结构原理：先编码后解码。左侧的RNN用来编码得到c，然后再用右侧的RNN把c解码。

二、标准RNN的流程

标准的RNN采用的是前向传播过程：

图中的：x为输入input，h为hidden layer单元，o为输出output，L为loss损失函数，y为training set训练集，右上角小标号代表t时刻状态。W，U，V代表权值Weights。

前向传播算法公式：

φ为激活函数，一般选择是tanh函数，b为biases偏置

t时刻的输出o：
预测输出为：

其中δ为激活函数，RNN常用于分类，这里一般用softmax函数

三、RNN的训练方法(Back Propagation Through Time,BPTT)

BPTT用来RNN训练。它的本质是BP算法，只是加上了时间。因为RNN处理时间序列数据，要基于时间反向传播。

核心思想：BPTT和BP算法相同，都沿着需要优化参数的负梯度方向不断寻找更优的点直到收敛。也就是梯度下降法。

需要寻找最优的有三个参数：U，V，W。

U,W两个参数的寻找最优的过程需要用到历史数据。而，V只关注当前h（hidden layer）的数据。

求V的偏导数：

也即是L(Loss)对V求偏导，V到L还经过o(output)，里面有激活函数。所以是复合函数求导过程。

因为RNN的L(Loss)的损失是随着时间累加的，所以叠加后的结果如上图。
求W的偏导数：

W偏导求解需要用到历史数据，假设我们只有三个时刻，假设第三个时刻L对W的偏导数为：
求U的偏导数：

U偏导求解需要用到历史数据，假设我们只有三个时刻，假设第三个时刻L对U的偏导数为：
上面只是对某个时刻的W和U求的偏导数，但是RNN的L(Loss)损失是随着时间累加的，要追溯到历史数据，那么整个损失函数L对W，U的偏导数十分复杂的。通过找规律发现：
根据RNN图得知，Activation Function激活函数是嵌套在对h(hidden)的偏导里面的，把中间累乘部分替换为tanh或者sigmoid写法为：
上面的累乘会导致激活函数的累乘，会导致“梯度消失”和”梯度爆炸“现象。

1、sigmoid函数（logistics函数）和导数：

结论：使用sigmoid函数作为激活函数，肯定是累乘的时候结果越来越小，随着时间推移小数的累乘导致梯度小到接近于0，这就是“梯度消失”。梯度消失会导致，那一层的参数再也不会更新，那么那一层隐藏层就变成了单纯的映射层，就毫无意义了。

总结：sigmoid函数(logisitic函数)不是0中心对称

2、tanh函数和导数：

总结：tanh函数是0中心对称的，会使神经网络的收敛性更好；并且tanh函数相对于sigmoid函数来说梯度较大，收敛速度更快，引起梯度消失也更慢
解决梯度消失的方法：

1、选取其他激活函数

一般选用ReLU作为激活函数

ReLU的导数：

总结：ReLU函数的左导数为0，右导数为1，避免了“梯度消失”，然而右导数为1，会导致“梯度爆炸”，但是可以设定合适的阈值可以解决“梯度爆炸”问题。还有一点就是，左导数恒为0，有可能导致把神经元学死，同样设置合适的步长（training_step）可以避免问题发生。

2、改变传播结构

改变传播结构，也就是引入LSTM(Long-Short Term Memory)，长短期记忆网络。是一种时间递归神经网络。适合处理和预测时间序列中间隔和延迟相对较长的重要事件。LSTM区别于RNN的地方，就是在算法中加了一个判断信息有用与否的处理器Cell。

利用CNN识别图片

发表于 2018-12-15 | 分类于 TensorFlow

字数统计: 676 | 阅读时长 ≈ 4

具体CNN的原理以及应用请看上一篇文章。废话不多说，直接上代码，看运行效果

利用CNN识别image的源代码

# -- coding: utf-8 --

"""
@author: victor

Convolutional Neural Network Example
Build a convolutional neural network with Tensorflow
This example is using TensorFlow layers API
see 'convolutional_network_raw' example for a raw TensorFlow
implementation with variables
"""

#from future import division,print_function,absolute_import


#Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
#one_hot=False keeps integer class labels; model_fn feeds them to the sparse cross-entropy loss
mnist=input_data.read_data_sets("MNIST_data/",one_hot=False)

#import module
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

#Training parameters
learning_rate=0.001
num_steps=1000
batch_size=128

#Network parameters
num_input=784#MNIST data input(img shape:28*28)
num_classes=10#MNIST total classes(0-9 digits)
dropout=0.25#Dropout,probability to drop a unit

#Create the neural network
def conv_net(x_dict,n_classes,dropout,reuse,is_training):
    """Build the convolutional classifier and return its logits.

    x_dict: dict of input tensors (tf Estimator passes a dict in case of
        multiple inputs); only x_dict['images'] is read.
    n_classes: number of units of the output layer.
    dropout: rate handed to tf.layers.dropout.
    reuse: forwarded to tf.variable_scope('ConvNet') so a second call can
        share the variables created by the first one.
    is_training: forwarded as the dropout `training` flag; when False,
        dropout is not applied.
    """
    #Define a scope for reusing the variables
    with tf.variable_scope('ConvNet',reuse=reuse):
        #MNIST data input is a 1-D vector of 784 features (28*28 pixels);
        #reshape to the 4-D picture format [Batch Size,Height,Width,Channel]
        images=tf.reshape(x_dict['images'],shape=[-1,28,28,1])

        #Convolution layer with 32 filters and a kernel size of 5
        net=tf.layers.conv2d(inputs=images,filters=32,kernel_size=5,activation=tf.nn.relu)
        #Max pooling (down-sampling) with strides of 2 and kernel size of 2
        net=tf.layers.max_pooling2d(inputs=net,pool_size=2,strides=2)

        #Convolution layer with 64 filters and a kernel size of 3
        net=tf.layers.conv2d(inputs=net,filters=64,kernel_size=3,activation=tf.nn.relu)
        #Average pooling (down-sampling) with strides of 2 and kernel size of 2
        net=tf.layers.average_pooling2d(inputs=net,pool_size=2,strides=2)

        #Flatten the data to a 1-D vector for the fully connected layer
        net=tf.contrib.layers.flatten(net)

        #Fully connected layer with 1024 units (no activation)
        net=tf.layers.dense(inputs=net,units=1024)
        #Apply dropout (skipped when is_training is False)
        net=tf.layers.dropout(inputs=net,rate=dropout,training=is_training)

        #Output layer, class prediction
        logits=tf.layers.dense(inputs=net,units=n_classes)

    return logits

    
    
#Define the model function(following Tf Estimator Template)

def model_fn(features,labels,mode):
    """Model function following the TF Estimator template.

    features: dict of input tensors, passed straight to conv_net.
    labels: class labels; only used outside prediction mode.
    mode: one of the tf.estimator.ModeKeys values.

    Returns a tf.estimator.EstimatorSpec. In PREDICT mode it only carries
    the predicted classes; otherwise it also carries the loss, the train
    op and an 'accuracy' evaluation metric.
    """
    #Dropout behaves differently at training and prediction time, so two
    #computation graphs are built that share the same weights (reuse=True)
    train_logits=conv_net(features,num_classes,dropout,reuse=False,is_training=True)
    eval_logits=conv_net(features,num_classes,dropout,reuse=True,is_training=False)

    #Predictions come from the graph without dropout
    predicted_classes=tf.argmax(eval_logits,axis=1)
    pred_probas=tf.nn.softmax(eval_logits)

    #Prediction mode needs nothing else: return early
    if mode==tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode,predictions=predicted_classes)

    #Loss on the training graph, optimised with Adam
    per_example_loss=tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=train_logits,labels=tf.cast(labels,dtype=tf.int32))
    loss_op=tf.reduce_mean(per_example_loss)
    adam=tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op=adam.minimize(loss_op,global_step=tf.train.get_global_step())

    #Accuracy of the model
    acc_op=tf.metrics.accuracy(labels=labels,predictions=predicted_classes)

    #The EstimatorSpec bundles the ops for training and evaluating
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predicted_classes,
        loss=loss_op,
        train_op=train_op,
        eval_metric_ops={'accuracy':acc_op})

   
#Build the Estimator
model=tf.estimator.Estimator(model_fn)

#Define the input function for training
input_fn=tf.estimator.inputs.numpy_input_fn(
         x={'images':mnist.train.images},
         y=mnist.train.labels,
         batch_size=batch_size,
         num_epochs=None,
         shuffle=True)

#Train the Model
model.train(input_fn,steps=num_steps)

#Predict single images
n_images=10

#Get images from test set
test_images=mnist.test.images[:n_images]

#Prepare the input data (no labels, original order)
input_fn=tf.estimator.inputs.numpy_input_fn(
         x={'images':test_images},shuffle=False)

#Use the model to predict the images class
preds=list(model.predict(input_fn))

#Display
for i in range(n_images):
    plt.imshow(np.reshape(test_images[i],[28,28]),cmap='gray')
    #label the figure BEFORE showing it: calling xlabel after a blocking
    #plt.show() would put the label on a new, empty figure
    plt.xlabel('Model prediction:'+str(preds[i]),fontsize=14)
    print('Model prediction:',preds[i])
    plt.show()
    plt.pause(0.5)

运行效果

cnn识别图片

查看Tensor board上的效果

Tensorboard的操作
利用Google Chrome查看图形化差异

loss

CNN的入门demo

发表于 2018-12-15 | 分类于 TensorFlow

字数统计: 1.1k | 阅读时长 ≈ 5

一、CNN的源代码

#author:victor
#import module

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


#input MNIST_data

#one_hot=True: each label is a 10-element one-hot vector, matching the y_ placeholder
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

sess = tf.InteractiveSession()



#define placeholder for inputs to network
x = tf.placeholder(tf.float32, shape=[None, 784])#28*28 pixels per image
y_ = tf.placeholder(tf.float32, shape=[None, 10])#10 digits, 0~9


#define weights (the neurons' weights)
def weight_variable(shape):
    """Create a weight Variable of the given shape.

    The initial values come from tf.truncated_normal(shape, stddev=0.1).
    It takes the same parameters as tf.random_normal (shape, mean, stddev),
    but values more than two standard deviations from the mean are dropped
    and re-drawn.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))



#define biases (the neurons' bias constants)
def bias_variable(shape):
    """Create a bias Variable of the given shape, every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))



#define the convolutional layer
def conv2d(x, W):
    """Apply a 2-D convolution of input `x` with filter weights `W`.

    strides has the form [1, x_movement, y_movement, 1]; the first and last
    entries must be 1. Here the filter moves one pixel at a time in both
    directions.

    padding can be "SAME" or "VALID". With "SAME" (used here) and a stride
    of 1 the output has the same height and width as the input; with "VALID"
    the output may shrink after filtering.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")


#define the pooling layer
def max_pool_2x2(x):
    """Apply max pooling to `x` with a 2x2 window.

    ksize is the kernel (window) size. strides has the form
    [1, x_movement, y_movement, 1]; with strides[1]=strides[2]=2 the window
    advances two pixels per step.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")

##conv1 layer (convolution)##
#W_conv1 holds the weights: 5*5 patch, in size=1 (image depth), out size=32
W_conv1 = weight_variable([5, 5, 1, 32])

#b_conv1 holds the biases; 32 is the number of kernels, chosen empirically
b_conv1 = bias_variable([32])

#Reshape each flat 784-pixel row to 28*28 with 1 channel (MNIST images are
#grayscale; a colour image would have more channels). -1 leaves the number
#of samples to be inferred: [n_samples,28,28,1]
x_image = tf.reshape(x, [-1, 28, 28, 1])

#Convolution plus bias, followed by the ReLU non-linearity
conv1_linear = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_linear)#output size 28x28x32, the conv stride is 1,1

##pooling layer##
#output size 14x14x32: the pooling stride is 2,2, halving each spatial side
h_pool1 = max_pool_2x2(h_conv1)
##conv1 layer##


##conv2 layer (convolution)##
#patch 5*5, in size 32 (the output depth of conv1), out size 64
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

#The input of the second layer is the pooled 14*14 output of the first one;
#the conv stride is 1,1, so the output size is 14x14x64
conv2_linear = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_linear)


##pooling layer##
#The pooling stride is 2,2, so the output size becomes 7x7x64
h_pool2 = max_pool_2x2(h_conv2)
##conv2 layer##


##func1 layer (fully connected)##
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

##flatten##
#[n_samples,7,7,64] is reshaped to [n_samples,7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

fc1_linear = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_linear)

#keep_prob is fed at run time and is a number between 0 and 1
keep_prob = tf.placeholder("float")

#Dropout is added to guard against overfitting
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

##func1 layer##
##func2 layer (fully connected)##
#The input size 1024 is the output size of the first fully connected layer;
#the output size is 10 because there are 10 digits, 0~9

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

#prediction: softmax class probabilities
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

#cross entropy: the error between the prediction and the real data.
#The probabilities are clipped away from 0 before the log, so a fully
#saturated softmax output cannot turn the loss into -inf/NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))

#For a large network AdamOptimizer is used instead of GradientDescentOptimizer.
#NOTE: this statement used to be fused onto the end of the comment line above
#it, which left train_step undefined when the training loop tried to run it.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

#A prediction is correct when the most probable class equals the labelled one
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))


#important step: initialize all variables before running the graph
sess.run(tf.global_variables_initializer())
for i in range(20000):
    #Fetch the next mini-batch of 50 (images, labels) from the training set
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:

        #Report accuracy on the current batch every 100 steps, with
        #keep_prob set to 1.0 so that every unit is kept (no dropout).
        #The label placeholder is y_ (there is no name `y` in this script).
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    #One optimization step, with dropout keeping half of the units
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

#Final evaluation on the whole test set, dropout disabled
print("test accuracy %g" % accuracy.eval(feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

二、运行效果

刚开始训练精度并不高，在90%左右

start training

随着慢慢的训练5000步左右的时候，精度逐渐增加到99%左右

cnn

等过万的时候，精确度已经很高了，接近于100%

cnn

训练到15000步，精确度已经很高，几乎100%
训练结束后的结果

与上一节 MNIST 入门 Demo 中直接使用 GradientDescentOptimizer 进行训练的结果相比，利用 CNN 训练后，精确度基本上可以达到 99%。

ps：由于设置的循环range为20000，训练次数比较大，跑起来比较耗时，我安装的是CPU版本的Tensorflow，跑了大概1个小时训练结束。