            dtype=theano.config.floatX)
    return [pre_sigmoid_v1, v1_mean, v1_sample]
```
Now we can use these functions to define the symbolic graph for one step of Gibbs sampling. We define two functions:
* `gibbs_vhv` performs a step of Gibbs sampling starting from the visible units. As we shall see, this is useful for sampling from the RBM.
* `gibbs_hvh` performs a step of Gibbs sampling starting from the hidden units. This function is useful for implementing the CD and PCD updates.
The code is as follows:
```Python
def gibbs_hvh(self, h0_sample):
    '''This function implements one step of Gibbs sampling,
       starting from the hidden state'''
    pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
    pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
    return [pre_sigmoid_v1, v1_mean, v1_sample,
            pre_sigmoid_h1, h1_mean, h1_sample]
```
```Python
def gibbs_vhv(self, v0_sample):
    '''This function implements one step of Gibbs sampling,
       starting from the visible state'''
    pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
    pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
    return [pre_sigmoid_h1, h1_mean, h1_sample,
            pre_sigmoid_v1, v1_mean, v1_sample]
```
The class also has a function that computes the free energy of the model, which is needed for computing the gradient of the parameters.
```Python
def free_energy(self, v_sample):
    ''' Function to compute the free energy '''
    wx_b = T.dot(v_sample, self.W) + self.hbias
    vbias_term = T.dot(v_sample, self.vbias)
    hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    return -hidden_term - vbias_term
```
We then add a `get_cost_updates` method, whose purpose is to generate the symbolic gradients for the CD-k and PCD-k parameter updates.
```Python
def get_cost_updates(self, lr=0.1, persistent=None, k=1):
    """This function implements one step of CD-k or PCD-k

    :param lr: learning rate used to train the RBM

    :param persistent: None for CD. For PCD, shared variable
        containing old state of Gibbs chain. This must be a shared
        variable of size (batch size, number of hidden units).

    :param k: number of Gibbs steps to do in CD-k/PCD-k

    Returns a proxy for the cost and the updates dictionary. The
    dictionary contains the update rules for weights and biases but
    also an update of the shared variable used to store the persistent
    chain, if one is used.

    """

    # compute positive phase
    pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)

    # decide how to initialize persistent chain:
    # for CD, we use the newly generated hidden sample
    # for PCD, we initialize from the old state of the chain
    if persistent is None:
        chain_start = ph_sample
    else:
        chain_start = persistent
```
Note that `get_cost_updates` takes as an argument a variable called `persistent`. This allows us to use the same code to implement both CD and PCD. To use PCD, `persistent` should refer to a shared variable that contains the state of the Gibbs chain from the previous iteration.

If `persistent` is `None`, we initialize the Gibbs chain with the hidden sample generated during the positive phase, therefore implementing CD. Once we have established the starting point of the chain, we can compute the sample at the end of the Gibbs chain, the sample we need for getting the gradient. To do so, we use the `scan` op provided by Theano; we urge the reader to read about it at this [link](http://deeplearning.net/software/theano/library/scan.html).
```Python
    # perform actual negative phase
    # in order to implement CD-k/PCD-k we need to scan over the
    # function that implements one gibbs step k times.
    # Read Theano tutorial on scan for more information :
    # http://deeplearning.net/software/theano/library/scan.html
    # the scan will return the entire Gibbs chain
    (
        [
            pre_sigmoid_nvs,
            nv_means,
            nv_samples,
            pre_sigmoid_nhs,
            nh_means,
            nh_samples
        ],
        updates
    ) = theano.scan(
        self.gibbs_hvh,
        # the None are place holders, saying that
        # chain_start is the initial state corresponding to the
        # 6th output
        outputs_info=[None, None, None, None, None, chain_start],
        n_steps=k
    )
```
Once we have generated the chain, we take the sample at the end of it to compute the free energy of the negative phase. Note that `chain_end` is a symbolic Theano variable expressed in terms of the model parameters, so if we simply applied `T.grad`, the function would try to propagate the gradient through the whole Gibbs chain. That is not what we want (it would mess up our gradients), so we need to tell `T.grad` that `chain_end` is a constant. We do this with the `consider_constant` argument of `T.grad`.
```Python
    # determine gradients on RBM parameters
    # note that we only need the sample at the end of the chain
    chain_end = nv_samples[-1]

    cost = T.mean(self.free_energy(self.input)) - T.mean(
        self.free_energy(chain_end))
    # We must not compute the gradient through the gibbs sampling
    gparams = T.grad(cost, self.params, consider_constant=[chain_end])
```
Finally, we add to the updates dictionary returned by `scan` (which contains the update rules for the random states of `theano_rng`) the rules for updating the parameters. In the case of PCD, the dictionary must also update the shared variable that stores the state of the Gibbs chain.
```Python
    # constructs the update dictionary
    for gparam, param in zip(gparams, self.params):
        # make sure that the learning rate is of the right dtype
        updates[param] = param - gparam * T.cast(
            lr,
            dtype=theano.config.floatX
        )
    if persistent:
        # Note that this works only if persistent is a shared variable
        updates[persistent] = nh_samples[-1]
        # pseudo-likelihood is a better proxy for PCD
        monitoring_cost = self.get_pseudo_likelihood_cost(updates)
    else:
        # reconstruction cross-entropy is a better proxy for CD
        monitoring_cost = self.get_reconstruction_cost(updates,
                                                       pre_sigmoid_nvs[-1])

    return monitoring_cost, updates
```
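To make the interface concrete, here is a minimal usage sketch showing how `get_cost_updates` might be compiled into a PCD-15 training function. The names `rbm`, `train_set_x` and `batch_size` are illustrative assumptions rather than part of the class; `rbm.input` and `rbm.n_hidden` refer to attributes set in the RBM constructor.

```Python
# Illustrative only: wiring get_cost_updates into a PCD-15 trainer.
# Assumed names: rbm (an RBM instance), train_set_x (shared dataset),
# batch_size (int).
import numpy
import theano
import theano.tensor as T

index = T.lscalar('index')  # minibatch index

# the persistent Gibbs chain: one hidden state per example in the batch
persistent_chain = theano.shared(
    numpy.zeros((batch_size, rbm.n_hidden), dtype=theano.config.floatX),
    borrow=True
)

# PCD-15: k=15 Gibbs steps per parameter update
cost, updates = rbm.get_cost_updates(lr=0.1, persistent=persistent_chain, k=15)

train_rbm = theano.function(
    [index],
    cost,
    updates=updates,  # parameters, RNG state and persistent chain together
    givens={
        rbm.input: train_set_x[index * batch_size:(index + 1) * batch_size]
    },
    name='train_rbm'
)
```

Because `updates` contains the rules for the parameters, the random number generator and the persistent chain, a single call to `train_rbm(i)` advances all three.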
### Tracking Progress
RBMs are particularly tricky to train. Because of the partition function Z, we cannot estimate the log-likelihood log(P(x)) during training, so we have no direct measure of whether our hyperparameters are good or not.
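To make the obstacle explicit: writing F(v) for the free energy implemented by `free_energy` above, the model log-likelihood satisfies the standard identity (reconstructed here in LaTeX, since this document renders formulas as images):

```LaTeX
\log p(v) = -\mathcal{F}(v) - \log Z,
\qquad Z = \sum_{v'} e^{-\mathcal{F}(v')}
```

The partition function Z sums over all 2^{n_visible} visible configurations, which is exactly the quantity we cannot evaluate.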
Several options are nevertheless available to the user.
#### Inspecting Negative Samples
The negative samples obtained during training can be visualized. As training progresses, we know that the model defined by the RBM gets closer to the true underlying distribution, p_train(x), so the negative samples should look more and more like samples from the training set. Obviously bad hyperparameters can be discarded in this fashion; a possible script is sketched below.
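As an illustration, one way such a check might be scripted, reusing the hypothetical `rbm` and `persistent_chain` names from the training sketch above and the `tile_raster_images` helper that ships with the tutorial code; the 28x28 image shape assumes MNIST-sized inputs:

```Python
# Sketch: render the visible means after one Gibbs step from the
# persistent chain (hypothetical rbm / persistent_chain names from above).
import theano
from PIL import Image
from utils import tile_raster_images  # helper shipped with the tutorial code

# one step h -> v starting from the stored hidden states;
# index 1 of gibbs_hvh's outputs is the visible mean (v1_mean)
v1_mean = rbm.gibbs_hvh(persistent_chain)[1]
sample_fn = theano.function([], v1_mean)

image = Image.fromarray(tile_raster_images(
    X=sample_fn(),
    img_shape=(28, 28),   # assumes an MNIST-sized visible layer (784 units)
    tile_shape=(10, 10),  # assumes at least 100 rows in the chain
    tile_spacing=(1, 1)
))
image.save('negative_samples.png')
```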
#### Visual Inspection of Filters
The filters learned by the model can also be visualized: we display the weights feeding into each hidden unit as a grayscale image. The filters should pick out strong features of the data. While it is not clear in advance what these features should look like for an arbitrary dataset, training on MNIST usually yields filters that act like "stroke" detectors, whereas training on natural images with a sparsity criterion leads to Gabor-like filters. A sketch of this visualization follows.
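Under the same assumptions as the previous sketch (MNIST-sized inputs, the tutorial's `tile_raster_images` helper, a trained `rbm` instance), the weight matrix can be rendered as a grid of filter images:

```Python
# Sketch: render each hidden unit's incoming weights as a 28x28 tile.
from PIL import Image
from utils import tile_raster_images  # helper shipped with the tutorial code

# rbm.W has shape (n_visible, n_hidden); transpose so each row is one filter
filters = tile_raster_images(
    X=rbm.W.get_value(borrow=True).T,
    img_shape=(28, 28),   # assumes n_visible == 784
    tile_shape=(10, 10),  # shows the first 100 hidden units
    tile_spacing=(1, 1)
)
Image.fromarray(filters).save('rbm_filters.png')
```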
#### Proxies to Likelihood
In addition, other functions that are more tractable can be used as proxies to the likelihood. When training an RBM with PCD, one can use the pseudo-likelihood as such a proxy. The pseudo-likelihood (PL) is much cheaper to compute, as it assumes that all bits are independent, giving the formula below (a sketch of the corresponding monitoring code follows it):
![PL](/images/7_proxies_likelihood_1.png)
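The code above already calls `get_pseudo_likelihood_cost`, which has not been shown yet. The following sketch follows the stochastic approximation used in the deeplearning.net tutorial, where at each update a single bit index is flipped and log PL is approximated by `n_visible` times the contribution of that bit; treat it as illustrative rather than canonical:

```Python
def get_pseudo_likelihood_cost(self, updates):
    """Stochastic approximation to the pseudo-likelihood"""
    # index of bit i in expression p(x_i | x_{\i})
    bit_i_idx = theano.shared(value=0, name='bit_i_idx')

    # binarize the input image by rounding to the nearest integer
    xi = T.round(self.input)

    # free energy of the input configuration
    fe_xi = self.free_energy(xi)

    # flip bit x_i of matrix xi, preserving all other bits
    xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

    # free energy of the configuration with bit i flipped
    fe_xi_flip = self.free_energy(xi_flip)

    # log P(x_i | x_{\i}) for the chosen bit, scaled by n_visible to
    # approximate the sum over all bits
    cost = T.mean(self.n_visible *
                  T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

    # cycle through the bits, one per parameter update
    updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

    return cost
```

Note how the method reuses the `updates` dictionary so that the bit index advances together with the parameters.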
### Main Loop
### Results