TFP Probabilistic Layers: Regression


In this example we show how to fit regression models using TFP's "probabilistic layers."

Dependencies & Prerequisites

Import

from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import tf_keras
import tensorflow_probability as tfp

sns.reset_defaults()
sns.set_context(context='talk', font_scale=0.7)

%matplotlib inline

tfd = tfp.distributions

Make things Fast!

Before we dive in, let's make sure we're using a GPU for this demo.

To do this, select "Runtime" -> "Change runtime type" -> "Hardware accelerator" -> "GPU".

The following snippet will verify that we have access to a GPU.

if tf.test.gpu_device_name() != '/device:GPU:0':
  print('WARNING: GPU device not found.')
else:
  print('SUCCESS: Found GPU: {}'.format(tf.test.gpu_device_name()))
WARNING: GPU device not found.

Motivation

Wouldn't it be great if we could use TFP to specify a probabilistic model and then simply minimize the negative log-likelihood, i.e.,

negloglik = lambda y, rv_y: -rv_y.log_prob(y)

Well, not only is it possible, but this colab shows how, in the context of linear regression problems!
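
As a quick sanity check (our own sketch, not part of the original notebook), note that `negloglik` simply evaluates the negative log-density of `y` under the model's output distribution `rv_y`:

# Illustrative only: evaluate negloglik against a toy standard Normal.
toy_rv = tfd.Normal(loc=0., scale=1.)
print(negloglik(np.float32(1.5), toy_rv).numpy())
# == 0.5 * 1.5**2 + 0.5 * np.log(2. * np.pi) ~= 2.044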

Synthesize dataset.

w0 = 0.125
b0 = 5.
x_range = [-20, 60]

def load_dataset(n=150, n_tst=150):
  np.random.seed(43)
  def s(x):
    g = (x - x_range[0]) / (x_range[1] - x_range[0])
    return 3 * (0.25 + g**2.)
  x = (x_range[1] - x_range[0]) * np.random.rand(n) + x_range[0]
  eps = np.random.randn(n) * s(x)
  y = (w0 * x * (1. + np.sin(x)) + b0) + eps
  x = x[..., np.newaxis]
  x_tst = np.linspace(*x_range, num=n_tst).astype(np.float32)
  x_tst = x_tst[..., np.newaxis]
  return y, x, x_tst

y, x, x_tst = load_dataset()
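
Note that the noise scale `s(x)` grows quadratically from left to right across `x_range`, so the data are heteroscedastic: observations near `x = 60` are noticeably noisier than those near `x = -20`. Cases 2 and 4 below will let the model learn this.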

Case 1: No Uncertainty

# Build model.
model = tf_keras.Sequential([
  tf_keras.layers.Dense(1),
  tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
0.13032457
5.13029
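
Since the fitted model returns a `tfd.Distribution`, the usual distribution methods are available on the prediction itself; a minimal sketch (our own, using the `yhat` from above):

# yhat is a tfd.Normal over the test inputs, so we can query it directly.
print(yhat.mean().shape)   # (150, 1): predictive mean at each x_tst
print(yhat.stddev()[0])    # always 1, since we hard-coded scale=1
print(yhat.log_prob(5.).shape)  # log-density of y=5 at each x_tst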

Figure 1: No uncertainty.

w = np.squeeze(model.layers[-2].kernel.numpy())
b = np.squeeze(model.layers[-2].bias.numpy())

plt.figure(figsize=[6, 1.5])  # inches
plt.plot(x, y, 'b.', label='observed');
plt.plot(x_tst, yhat.mean(), 'r', label='mean', linewidth=4);
plt.ylim(-0., 17);
plt.yticks(np.linspace(0, 15, 4)[1:]);
plt.xticks(np.linspace(*x_range, num=9));
ax = plt.gca();
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc='center left', fancybox=True, framealpha=0., bbox_to_anchor=(1.05, 0.5))
plt.savefig('/tmp/fig1.png', bbox_inches='tight', dpi=300)


Case 2: Aleatoric Uncertainty

# Build model.
model = tf_keras.Sequential([
  tf_keras.layers.Dense(1 + 1),
  tfp.layers.DistributionLambda(
      lambda t: tfd.Normal(loc=t[..., :1],
                           scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:]))),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
[0.14738432 0.1815331 ]
[4.4812164 1.2219843]
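
Here the first `Dense` output parameterizes the mean and the second parameterizes the standard deviation through a shifted softplus, which guarantees a positive scale; the green bands in Figure 2 below track the noise the model has learned. A small illustration of the mapping (ours, not from the notebook):

# scale = 1e-3 + softplus(0.05 * t) is always positive and increases with t.
for t in [-10., 0., 10.]:
  print(t, (1e-3 + tf.math.softplus(0.05 * t)).numpy())
# prints roughly 0.475, 0.694, 0.975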

Figure 2: Aleatoric Uncertainty

plt.figure(figsize=[6, 1.5])  # inches
plt.plot(x, y, 'b.', label='observed');

m = yhat.mean()
s = yhat.stddev()
plt.plot(x_tst, m, 'r', linewidth=4, label='mean');
plt.plot(x_tst, m + 2 * s, 'g', linewidth=2, label=r'mean + 2 stddev');
plt.plot(x_tst, m - 2 * s, 'g', linewidth=2, label=r'mean - 2 stddev');

plt.ylim(-0., 17);
plt.yticks(np.linspace(0, 15, 4)[1:]);
plt.xticks(np.linspace(*x_range, num=9));
ax = plt.gca();
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc='center left', fancybox=True, framealpha=0., bbox_to_anchor=(1.05, 0.5))
plt.savefig('/tmp/fig2.png', bbox_inches='tight', dpi=300)


Case 3: Epistemic Uncertainty

# Specify the surrogate posterior over `keras.layers.Dense` `kernel` and `bias`.
def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
  n = kernel_size + bias_size
  c = np.log(np.expm1(1.))
  return tf_keras.Sequential([
      tfp.layers.VariableLayer(2 * n, dtype=dtype),
      tfp.layers.DistributionLambda(lambda t: tfd.Independent(
          tfd.Normal(loc=t[..., :n],
                     scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
          reinterpreted_batch_ndims=1)),
  ])

# Specify the prior over `keras.layers.Dense` `kernel` and `bias`.
def prior_trainable(kernel_size, bias_size=0, dtype=None):
  n = kernel_size + bias_size
  return tf_keras.Sequential([
      tfp.layers.VariableLayer(n, dtype=dtype),
      tfp.layers.DistributionLambda(lambda t: tfd.Independent(
          tfd.Normal(loc=t, scale=1),
          reinterpreted_batch_ndims=1)),
  ])

# Build model.
model = tf_keras.Sequential([
  tfp.layers.DenseVariational(1, posterior_mean_field, prior_trainable,
                              kl_weight=1/x.shape[0]),
  tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
[ 0.1387333 5.125723 -4.112224 -2.2171402]
[0.12476114 5.147452 ]
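
The first printed vector holds the surrogate posterior's parameters (with `kernel_size=1` and `bias_size=1`, `n=2`, so two locations followed by two unconstrained scales), and the second holds the trainable prior's two locations. Because `DenseVariational` draws a fresh weight sample from the surrogate posterior on every forward pass, two calls on the same inputs generally disagree; a quick check (our sketch):

# Each forward pass samples new weights, so predictions are stochastic.
m1 = model(x_tst).mean()
m2 = model(x_tst).mean()
print(np.allclose(m1, m2))  # almost surely False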

Figure 3: Epistemic Uncertainty

plt.figure(figsize=[6, 1.5])  # inches
plt.clf();
plt.plot(x, y, 'b.', label='observed');

yhats = [model(x_tst) for _ in range(100)]
avgm = np.zeros_like(x_tst[..., 0])
for i, yhat in enumerate(yhats):
  m = np.squeeze(yhat.mean())
  s = np.squeeze(yhat.stddev())
  if i < 25:
    plt.plot(x_tst, m, 'r', label='ensemble means' if i == 0 else None, linewidth=0.5)
  avgm += m
plt.plot(x_tst, avgm / len(yhats), 'r', label='overall mean', linewidth=4)

plt.ylim(-0., 17);
plt.yticks(np.linspace(0, 15, 4)[1:]);
plt.xticks(np.linspace(*x_range, num=9));
ax = plt.gca();
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc='center left', fancybox=True, framealpha=0., bbox_to_anchor=(1.05, 0.5))
plt.savefig('/tmp/fig3.png', bbox_inches='tight', dpi=300)


Case 4: Aleatoric & Epistemic Uncertainty

# Build model.
model = tf_keras.Sequential([
  tfp.layers.DenseVariational(1 + 1, posterior_mean_field, prior_trainable,
                              kl_weight=1/x.shape[0]),
  tfp.layers.DistributionLambda(
      lambda t: tfd.Normal(loc=t[..., :1],
                           scale=1e-3 + tf.math.softplus(0.01 * t[..., 1:]))),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
[ 0.12753433 2.7504077 5.160624 3.8251898 -3.4283297 -0.8961645
 -2.2378397 0.1496858 ]
[0.14511648 2.7104297 5.1248145 3.7724588 ]
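
In this model the spread among the red ensemble means reflects epistemic uncertainty, while each member's own stddev (the green bands) reflects aleatoric uncertainty. One way to combine them into a single predictive band is the law of total variance, sketched here (our illustration, not part of the original notebook):

# Var[y] = E_w[Var[y|w]] (aleatoric) + Var_w[E[y|w]] (epistemic).
yhats = [model(x_tst) for _ in range(100)]
means = np.stack([np.squeeze(yh.mean()) for yh in yhats])    # (100, 150)
stds = np.stack([np.squeeze(yh.stddev()) for yh in yhats])   # (100, 150)
total_stddev = np.sqrt((stds**2).mean(axis=0) + means.var(axis=0))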

Figure 4: Both Aleatoric & Epistemic Uncertainty

plt.figure(figsize=[6, 1.5])  # inches
plt.plot(x, y, 'b.', label='observed');

yhats = [model(x_tst) for _ in range(100)]
avgm = np.zeros_like(x_tst[..., 0])
for i, yhat in enumerate(yhats):
  m = np.squeeze(yhat.mean())
  s = np.squeeze(yhat.stddev())
  if i < 15:
    plt.plot(x_tst, m, 'r', label='ensemble means' if i == 0 else None, linewidth=1.)
    plt.plot(x_tst, m + 2 * s, 'g', linewidth=0.5,
             label='ensemble means + 2 ensemble stdev' if i == 0 else None);
    plt.plot(x_tst, m - 2 * s, 'g', linewidth=0.5,
             label='ensemble means - 2 ensemble stdev' if i == 0 else None);
  avgm += m
plt.plot(x_tst, avgm / len(yhats), 'r', label='overall mean', linewidth=4)

plt.ylim(-0., 17);
plt.yticks(np.linspace(0, 15, 4)[1:]);
plt.xticks(np.linspace(*x_range, num=9));
ax = plt.gca();
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc='center left', fancybox=True, framealpha=0., bbox_to_anchor=(1.05, 0.5))
plt.savefig('/tmp/fig4.png', bbox_inches='tight', dpi=300)


Case 5: Functional Uncertainty

Custom PSD Kernel

class RBFKernelFn(tf_keras.layers.Layer):
  def __init__(self, **kwargs):
    super(RBFKernelFn, self).__init__(**kwargs)
    dtype = kwargs.get('dtype', None)

    self._amplitude = self.add_variable(
        initializer=tf.constant_initializer(0),
        dtype=dtype,
        name='amplitude')

    self._length_scale = self.add_variable(
        initializer=tf.constant_initializer(0),
        dtype=dtype,
        name='length_scale')

  def call(self, x):
    # Never called -- this is just a layer so it can hold variables
    # in a way Keras understands.
    return x

  @property
  def kernel(self):
    return tfp.math.psd_kernels.ExponentiatedQuadratic(
        amplitude=tf.nn.softplus(0.1 * self._amplitude),
        length_scale=tf.nn.softplus(5. * self._length_scale)
    )

# For numeric stability, set the default floating-point dtype to float64
tf_keras.backend.set_floatx('float64')

# Build model.
num_inducing_points = 40
model = tf_keras.Sequential([
    tf_keras.layers.InputLayer(input_shape=[1]),
    tf_keras.layers.Dense(1, kernel_initializer='ones', use_bias=False),
    tfp.layers.VariationalGaussianProcess(
        num_inducing_points=num_inducing_points,
        kernel_provider=RBFKernelFn(),
        event_shape=[1],
        inducing_index_points_initializer=tf.constant_initializer(
            np.linspace(*x_range, num=num_inducing_points,
                        dtype=x.dtype)[..., np.newaxis]),
        unconstrained_observation_noise_variance_initializer=(
            tf.constant_initializer(np.array(0.54).astype(x.dtype))),
    ),
])

# Do inference.
batch_size = 32
loss = lambda y, rv_y: rv_y.variational_loss(
    y, kl_weight=np.array(batch_size, x.dtype) / x.shape[0])
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=loss)
model.fit(x, y, batch_size=batch_size, epochs=1000, verbose=False)

# Profit.
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
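
Here `variational_loss` is a negative evidence lower bound (ELBO). Weighting its KL term by `batch_size / x.shape[0]` means that, summed over the roughly `x.shape[0] / batch_size` minibatches in an epoch, the KL divergence is counted about once per pass through the data, mirroring the `kl_weight=1/x.shape[0]` choice in Cases 3 and 4 (where every example is seen once per gradient step).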

Figure 5: Functional Uncertainty

y, x, _ = load_dataset()

plt.figure(figsize=[6, 1.5])  # inches
plt.plot(x, y, 'b.', label='observed');

num_samples = 7
for i in range(num_samples):
  sample_ = yhat.sample().numpy()
  plt.plot(x_tst,
           sample_[..., 0].T,
           'r',
           linewidth=0.9,
           label='ensemble means' if i == 0 else None);

plt.ylim(-0., 17);
plt.yticks(np.linspace(0, 15, 4)[1:]);
plt.xticks(np.linspace(*x_range, num=9));
ax = plt.gca();
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc='center left', fancybox=True, framealpha=0., bbox_to_anchor=(1.05, 0.5))
plt.savefig('/tmp/fig5.png', bbox_inches='tight', dpi=300)

