Time series forecasting#

import os
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

The weather dataset#

  • 14 features, such as air temperature, atmospheric pressure, and humidity

  • Collected every 10 minutes; this tutorial sub-samples the data to hourly intervals

zip_path = tf.keras.utils.get_file(
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    extract=True)

csv_path = os.path.splitext(zip_path)[0]
csv_path
'/home/eavelar/.keras/datasets/jena_climate_2009_2016.csv'
df = pd.read_csv(csv_path)
df = df[5::6]  # Sub-sample from 10-minute intervals to hourly

date_time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')
df.head()
p (mbar) T (degC) Tpot (K) Tdew (degC) rh (%) VPmax (mbar) VPact (mbar) VPdef (mbar) sh (g/kg) H2OC (mmol/mol) rho (g/m**3) wv (m/s) max. wv (m/s) wd (deg)
5 996.50 -8.05 265.38 -8.78 94.4 3.33 3.14 0.19 1.96 3.15 1307.86 0.21 0.63 192.7
11 996.62 -8.88 264.54 -9.77 93.2 3.12 2.90 0.21 1.81 2.91 1312.25 0.25 0.63 190.3
17 996.84 -8.81 264.59 -9.66 93.5 3.13 2.93 0.20 1.83 2.94 1312.18 0.18 0.63 167.2
23 996.99 -9.05 264.34 -10.02 92.6 3.07 2.85 0.23 1.78 2.85 1313.61 0.10 0.38 240.0
29 997.46 -9.63 263.72 -10.65 92.2 2.94 2.71 0.23 1.69 2.71 1317.19 0.40 0.88 157.0
plot_cols = ['T (degC)', 'p (mbar)', 'rho (g/m**3)']
plot_features = df[plot_cols]
plot_features.index = date_time
_ = plot_features.plot(subplots=True)

plot_features = df[plot_cols][:480]
plot_features.index = date_time[:480]
_ = plot_features.plot(subplots=True)
[Plots of T (degC), p (mbar) and rho (g/m**3): the full dataset, and the first 480 hours]

Inspect and cleanup#

df.describe().transpose()
count mean std min 25% 50% 75% max
p (mbar) 70091.0 989.212842 8.358886 913.60 984.20 989.57 994.720 1015.29
T (degC) 70091.0 9.450482 8.423384 -22.76 3.35 9.41 15.480 37.28
Tpot (K) 70091.0 283.493086 8.504424 250.85 277.44 283.46 289.530 311.21
Tdew (degC) 70091.0 4.956471 6.730081 -24.80 0.24 5.21 10.080 23.06
rh (%) 70091.0 76.009788 16.474920 13.88 65.21 79.30 89.400 100.00
VPmax (mbar) 70091.0 13.576576 7.739883 0.97 7.77 11.82 17.610 63.77
VPact (mbar) 70091.0 9.533968 4.183658 0.81 6.22 8.86 12.360 28.25
VPdef (mbar) 70091.0 4.042536 4.898549 0.00 0.87 2.19 5.300 46.01
sh (g/kg) 70091.0 6.022560 2.655812 0.51 3.92 5.59 7.800 18.07
H2OC (mmol/mol) 70091.0 9.640437 4.234862 0.81 6.29 8.96 12.490 28.74
rho (g/m**3) 70091.0 1216.061232 39.974263 1059.45 1187.47 1213.80 1242.765 1393.54
wv (m/s) 70091.0 1.702567 65.447512 -9999.00 0.99 1.76 2.860 14.01
max. wv (m/s) 70091.0 2.963041 75.597657 -9999.00 1.76 2.98 4.740 23.50
wd (deg) 70091.0 174.789095 86.619431 0.00 125.30 198.10 234.000 360.00

The minimum of the wind velocity (wv (m/s)) and maximum wind velocity (max. wv (m/s)) columns is -9999, which is likely erroneous. Replace those values with zeros:

wv = df['wv (m/s)']
wv[wv == -9999.0] = 0.0

max_wv = df['max. wv (m/s)']
max_wv[max_wv == -9999.0] = 0.0

df['wv (m/s)'].min()
0.0

Feature engineering#

plt.hist2d(df['wd (deg)'], df['wv (m/s)'], bins=(50, 50), vmax=400)
plt.colorbar()
plt.xlabel('Wind Direction [deg]')
plt.ylabel('Wind Velocity [m/s]')
Text(0, 0.5, 'Wind Velocity [m/s]')
[2D histogram of wind direction vs. wind velocity]

Angles don't make good model inputs: 360° and 0° should be close to each other and wrap around smoothly. Convert the wind direction and velocity columns to a wind vector:

wv = df.pop('wv (m/s)')
max_wv = df.pop('max. wv (m/s)')

# Convert to radians.
wd_rad = df.pop('wd (deg)')*np.pi / 180

# Calculate the wind x and y components.
df['Wx'] = wv*np.cos(wd_rad)
df['Wy'] = wv*np.sin(wd_rad)

# Calculate the max wind x and y components.
df['max Wx'] = max_wv*np.cos(wd_rad)
df['max Wy'] = max_wv*np.sin(wd_rad)
plt.hist2d(df['Wx'], df['Wy'], bins=(50, 50), vmax=400)
plt.colorbar()
plt.xlabel('Wind X [m/s]')
plt.ylabel('Wind Y [m/s]')
ax = plt.gca()
ax.axis('tight')
(-11.305513973134667, 8.24469928549079, -8.27438540335515, 7.7338312955467785)
[2D histogram of the Wx and Wy wind components]
timestamp_s = date_time.map(pd.Timestamp.timestamp)
day = 60*60*24
year = 365.2425 * day

df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))
plt.plot(np.array(df['Day sin'])[:25])
plt.plot(np.array(df['Day cos'])[:25])
plt.xlabel('Time [h]')
plt.title('Time of day signal')
plt.show()
[Plot of the 'Day sin' and 'Day cos' signals over 25 hours]
If the important frequencies are not known in advance, a Fourier transform of a feature over time can reveal them:

fft = tf.signal.rfft(df['T (degC)'])
f_per_dataset = np.arange(0, len(fft))

n_samples_h = len(df['T (degC)'])
hours_per_year = 24*365.2425
years_per_dataset = n_samples_h/(hours_per_year)

f_per_year = f_per_dataset/years_per_dataset
plt.step(f_per_year, np.abs(fft))
plt.xscale('log')
plt.ylim(0, 400000)
plt.xlim([0.1, max(plt.xlim())])
plt.xticks([1, 365.2425], labels=['1/year', '1/day'])
_ = plt.xlabel('Frequency (log scale)')
[Log-scale frequency spectrum of T (degC), with peaks at 1/year and 1/day]

Split data#

The data is split 70% / 20% / 10% into training, validation and test sets. It is not shuffled before splitting, so each split consists of consecutive samples.

column_indices = {name: i for i, name in enumerate(df.columns)}

n = len(df)
train_limit = int(n*0.7)
val_limit = int(n*0.9)

train_df = df[:train_limit]
val_df = df[train_limit:val_limit]
test_df = df[val_limit:]

num_features = df.shape[1]

Normalize data#

  • Ideally this normalization would be done using moving averages, so no future statistics leak into each time step; in the interest of simplicity this tutorial uses a simple average over the training split (a sketch of the moving-average idea is shown next, followed by the code actually used)
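A minimal sketch of the moving-average variant, applied to the raw (not yet normalized) train_df. The one-month window length is an illustrative assumption, not part of this tutorial:

# Illustrative only: normalize each point with statistics from a trailing
# window, so no future values leak into the normalization.
window = 24 * 30  # assumed: roughly one month of hourly samples
rolling_mean = train_df.rolling(window, min_periods=1).mean()
rolling_std = train_df.rolling(window, min_periods=1).std().fillna(1.0)
train_df_ma = (train_df - rolling_mean) / rolling_std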

train_mean = train_df.mean()
train_std = train_df.std()

train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std
df_std = (df - train_mean) / train_std
df_std = df_std.melt(var_name='Column', value_name='Normalized')
plt.figure(figsize=(12, 6))
ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
_ = ax.set_xticklabels(df.keys(), rotation=90)
[Violin plot of the normalized distribution of each feature]

Data windowing#

The models in this tutorial make predictions based on a window of consecutive samples. The WindowGenerator class below handles the indexes and offsets: input_width is the number of input time steps, label_width the number of label time steps, and shift the offset between the end of the inputs and the end of the labels.

class WindowGenerator():
    def __init__(self, input_width, label_width, shift,
                 train_df=train_df, val_df=val_df, test_df=test_df,
                 label_columns=None):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}

        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Work out the window parameters
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        return '\n'.join([
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}'])
w1 = WindowGenerator(input_width=24, label_width=1, shift=24,
                     label_columns=['T (degC)'])
w1
Total window size: 48
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [47]
Label column name(s): ['T (degC)']
w2 = WindowGenerator(input_width=6, label_width=1, shift=1,
                     label_columns=['T (degC)'])
w2
Total window size: 7
Input indices: [0 1 2 3 4 5]
Label indices: [6]
Label column name(s): ['T (degC)']

Split#

def split_window(self, features):
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
        labels = tf.stack(
            [labels[:, :, self.column_indices[name]] for name in self.label_columns], axis=-1)

    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])

    return inputs, labels

WindowGenerator.split_window = split_window
# Stack three slices, the length of the total window.
example_window = tf.stack([np.array(train_df[:w2.total_window_size]),
                           np.array(train_df[100:100+w2.total_window_size]),
                           np.array(train_df[200:200+w2.total_window_size])])

example_inputs, example_labels = w2.split_window(example_window)

print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'Labels shape: {example_labels.shape}')
All shapes are: (batch, time, features)
Window shape: (3, 7, 19)
Inputs shape: (3, 6, 19)
Labels shape: (3, 1, 1)

The innermost indices are the features.
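As a quick check of that layout, using the example batch above (column_indices is the name-to-index mapping built by the WindowGenerator):

# Index the last (feature) axis to pull out the temperature channel.
t_index = w2.column_indices['T (degC)']
print(example_inputs[..., t_index].shape)  # (3, 6): (batch, time)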

w2.example = example_inputs, example_labels
def plot(self, model=None, plot_col='T (degC)', max_subplots=3):
  inputs, labels = self.example
  plt.figure(figsize=(12, 8))
  plot_col_index = self.column_indices[plot_col]
  max_n = min(max_subplots, len(inputs))
  for n in range(max_n):
    plt.subplot(max_n, 1, n+1)
    plt.ylabel(f'{plot_col} [normed]')
    plt.plot(self.input_indices, inputs[n, :, plot_col_index],
             label='Inputs', marker='.', zorder=-10)

    if self.label_columns:
      label_col_index = self.label_columns_indices.get(plot_col, None)
    else:
      label_col_index = plot_col_index

    if label_col_index is None:
      continue

    plt.scatter(self.label_indices, labels[n, :, label_col_index],
                edgecolors='k', label='Labels', c='#2ca02c', s=64)
    if model is not None:
      predictions = model(inputs)
      plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                  marker='X', edgecolors='k', label='Predictions',
                  c='#ff7f0e', s=64)

    if n == 0:
      plt.legend()

  plt.xlabel('Time [h]')

WindowGenerator.plot = plot
w2.plot()
[Plot of the w2 example inputs and labels]

Create tf.data.Datasets#

The make_dataset method below takes a time-series DataFrame and converts it to a tf.data.Dataset of (input_window, label_window) pairs using tf.keras.utils.timeseries_dataset_from_array:

def make_dataset(self, data):
    data = np.array(data, dtype=np.float32)
    
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32,)

    ds = ds.map(self.split_window)

    return ds

WindowGenerator.make_dataset = make_dataset
@property
def train(self):
    return self.make_dataset(self.train_df)

@property
def val(self):
    return self.make_dataset(self.val_df)

@property
def test(self):
    return self.make_dataset(self.test_df)

@property
def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
        # No example batch was found, so get one from the `.train` dataset
        result = next(iter(self.train))
        # And cache it for next time
        self._example = result
    return result

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example
w2.train.element_spec
(TensorSpec(shape=(None, 6, 19), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None))
for example_inputs, example_labels in w2.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')
Inputs shape (batch, time, features): (32, 6, 19)
Labels shape (batch, time, features): (32, 1, 1)

Single step models#

The simplest model predicts a single feature's value one time step (one hour) into the future, based only on the current conditions.

single_step_window = WindowGenerator(
    input_width=1, label_width=1, shift=1,
    label_columns=['T (degC)'])
single_step_window
Total window size: 2
Input indices: [0]
Label indices: [1]
Label column name(s): ['T (degC)']
for example_inputs, example_labels in single_step_window.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')
Inputs shape (batch, time, features): (32, 1, 19)
Labels shape (batch, time, features): (32, 1, 1)

Baseline#

Start with a model that just returns the current temperature as the prediction, predicting “No change”. This is a reasonable baseline since temperature changes slowly.

class Baseline(tf.keras.Model):
    def __init__(self, label_index=None):
        super().__init__()
        self.label_index = label_index

    def call(self, inputs):
        if self.label_index is None:
            return inputs
        result = inputs[:, :, self.label_index]
        return result[:, :, tf.newaxis]
baseline = Baseline(label_index=column_indices['T (degC)'])

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])

val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(single_step_window.val)
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0)
439/439 [==============================] - 2s 4ms/step - loss: 0.0128 - mean_absolute_error: 0.0785
wide_window = WindowGenerator(
    input_width=24, label_width=24, shift=1,
    label_columns=['T (degC)'])

wide_window
Total window size: 25
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
Label column name(s): ['T (degC)']
print('Input shape:', wide_window.example[0].shape)
print('Output shape:', baseline(wide_window.example[0]).shape)
Input shape: (32, 24, 19)
Output shape: (32, 24, 1)
wide_window.plot(baseline)
[Baseline predictions on the wide window]

Linear model#

A tf.keras.layers.Dense layer with no activation set is a linear model.

linear = tf.keras.Sequential([
    tf.keras.layers.Dense(1)
])
print('Input shape:', single_step_window.example[0].shape)
print('Output shape:', linear(single_step_window.example[0]).shape)
Input shape: (32, 1, 19)
Output shape: (32, 1, 1)
MAX_EPOCHS = 20

def compile_and_fit(model, window, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=patience, mode='min')

    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=tf.keras.metrics.MeanAbsoluteError())
    
    history = model.fit(window.train, epochs=MAX_EPOCHS, validation_data=window.val,
                        callbacks=[early_stopping])
    
    return history
history = compile_and_fit(linear, single_step_window)

val_performance['Linear'] = linear.evaluate(single_step_window.val)
performance['Linear'] = linear.evaluate(single_step_window.test, verbose=0)
Epoch 1/20
1534/1534 [==============================] - 14s 8ms/step - loss: 0.0548 - mean_absolute_error: 0.1522 - val_loss: 0.0117 - val_mean_absolute_error: 0.0789
Epoch 2/20
1534/1534 [==============================] - 12s 8ms/step - loss: 0.0102 - mean_absolute_error: 0.0743 - val_loss: 0.0091 - val_mean_absolute_error: 0.0698
Epoch 3/20
1534/1534 [==============================] - 12s 8ms/step - loss: 0.0094 - mean_absolute_error: 0.0711 - val_loss: 0.0088 - val_mean_absolute_error: 0.0692
Epoch 4/20
1534/1534 [==============================] - 12s 8ms/step - loss: 0.0092 - mean_absolute_error: 0.0703 - val_loss: 0.0088 - val_mean_absolute_error: 0.0686
Epoch 5/20
1534/1534 [==============================] - 11s 7ms/step - loss: 0.0091 - mean_absolute_error: 0.0698 - val_loss: 0.0086 - val_mean_absolute_error: 0.0680
Epoch 6/20
1534/1534 [==============================] - 12s 8ms/step - loss: 0.0091 - mean_absolute_error: 0.0698 - val_loss: 0.0089 - val_mean_absolute_error: 0.0693
Epoch 7/20
1534/1534 [==============================] - 12s 8ms/step - loss: 0.0091 - mean_absolute_error: 0.0697 - val_loss: 0.0086 - val_mean_absolute_error: 0.0681
439/439 [==============================] - 3s 6ms/step - loss: 0.0086 - mean_absolute_error: 0.0681
wide_window.plot(linear)
[Linear model predictions on the wide window]
plt.bar(x=range(len(train_df.columns)), height=linear.layers[0].kernel[:,0].numpy())
axis = plt.gca()
axis.set_xticks(range(len(train_df.columns)))
_ = axis.set_xticklabels(train_df.columns, rotation=90)
[Bar chart of the linear model's weight for each input feature]

Sometimes the model doesn't even place the most weight on the input T (degC). This is one of the risks of random initialization; a reproducibility sketch follows.
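One way to make such runs repeatable (a sketch; fixing seeds is not part of the original flow, and tf.keras.utils.set_random_seed needs TensorFlow 2.7 or newer):

# Seed Python's `random`, NumPy, and TensorFlow in one call so repeated
# runs start from the same weight initialization.
tf.keras.utils.set_random_seed(42)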

Dense#

dense = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

history = compile_and_fit(dense, single_step_window)

val_performance['Dense'] = dense.evaluate(single_step_window.val)
performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)
Epoch 1/20
1534/1534 [==============================] - 21s 12ms/step - loss: 0.0144 - mean_absolute_error: 0.0778 - val_loss: 0.0075 - val_mean_absolute_error: 0.0622
Epoch 2/20
1534/1534 [==============================] - 19s 12ms/step - loss: 0.0077 - mean_absolute_error: 0.0635 - val_loss: 0.0069 - val_mean_absolute_error: 0.0584
Epoch 3/20
1534/1534 [==============================] - 20s 13ms/step - loss: 0.0074 - mean_absolute_error: 0.0619 - val_loss: 0.0068 - val_mean_absolute_error: 0.0571
Epoch 4/20
1534/1534 [==============================] - 18s 12ms/step - loss: 0.0071 - mean_absolute_error: 0.0603 - val_loss: 0.0069 - val_mean_absolute_error: 0.0578
Epoch 5/20
1534/1534 [==============================] - 19s 13ms/step - loss: 0.0070 - mean_absolute_error: 0.0599 - val_loss: 0.0070 - val_mean_absolute_error: 0.0590
439/439 [==============================] - 4s 8ms/step - loss: 0.0070 - mean_absolute_error: 0.0590

Multi-step dense#

A single-time-step model has no context for the current values of its inputs, so a model that sees multiple time steps when making predictions may do better. Start with a window that produces three hours of inputs and one hour of labels:

CONV_WIDTH = 3
conv_window = WindowGenerator(
    input_width=3,
    label_width=1,
    shift=1,
    label_columns=['T (degC)'])

conv_window
Total window size: 4
Input indices: [0 1 2]
Label indices: [3]
Label column name(s): ['T (degC)']
conv_window.plot()
plt.title("Given 3 hours of inputs, predict 1 hour into the future")
Text(0.5, 1.0, 'Given 3 hours of inputs, predict 1 hour into the future')
[Plot of the conv_window example]

You could train a dense model on a multiple-input-step window by adding a tf.keras.layers.Flatten as the first layer of the model.

multi_step_dense = tf.keras.Sequential([
    # (batch, time, features) => (batch, time*features)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1),
    # Add back the time dimension: (batch, 1) => (batch, 1, 1)
    tf.keras.layers.Reshape([1, -1])
])
print('Input shape:', conv_window.example[0].shape)
print('Output shape:', multi_step_dense(conv_window.example[0]).shape)
Input shape: (32, 3, 19)
Output shape: (32, 1, 1)
history = compile_and_fit(multi_step_dense, conv_window)

IPython.display.clear_output()
val_performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.val)
performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.test, verbose=0)
438/438 [==============================] - 2s 5ms/step - loss: 0.0066 - mean_absolute_error: 0.0565
conv_window.plot(multi_step_dense)
[multi_step_dense predictions on the conv_window]

Convolutional neural network#

conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(32, CONV_WIDTH, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
print('Input shape:', conv_window.example[0].shape)
print('Output shape:', conv_model(conv_window.example[0]).shape)
Input shape: (32, 3, 19)
Output shape: (32, 1, 1)
history = compile_and_fit(conv_model, conv_window)

IPython.display.clear_output()
val_performance['Conv'] = conv_model.evaluate(conv_window.val)
performance['Conv'] = conv_model.evaluate(conv_window.test, verbose=0)
438/438 [==============================] - 4s 8ms/step - loss: 0.0062 - mean_absolute_error: 0.0559
print('Wide window')
print('Input shape:', wide_window.example[0].shape)
print('Labels shape:', wide_window.example[1].shape)
print('Output shape:', conv_model(wide_window.example[0]).shape)
Wide window
Input shape: (32, 24, 19)
Labels shape: (32, 24, 1)
Output shape: (32, 22, 1)
The output is two time steps shorter than the input: a width-3 convolution with no padding trims CONV_WIDTH - 1 = 2 steps. To align a 24-step output with 24 labels, build windows with 2 extra input steps:

LABEL_WIDTH = 24
INPUT_WIDTH = LABEL_WIDTH + (CONV_WIDTH - 1)
wide_conv_window = WindowGenerator(
    input_width=INPUT_WIDTH,
    label_width=LABEL_WIDTH,
    shift=1,
    label_columns=['T (degC)'])

wide_conv_window
Total window size: 27
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Label indices: [ 3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26]
Label column name(s): ['T (degC)']
print('Wide conv window')
print('Input shape:', wide_conv_window.example[0].shape)
print('Labels window:', wide_conv_window.example[1].shape)
print('Output shape:', conv_model(wide_conv_window.example[0]).shape)
Wide conv window
Input shape: (32, 26, 19)
Labels window: (32, 24, 1)
Output shape: (32, 24, 1)
wide_conv_window.plot(conv_model)
[conv_model predictions on the wide_conv_window]

Recurrent neural networks#

A recurrent neural network (RNN) processes a time series step by step, maintaining an internal state from step to step. Here a tf.keras.layers.LSTM layer is used.

An important constructor argument for Keras RNN layers is return_sequences. With return_sequences=True the layer returns an output for every input time step rather than only the last one, which is useful for:

  • Stacking RNN layers

  • Training a model on multiple time steps simultaneously (see the shape check below)
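A minimal shape check of the two modes (the 32 units and 19 features mirror this tutorial's data; the zeros tensor is just a stand-in batch):

# return_sequences=True: one output per time step -> (batch, time, units).
# Default (False): only the final step's output -> (batch, units).
x = tf.zeros([32, 24, 19])  # (batch, time, features)
print(tf.keras.layers.LSTM(32, return_sequences=True)(x).shape)  # (32, 24, 32)
print(tf.keras.layers.LSTM(32)(x).shape)                         # (32, 32)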

lstm_model = tf.keras.Sequential([
    # (batch, time, features) =>
    tf.keras.layers.LSTM(32, return_sequences=True), 
    # (batch, time, 32) =>
    tf.keras.layers.Dense(1) 
    # (batch, time, 1)
])
print('Input shape:', wide_window.example[0].shape)
print('Output shape:', lstm_model(wide_window.example[0]).shape)
Input shape: (32, 24, 19)
Output shape: (32, 24, 1)
history = compile_and_fit(lstm_model, wide_window)

IPython.display.clear_output()
val_performance['LSTM'] = lstm_model.evaluate(wide_window.val)
performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0)
438/438 [==============================] - 4s 8ms/step - loss: 0.0056 - mean_absolute_error: 0.0516

Performance#

x = np.arange(len(performance))
width = 0.3
metric_name = 'mean_absolute_error'
metric_index = lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in val_performance.values()]
test_mae = [v[metric_index] for v in performance.values()]

plt.ylabel('mean_absolute_error [T (degC), normalized]')
plt.bar(x - 0.17, val_mae, width, label='Validation')
plt.bar(x + 0.17, test_mae, width, label='Test')
plt.xticks(ticks=x, labels=performance.keys(),
           rotation=45)
_ = plt.legend()
[Bar chart of validation and test MAE for each single-step model]
for name, value in performance.items():
    print(f'{name:12s}: {value[1]:0.4f}')
Baseline    : 0.0852
Linear      : 0.0669
Dense       : 0.0596
Multi step dense: 0.0566
Conv        : 0.0579
LSTM        : 0.0519

Multi-output models#

The models so far all predicted a single output feature, T (degC). To predict every feature at once, simply omit label_columns when building the windows, so the labels contain all 19 features:

single_step_window = WindowGenerator(input_width=1, label_width=1, shift=1)

wide_window = WindowGenerator(input_width=24, label_width=24, shift=1)

for example_inputs, example_labels in wide_window.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')
Inputs shape (batch, time, features): (32, 24, 19)
Labels shape (batch, time, features): (32, 24, 19)

Baseline#

baseline = Baseline()
baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])
val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(wide_window.val)
performance['Baseline'] = baseline.evaluate(wide_window.test, verbose=0)
438/438 [==============================] - 2s 5ms/step - loss: 0.0886 - mean_absolute_error: 0.1589

Dense#

dense = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_features)
])
history = compile_and_fit(dense, single_step_window)

IPython.display.clear_output()
val_performance['Dense'] = dense.evaluate(single_step_window.val)
performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)
439/439 [==============================] - 4s 9ms/step - loss: 0.0683 - mean_absolute_error: 0.1309

RNN#

lstm_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dense(num_features)
])

history = compile_and_fit(lstm_model, wide_window)

IPython.display.clear_output()
val_performance['LSTM'] = lstm_model.evaluate(wide_window.val)
performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0)
438/438 [==============================] - 4s 9ms/step - loss: 0.0615 - mean_absolute_error: 0.1198

Advanced: Residual Connections#

The Baseline model from earlier took advantage of the fact that the sequence doesn’t change drastically from time step to time step.

It's common in time series analysis to build models that, instead of predicting the next value, predict how the value will change in the next time step. The ResidualWrapper below adds the model's predicted change back onto its input:

class ResidualWrapper(tf.keras.Model):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def call(self, inputs, *args, **kwargs):
        delta = self.model(inputs, *args, **kwargs)

        return inputs + delta
residual_lstm = ResidualWrapper(
    tf.keras.Sequential([
        tf.keras.layers.LSTM(32, return_sequences=True),
        tf.keras.layers.Dense(num_features, kernel_initializer=tf.initializers.zeros())
]))

history = compile_and_fit(residual_lstm, wide_window)

IPython.display.clear_output()
val_performance['Residual LSTM'] = residual_lstm.evaluate(wide_window.val)
performance['Residual LSTM'] = residual_lstm.evaluate(wide_window.test, verbose=0)
438/438 [==============================] - 4s 8ms/step - loss: 0.0621 - mean_absolute_error: 0.1180

Performance#

x = np.arange(len(performance))
width = 0.3

metric_name = 'mean_absolute_error'
metric_index = lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in val_performance.values()]
test_mae = [v[metric_index] for v in performance.values()]

plt.bar(x - 0.17, val_mae, width, label='Validation')
plt.bar(x + 0.17, test_mae, width, label='Test')
plt.xticks(ticks=x, labels=performance.keys(),
           rotation=45)
plt.ylabel('MAE (average over all outputs)')
_ = plt.legend()
[Bar chart of validation and test MAE for each multi-output model]
for name, value in performance.items():
    print(f'{name:15s}: {value[1]:0.4f}')
Baseline       : 0.1638
Dense          : 0.1324
LSTM           : 0.1219
Residual LSTM  : 0.1195

Multi-step models#

The models in this final section predict a range of future values: given 24 hours of inputs, they predict all features 24 hours into the future.

OUT_STEPS = 24
multi_window = WindowGenerator(input_width=24,
                               label_width=OUT_STEPS,
                               shift=OUT_STEPS)

multi_window.plot()
multi_window
Total window size: 48
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
Label column name(s): None
[Plot of the multi_window example inputs and labels]

Baselines#

class MultiStepLastBaseline(tf.keras.Model):
    def call(self, inputs):
        # Repeat the last input time step OUT_STEPS times.
        return tf.tile(inputs[:, -1:, :], [1, OUT_STEPS, 1])

last_baseline = MultiStepLastBaseline()
last_baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                      metrics=[tf.keras.metrics.MeanAbsoluteError()])

multi_val_performance = {}
multi_performance = {}

multi_val_performance['Last'] = last_baseline.evaluate(multi_window.val)
multi_performance['Last'] = last_baseline.evaluate(multi_window.test, verbose=0)
multi_window.plot(last_baseline)
437/437 [==============================] - 2s 5ms/step - loss: 0.6285 - mean_absolute_error: 0.5007
[last_baseline predictions on the multi_window]
class RepeatBaseline(tf.keras.Model):
    def call(self, inputs):
        # Predict that tomorrow repeats today: the 24 input steps are
        # returned unchanged as the 24 output steps.
        return inputs

repeat_baseline = RepeatBaseline()
repeat_baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                        metrics=[tf.keras.metrics.MeanAbsoluteError()])

multi_val_performance['Repeat'] = repeat_baseline.evaluate(multi_window.val)
multi_performance['Repeat'] = repeat_baseline.evaluate(multi_window.test, verbose=0)
multi_window.plot(repeat_baseline)
437/437 [==============================] - 2s 5ms/step - loss: 0.4270 - mean_absolute_error: 0.3959
[repeat_baseline predictions on the multi_window]

Single-shot models#

One high-level approach to this problem is a single-shot model, which predicts the entire output sequence in one step.

Linear#

multi_linear_model = tf.keras.Sequential([
    # (batch, time, features)
    tf.keras.layers.Lambda(lambda x: x[:, -1:, :]),
    # (batch, 1, features)
    tf.keras.layers.Dense(OUT_STEPS*num_features, kernel_initializer=tf.initializers.zeros()),
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
    # (batch, out_steps, features)
])

history = compile_and_fit(multi_linear_model, multi_window)

IPython.display.clear_output()
multi_val_performance['Linear'] = multi_linear_model.evaluate(multi_window.val)
multi_performance['Linear'] = multi_linear_model.evaluate(multi_window.test, verbose=0)
multi_window.plot(multi_linear_model)
437/437 [==============================] - 3s 6ms/step - loss: 0.2560 - mean_absolute_error: 0.3057
[multi_linear_model predictions on the multi_window]

Dense#

multi_dense_model = tf.keras.Sequential([
    # (batch, time, features)
    tf.keras.layers.Lambda(lambda x: x[:, -1:, :]),
    # (batch, 1, features)
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(OUT_STEPS*num_features, kernel_initializer=tf.initializers.zeros()),
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
    # (batch, out_steps, features)
])

history = compile_and_fit(multi_dense_model, multi_window)

IPython.display.clear_output()
multi_val_performance['Dense'] = multi_dense_model.evaluate(multi_window.val)
multi_performance['Dense'] = multi_dense_model.evaluate(multi_window.test, verbose=0)
multi_window.plot(multi_dense_model)
437/437 [==============================] - 3s 7ms/step - loss: 0.2195 - mean_absolute_error: 0.2827
[multi_dense_model predictions on the multi_window]

CNN#

CONV_WIDTH = 3
multi_conv_model = tf.keras.Sequential([
    # (batch, time, features)
    tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]),
    # (batch, CONV_WIDTH, features)
    tf.keras.layers.Conv1D(256, CONV_WIDTH, activation='relu'),
    # (batch, 1, 256)
    tf.keras.layers.Dense(OUT_STEPS*num_features, kernel_initializer=tf.initializers.zeros()),
    # (batch, 1,  out_steps*features)
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
    # (batch, out_steps, features)
])

history = compile_and_fit(multi_conv_model, multi_window)

IPython.display.clear_output()

multi_val_performance['Conv'] = multi_conv_model.evaluate(multi_window.val)
multi_performance['Conv'] = multi_conv_model.evaluate(multi_window.test, verbose=0)
multi_window.plot(multi_conv_model)

RNN#

multi_lstm_model = tf.keras.Sequential([
    # (batch, time, features)
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(32),
    # (batch, lstm_units)
    tf.keras.layers.Dense(OUT_STEPS*num_features, kernel_initializer=tf.initializers.zeros()),
    # (batch, out_steps*features)
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
    # (batch, out_steps, features)
])

history = compile_and_fit(multi_lstm_model, multi_window)

IPython.display.clear_output()

multi_val_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val)
multi_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.test, verbose=0)
multi_window.plot(multi_lstm_model)

Advanced: Autoregressive model#

The single-shot models above produce the entire output sequence in one pass. An autoregressive model instead decomposes the prediction into individual steps, feeding each prediction back in as the next input.

RNN#

class FeedBack(tf.keras.Model):
    def __init__(self, units, out_steps):
        super().__init__()
        self.out_steps = out_steps
        self.units = units
        self.lstm_cell = tf.keras.layers.LSTMCell(units)
        # Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
        self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
        self.dense = tf.keras.layers.Dense(num_features)
feedback_model = FeedBack(units=32, out_steps=OUT_STEPS)
def warmup(self, inputs):
    # inputs.shape => (batch, time, features)
    # x.shape => (batch, lstm_units)
    x, *state = self.lstm_rnn(inputs)

    # prediction.shape => (batch, features)
    prediction = self.dense(x)
    return prediction, state

FeedBack.warmup = warmup
prediction, state = feedback_model.warmup(multi_window.example[0])
prediction.shape  # => TensorShape([32, 19]): (batch, features)
def call(self, inputs, training=None):
    # Collect the dynamically unrolled outputs in a Python list.
    predictions = []
    # Initialize the LSTM state.
    prediction, state = self.warmup(inputs)

    # Insert the first prediction.
    predictions.append(prediction)

    # Run the rest of the prediction steps.
    for n in range(1, self.out_steps):
        # Use the last prediction as input.
        x = prediction
        # Execute one lstm step.
        x, state = self.lstm_cell(x, states=state, training=training)
        # Convert the lstm output to a prediction.
        prediction = self.dense(x)
        # Add the prediction to the output.
        predictions.append(prediction)

    # predictions.shape => (time, batch, features)
    predictions = tf.stack(predictions)
    # predictions.shape => (batch, time, features)
    predictions = tf.transpose(predictions, [1, 0, 2])
    return predictions

FeedBack.call = call
print('Output shape (batch, time, features): ', feedback_model(multi_window.example[0]).shape)
history = compile_and_fit(feedback_model, multi_window)

IPython.display.clear_output()

multi_val_performance['AR LSTM'] = feedback_model.evaluate(multi_window.val)
multi_performance['AR LSTM'] = feedback_model.evaluate(multi_window.test, verbose=0)
multi_window.plot(feedback_model)

Performance#

x = np.arange(len(multi_performance))
width = 0.3

metric_name = 'mean_absolute_error'
metric_index = lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in multi_val_performance.values()]
test_mae = [v[metric_index] for v in multi_performance.values()]

plt.bar(x - 0.17, val_mae, width, label='Validation')
plt.bar(x + 0.17, test_mae, width, label='Test')
plt.xticks(ticks=x, labels=multi_performance.keys(),
           rotation=45)
plt.ylabel('MAE (average over all times and outputs)')
_ = plt.legend()