The success of Convolutional Neural Networks (CNNs) in computer vision is
mainly driven by their strong inductive bias, which is strong enough to allow
CNNs to solve vision-related tasks with random weights, meaning without
learning. Similarly, Long Short-Term Memory (LSTM) has a strong inductive bias
towards storing information over time. However, many real-world systems are
governed by conservation laws, which lead to the redistribution of particular
quantities -- e.g. in physical and economical systems. Our novel
Mass-Conserving LSTM (MC-LSTM) adheres to these conservation laws by extending
the inductive bias of LSTM to model the redistribution of those stored
quantities. MC-LSTMs set a new state-of-the-art for neural arithmetic units at
learning arithmetic operations, such as addition tasks, which have a strong
conservation law, as the sum is constant over time. Further, MC-LSTM is applied
to traffic forecasting, modelling a pendulum, and a large benchmark dataset in
hydrology, where it sets a new state-of-the-art for predicting peak flows. In
the hydrology example, we show that MC-LSTM states correlate with real-world
processes and are therefore interpretable.
%0 Generic
%1 hoedt2021mclstm
%A Hoedt, Pieter-Jan
%A Kratzert, Frederik
%A Klotz, Daniel
%A Halmich, Christina
%A Holzleitner, Markus
%A Nearing, Grey
%A Hochreiter, Sepp
%A Klambauer, Günter
%D 2021
%K deeplearning lstm todo:read
%T MC-LSTM: Mass-Conserving LSTM
%U http://arxiv.org/abs/2101.05186
%X The success of Convolutional Neural Networks (CNNs) in computer vision is
mainly driven by their strong inductive bias, which is strong enough to allow
CNNs to solve vision-related tasks with random weights, meaning without
learning. Similarly, Long Short-Term Memory (LSTM) has a strong inductive bias
towards storing information over time. However, many real-world systems are
governed by conservation laws, which lead to the redistribution of particular
quantities -- e.g. in physical and economical systems. Our novel
Mass-Conserving LSTM (MC-LSTM) adheres to these conservation laws by extending
the inductive bias of LSTM to model the redistribution of those stored
quantities. MC-LSTMs set a new state-of-the-art for neural arithmetic units at
learning arithmetic operations, such as addition tasks, which have a strong
conservation law, as the sum is constant over time. Further, MC-LSTM is applied
to traffic forecasting, modelling a pendulum, and a large benchmark dataset in
hydrology, where it sets a new state-of-the-art for predicting peak flows. In
the hydrology example, we show that MC-LSTM states correlate with real-world
processes and are therefore interpretable.
@misc{hoedt2021mclstm,
  abstract      = {The success of Convolutional Neural Networks (CNNs) in computer vision is
mainly driven by their strong inductive bias, which is strong enough to allow
CNNs to solve vision-related tasks with random weights, meaning without
learning. Similarly, Long Short-Term Memory (LSTM) has a strong inductive bias
towards storing information over time. However, many real-world systems are
governed by conservation laws, which lead to the redistribution of particular
quantities -- e.g. in physical and economical systems. Our novel
Mass-Conserving LSTM (MC-LSTM) adheres to these conservation laws by extending
the inductive bias of LSTM to model the redistribution of those stored
quantities. MC-LSTMs set a new state-of-the-art for neural arithmetic units at
learning arithmetic operations, such as addition tasks, which have a strong
conservation law, as the sum is constant over time. Further, MC-LSTM is applied
to traffic forecasting, modelling a pendulum, and a large benchmark dataset in
hydrology, where it sets a new state-of-the-art for predicting peak flows. In
the hydrology example, we show that MC-LSTM states correlate with real-world
processes and are therefore interpretable.},
  added-at      = {2021-01-19T10:57:00.000+0100},
  author        = {Hoedt, Pieter-Jan and Kratzert, Frederik and Klotz, Daniel and Halmich, Christina and Holzleitner, Markus and Nearing, Grey and Hochreiter, Sepp and Klambauer, G{\"u}nter},
  biburl        = {https://www.bibsonomy.org/bibtex/290f152d0de74f29ff8be75d6b8a73173/annakrause},
  description   = {[2101.05186v1] MC-LSTM: Mass-Conserving LSTM},
  eprint        = {2101.05186},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  interhash     = {1dfd95b3a7a5f20093216026cf82e723},
  intrahash     = {90f152d0de74f29ff8be75d6b8a73173},
  keywords      = {deeplearning lstm todo:read},
  note          = {14 pages (9 without references) + 18 pages appendix},
  timestamp     = {2021-01-19T10:57:00.000+0100},
  title         = {{MC-LSTM}: Mass-Conserving {LSTM}},
  url           = {http://arxiv.org/abs/2101.05186},
  year          = {2021},
}