This systematic review investigates the current state of research on multimodal fusion methods, i.e., the joint analysis of multimodal inputs, for intentional, instruction-based human-computer interactions, focusing on the combination of speech and spatially expressive modalities such as gestures, touch, pen, and gaze.
We examine 50 systems from a User-Centered Design perspective, categorizing them by modality combinations, fusion strategies, application domains and media, as well as reusability. Our findings highlight a predominance of descriptive late fusion methods, limited reusability, and a lack of standardized tool support, hampering rapid prototyping and broader applicability. We identify emerging trends in machine learning-based fusion and outline future research directions to advance reusable and user-centered multimodal systems.
%0 Conference Paper
%1 heinrich2025systematic
%A Heinrich, Ronja
%A Zimmerer, Chris
%A Fischbach, Martin
%A Latoschik, Marc Erich
%B Proceedings of the 27th International Conference on Multimodal Interaction (ICMI '25)
%D 2025
%I Association for Computing Machinery
%K mmi myown xrhub
%R 10.1145/3716553.3750790
%T A Systematic Review of Fusion Methods for the User-Centered Design of Multimodal Interfaces
%U https://dl.acm.org/doi/pdf/10.1145/3716553.3750790
%X This systematic review investigates the current state of research on multimodal fusion methods, i.e., the joint analysis of multimodal inputs, for intentional, instruction-based human-computer interactions, focusing on the combination of speech and spatially expressive modalities such as gestures, touch, pen, and gaze.
We examine 50 systems from a User-Centered Design perspective, categorizing them by modality combinations, fusion strategies, application domains and media, as well as reusability. Our findings highlight a predominance of descriptive late fusion methods, limited reusability, and a lack of standardized tool support, hampering rapid prototyping and broader applicability. We identify emerging trends in machine learning-based fusion and outline future research directions to advance reusable and user-centered multimodal systems.
%@ 979-8-4007-1499-3
% Heinrich, Zimmerer, Fischbach, Latoschik (2025): systematic review of
% multimodal fusion methods, published in the ICMI '25 proceedings.
% doi and isbn hold bare identifiers (no resolver prefix or label, no ACM
% year/month suffix); eventdate is an ISO 8601 range as biblatex expects.
% added-at, biburl, interhash, intrahash and timestamp appear to be BibSonomy
% export bookkeeping (judging by the biburl host); they are not standard
% bibliography fields and are kept only so the record can be traced back.
@inproceedings{heinrich2025systematic,
  author     = {Heinrich, Ronja and Zimmerer, Chris and Fischbach, Martin and Latoschik, Marc Erich},
  title      = {A Systematic Review of Fusion Methods for the User-Centered Design of Multimodal Interfaces},
  booktitle  = {Proceedings of the 27th International Conference on Multimodal Interaction ({ICMI} '25)},
  eventtitle = {27th International Conference on Multimodal Interaction ({ICMI} '25)},
  eventdate  = {2025-10-13/2025-10-17},
  publisher  = {Association for Computing Machinery},
  year       = {2025},
  doi        = {10.1145/3716553.3750790},
  isbn       = {979-8-4007-1499-3},
  url        = {https://dl.acm.org/doi/pdf/10.1145/3716553.3750790},
  abstract   = {This systematic review investigates the current state of research on multimodal fusion methods, i.e., the joint analysis of multimodal inputs, for intentional, instruction-based human-computer interactions, focusing on the combination of speech and spatially expressive modalities such as gestures, touch, pen, and gaze.
We examine 50 systems from a User-Centered Design perspective, categorizing them by modality combinations, fusion strategies, application domains and media, as well as reusability. Our findings highlight a predominance of descriptive late fusion methods, limited reusability, and a lack of standardized tool support, hampering rapid prototyping and broader applicability. We identify emerging trends in machine learning-based fusion and outline future research directions to advance reusable and user-centered multimodal systems.},
  keywords   = {mmi myown xrhub},
  added-at   = {2025-07-31T10:24:58.000+0200},
  timestamp  = {2026-04-15T16:56:12.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/26b18da70f3f9fad5187f725bc5a95a0d/hci-uwb},
  interhash  = {a63c31c35b2688b6ee3195ba8864105e},
  intrahash  = {6b18da70f3f9fad5187f725bc5a95a0d},
}