A Cyberware laser scan of DWM was made, Baldi's generic morphology was mapped into the form of DWM, this head was trained on real data recorded with Optotrak LED markers, and the quality of its speech was evaluated. Participants were asked to recognize auditory sentences presented alone in noise, aligned with the newly trained synthetic textured mapped target face, or the original natural face. There was a significant advantage when the noisy auditory sentence was paired with either head, with the synthetic textured mapped target face giving as much of an improvement as the original recordings of the natural face.
%0 Conference Paper
%1 Cohen2002
%A Cohen, Michael M.
%A Massaro, Dominic W.
%A Clark, Rashid
%B Proceedings of the 4th IEEE International Conference on Multimodal Interfaces (ICMI)
%C Pittsburgh, PA, USA
%D 2002
%K LED analysis;Speech animation;Humans;Laser animation;generic animation;image computer diodes;Magnetic emitting face;talking head heads;Morphology;Shape;Speech intelligibility;speech interface;Facial interfaces;Cyberware laser mapped markers;auditory modes;Light morphology;noise;speech processing;Speech quality;speech recognition;face scan;DWM;Optotrak sentence synthesis synthesis;speech-based synthesis;synthetic target texture;speech textured training;user user
%P 499-504
%R 10.1109/ICMI.2002.1167046
%T Training a talking head
%X A Cyberware laser scan of DWM was made, Baldi's generic morphology was mapped into the form of DWM, this head was trained on real data recorded with Optotrak LED markers, and the quality of its speech was evaluated. Participants were asked to recognize auditory sentences presented alone in noise, aligned with the newly trained synthetic textured mapped target face, or the original natural face. There was a significant advantage when the noisy auditory sentence was paired with either head, with the synthetic textured mapped target face giving as much of an improvement as the original recordings of the natural face.
@inproceedings{Cohen2002,
  abstract  = {A Cyberware laser scan of DWM was made, Baldi's generic morphology was mapped into the form of DWM, this head was trained on real data recorded with Optotrak LED markers, and the quality of its speech was evaluated. Participants were asked to recognize auditory sentences presented alone in noise, aligned with the newly trained synthetic textured mapped target face, or the original natural face. There was a significant advantage when the noisy auditory sentence was paired with either head, with the synthetic textured mapped target face giving as much of an improvement as the original recordings of the natural face.},
  added-at  = {2021-02-01T10:51:23.000+0100},
  address   = {Pittsburgh, PA, USA},
  author    = {Cohen, Michael M. and Massaro, Dominic W. and Clark, Rashid},
  biburl    = {https://www.bibsonomy.org/bibtex/269a518e677a28667a13e436c5c2a5e33/m-toman},
  booktitle = {Proceedings of the 4th {IEEE} International Conference on Multimodal Interfaces ({ICMI})},
  doi       = {10.1109/ICMI.2002.1167046},
  file      = {:pdfs/cohen_icmi_2002.pdf:PDF},
  interhash = {9d89eaa74f8cfdc95f2e22b3f10cebaf},
  intrahash = {69a518e677a28667a13e436c5c2a5e33},
  keywords  = {LED analysis;Speech animation;Humans;Laser animation;generic animation;image computer diodes;Magnetic emitting face;talking head heads;Morphology;Shape;Speech intelligibility;speech interface;Facial interfaces;Cyberware laser mapped markers;auditory modes;Light morphology;noise;speech processing;Speech quality;speech recognition;face scan;DWM;Optotrak sentence synthesis synthesis;speech-based synthesis;synthetic target texture;speech textured training;user user},
  month     = oct,
  owner     = {schabus},
  pages     = {499--504},
  timestamp = {2021-02-01T10:51:23.000+0100},
  title     = {Training a talking head},
  year      = 2002
}