Mobile Health (mHealth) applications are increasing in popularity, and the collection of disease-specific time series data using Ecological Momentary Assessment (EMA) questionnaires has been shown to help in the creation of personalised predictors for next-step forecasting, which can be crucial in giving preemptive interventions. In this work, we propose a framework that aims to mitigate a common issue in EMA data - that some users contribute a bulk of the data while most users contribute too little. Our proposed framework aims to discover a ‘useful’ neighbourhood of ‘long’ users for ‘short’ ones, by optimising for the error of the user-level predictors for users with little data available for learning. For each user-level predictor, this is done by iteratively adding the next-most-similar long user from a similarity-ordered list as long as the error of the learned model does not increase. This method is compared against a baseline that exploits all available data for the long users, as well as an exhaustive search model that retains only those users that yield the lowest error predictor. We also explore multiple ways to define similarity, and study the impact of each on the two search strategies on two EMA datasets from an mHealth app ‘TrackYourDiabetes’ - with users from Bulgaria and Spain. Our experiments over the two datasets show a 2.5% and 36.6% improvement respectively for RMSE while using on average 42.8% and 69.9% less data than the baseline method.
%0 Conference Paper
%1 9474685
%A Unnikrishnan, Vishnu
%A Shah, Yash
%A Schleicher, Miro
%A Fernández-Viadero, Carlos
%A Strandzheva, Mirela
%A Velikova, Doroteya
%A Dimitrov, Plamen
%A Pryss, Ruediger
%A Schobel, Johannes
%A Schlee, Winfried
%A Spiliopoulou, Myra
%B 2021 IEEE 34th International Symposium on Computer-Based Medical Systems (CBMS)
%D 2021
%K kmd medical_mining myown tinnitus
%P 295-300
%R 10.1109/CBMS52027.2021.00080
%T Love thy Neighbours: A Framework for Error-Driven Discovery of Useful Neighbourhoods for One-Step Forecasts on EMA data
%X Mobile Health (mHealth) applications are increasing in popularity, and the collection of disease-specific time series data using Ecological Momentary Assessment (EMA) questionnaires has been shown to help in the creation of personalised predictors for next-step forecasting, which can be crucial in giving preemptive interventions. In this work, we propose a framework that aims to mitigate a common issue in EMA data - that some users contribute a bulk of the data while most users contribute too little. Our proposed framework aims to discover a ‘useful’ neighbourhood of ‘long’ users for ‘short’ ones, by optimising for the error of the user-level predictors for users with little data available for learning. For each user-level predictor, this is done by iteratively adding the next-most-similar long user from a similarity-ordered list as long as the error of the learned model does not increase. This method is compared against a baseline that exploits all available data for the long users, as well as an exhaustive search model that retains only those users that yield the lowest error predictor. We also explore multiple ways to define similarity, and study the impact of each on the two search strategies on two EMA datasets from an mHealth app ‘TrackYourDiabetes’ - with users from Bulgaria and Spain. Our experiments over the two datasets show a 2.5% and 36.6% improvement respectively for RMSE while using on average 42.8% and 69.9% less data than the baseline method.
@comment{
  Conference paper in the 2021 IEEE CBMS proceedings.
  The numeric citation key is kept exactly as exported so existing citations keep resolving.
  The fields added-at, timestamp, biburl, interhash and intrahash look like BibSonomy export
  metadata (biburl points at bibsonomy.org); they are not standard BibTeX fields, and
  bibliography styles ignore fields they do not know.
  Percent signs in the abstract are escaped so the text is safe if a style typesets it.
}
@inproceedings{9474685,
  author    = {Unnikrishnan, Vishnu and Shah, Yash and Schleicher, Miro and Fern{\'a}ndez-Viadero, Carlos and Strandzheva, Mirela and Velikova, Doroteya and Dimitrov, Plamen and Pryss, Ruediger and Schobel, Johannes and Schlee, Winfried and Spiliopoulou, Myra},
  title     = {Love thy Neighbours: A Framework for Error-Driven Discovery of Useful Neighbourhoods for One-Step Forecasts on {EMA} data},
  booktitle = {2021 {IEEE} 34th International Symposium on Computer-Based Medical Systems ({CBMS})},
  year      = {2021},
  month     = jun,
  pages     = {295--300},
  doi       = {10.1109/CBMS52027.2021.00080},
  issn      = {2372-9198},
  keywords  = {kmd medical_mining myown tinnitus},
  abstract  = {Mobile Health (mHealth) applications are increasing in popularity, and the collection of disease-specific time series data using Ecological Momentary Assessment (EMA) questionnaires has been shown to help in the creation of personalised predictors for next-step forecasting, which can be crucial in giving preemptive interventions. In this work, we propose a framework that aims to mitigate a common issue in EMA data - that some users contribute a bulk of the data while most users contribute too little. Our proposed framework aims to discover a ‘useful’ neighbourhood of ‘long’ users for ‘short’ ones, by optimising for the error of the user-level predictors for users with little data available for learning. For each user-level predictor, this is done by iteratively adding the next-most-similar long user from a similarity-ordered list as long as the error of the learned model does not increase. This method is compared against a baseline that exploits all available data for the long users, as well as an exhaustive search model that retains only those users that yield the lowest error predictor. We also explore multiple ways to define similarity, and study the impact of each on the two search strategies on two EMA datasets from an mHealth app ‘TrackYourDiabetes’ - with users from Bulgaria and Spain. Our experiments over the two datasets show a 2.5\% and 36.6\% improvement respectively for RMSE while using on average 42.8\% and 69.9\% less data than the baseline method.},
  added-at  = {2021-07-13T15:37:35.000+0200},
  timestamp = {2021-07-13T15:37:35.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/24d5608b48658856ac17cbd2d401d5fee/kmd-ovgu},
  interhash = {90c9885d59b06b98398cae39923dfbb9},
  intrahash = {4d5608b48658856ac17cbd2d401d5fee},
}