Modern machine learning methods are critical to the development of
large-scale personalized learning systems that cater directly to the needs of
individual learners. The recently developed SPARse Factor Analysis (SPARFA)
framework provides a new statistical model and algorithms for machine
learning-based learning analytics, which estimate a learner's knowledge of the
latent concepts underlying a domain, and content analytics, which estimate the
relationships among a collection of questions and the latent concepts. SPARFA
estimates these quantities given only the binary-valued graded responses to a
collection of questions. In order to better interpret the estimated latent
concepts, SPARFA relies on a post-processing step that utilizes user-defined
tags (e.g., topics or keywords) available for each question. In this paper, we
relax the need for user-defined tags by extending SPARFA to jointly process
both graded learner responses and the text of each question and its associated
answer(s) or other feedback. Our purely data-driven approach (i) enhances the
interpretability of the estimated latent concepts without the need of
explicitly generating a set of tags or performing a post-processing step, (ii)
improves the prediction performance of SPARFA, and (iii) scales to large
test/assessments where human annotation would prove burdensome. We demonstrate
the efficacy of the proposed approach on two real educational datasets.
Description
Joint Topic Modeling and Factor Analysis of Textual Information and
Graded Response Data
%0 Generic
%1 lan2013joint
%A Lan, Andrew S.
%A Studer, Christoph
%A Waters, Andrew E.
%A Baraniuk, Richard G.
%D 2013
%K joint model topic toread
%T Joint Topic Modeling and Factor Analysis of Textual Information and
Graded Response Data
%U http://arxiv.org/abs/1305.1956
%X Modern machine learning methods are critical to the development of
large-scale personalized learning systems that cater directly to the needs of
individual learners. The recently developed SPARse Factor Analysis (SPARFA)
framework provides a new statistical model and algorithms for machine
learning-based learning analytics, which estimate a learner's knowledge of the
latent concepts underlying a domain, and content analytics, which estimate the
relationships among a collection of questions and the latent concepts. SPARFA
estimates these quantities given only the binary-valued graded responses to a
collection of questions. In order to better interpret the estimated latent
concepts, SPARFA relies on a post-processing step that utilizes user-defined
tags (e.g., topics or keywords) available for each question. In this paper, we
relax the need for user-defined tags by extending SPARFA to jointly process
both graded learner responses and the text of each question and its associated
answer(s) or other feedback. Our purely data-driven approach (i) enhances the
interpretability of the estimated latent concepts without the need of
explicitly generating a set of tags or performing a post-processing step, (ii)
improves the prediction performance of SPARFA, and (iii) scales to large
test/assessments where human annotation would prove burdensome. We demonstrate
the efficacy of the proposed approach on two real educational datasets.
@misc{lan2013joint,
abstract = {Modern machine learning methods are critical to the development of
large-scale personalized learning systems that cater directly to the needs of
individual learners. The recently developed SPARse Factor Analysis (SPARFA)
framework provides a new statistical model and algorithms for machine
learning-based learning analytics, which estimate a learner's knowledge of the
latent concepts underlying a domain, and content analytics, which estimate the
relationships among a collection of questions and the latent concepts. SPARFA
estimates these quantities given only the binary-valued graded responses to a
collection of questions. In order to better interpret the estimated latent
concepts, SPARFA relies on a post-processing step that utilizes user-defined
tags (e.g., topics or keywords) available for each question. In this paper, we
relax the need for user-defined tags by extending SPARFA to jointly process
both graded learner responses and the text of each question and its associated
answer(s) or other feedback. Our purely data-driven approach (i) enhances the
interpretability of the estimated latent concepts without the need of
explicitly generating a set of tags or performing a post-processing step, (ii)
improves the prediction performance of SPARFA, and (iii) scales to large
test/assessments where human annotation would prove burdensome. We demonstrate
the efficacy of the proposed approach on two real educational datasets.},
added-at = {2013-10-18T13:27:38.000+0200},
author = {Lan, Andrew S. and Studer, Christoph and Waters, Andrew E. and Baraniuk, Richard G.},
biburl = {https://www.bibsonomy.org/bibtex/22a8df43258181ed85e5d43b489fd45fb/hotho},
description = {Joint Topic Modeling and Factor Analysis of Textual Information and
Graded Response Data},
interhash = {911707523671c994e5c3fe63c3df5c4a},
intrahash = {2a8df43258181ed85e5d43b489fd45fb},
keywords = {joint model topic toread},
note = {cite arxiv:1305.1956},
timestamp = {2013-10-18T13:27:38.000+0200},
title = {Joint Topic Modeling and Factor Analysis of Textual Information and
Graded Response Data},
url = {http://arxiv.org/abs/1305.1956},
year = 2013
}