We develop a new model and algorithms for machine learning-based learning analytics, which estimate a learner's knowledge of the concepts underlying a domain, and content analytics, which estimate the relationships among a collection of questions and those concepts. Our model represents the probability that a learner provides the correct response to a question in terms of three factors: their understanding of a set of underlying concepts, the concepts involved in each question, and each question's intrinsic difficulty. We estimate these factors given the graded responses to a collection of questions. The underlying estimation problem is ill-posed in general, especially when only a subset of the questions are answered. The key observation that enables a well-posed solution is the fact that typical educational domains of interest involve only a small number of key concepts. Leveraging this observation, we develop both a bi-convex maximum-likelihood-based solution and a Bayesian solution to the resulting SPARse Factor Analysis (SPARFA) problem. We also incorporate user-defined tags on questions to facilitate the interpretability of the estimated factors. Experiments with synthetic and real-world data demonstrate the efficacy of our approach. Finally, we make a connection between SPARFA and noisy, binary-valued (1-bit) dictionary learning that is of independent interest.
%0 Journal Article
%1 citeulike:14074449
%A Lan, Andrew S.
%A Waters, Andrew E.
%A Studer, Christoph
%A Baraniuk, Richard G.
%D 2014
%I JMLR.org
%J J. Mach. Learn. Res.
%K dimensionality-reduction personalized-learning
%N 1
%P 1959--2008
%T Sparse Factor Analysis for Learning and Content Analytics
%U http://portal.acm.org/citation.cfm?id=2670314
%V 15
%X We develop a new model and algorithms for machine learning-based learning analytics, which estimate a learner's knowledge of the concepts underlying a domain, and content analytics, which estimate the relationships among a collection of questions and those concepts. Our model represents the probability that a learner provides the correct response to a question in terms of three factors: their understanding of a set of underlying concepts, the concepts involved in each question, and each question's intrinsic difficulty. We estimate these factors given the graded responses to a collection of questions. The underlying estimation problem is ill-posed in general, especially when only a subset of the questions are answered. The key observation that enables a well-posed solution is the fact that typical educational domains of interest involve only a small number of key concepts. Leveraging this observation, we develop both a bi-convex maximum-likelihood-based solution and a Bayesian solution to the resulting SPARse Factor Analysis (SPARFA) problem. We also incorporate user-defined tags on questions to facilitate the interpretability of the estimated factors. Experiments with synthetic and real-world data demonstrate the efficacy of our approach. Finally, we make a connection between SPARFA and noisy, binary-valued (1-bit) dictionary learning that is of independent interest.
% Journal article: Lan, Waters, Studer, Baraniuk -- SPARFA (sparse factor analysis), JMLR vol. 15, 2014.
% Core bibliographic fields come first; abstract and CiteULike/BibSonomy bookkeeping fields follow.
% NOTE(review): url points at the legacy portal.acm.org host, and number/month are kept as exported -- TODO confirm against the publisher's page.
@article{citeulike:14074449,
  author               = {Lan, Andrew S. and Waters, Andrew E. and Studer, Christoph and Baraniuk, Richard G.},
  title                = {Sparse Factor Analysis for Learning and Content Analytics},
  journal              = {Journal of Machine Learning Research},
  volume               = {15},
  number               = {1},
  pages                = {1959--2008},
  month                = jan,
  year                 = {2014},
  publisher            = {JMLR.org},
  issn                 = {1532-4435},
  url                  = {http://portal.acm.org/citation.cfm?id=2670314},
  keywords             = {dimensionality-reduction personalized-learning},
  abstract             = {We develop a new model and algorithms for machine learning-based learning analytics, which estimate a learner's knowledge of the concepts underlying a domain, and content analytics, which estimate the relationships among a collection of questions and those concepts. Our model represents the probability that a learner provides the correct response to a question in terms of three factors: their understanding of a set of underlying concepts, the concepts involved in each question, and each question's intrinsic difficulty. We estimate these factors given the graded responses to a collection of questions. The underlying estimation problem is ill-posed in general, especially when only a subset of the questions are answered. The key observation that enables a well-posed solution is the fact that typical educational domains of interest involve only a small number of key concepts. Leveraging this observation, we develop both a bi-convex maximum-likelihood-based solution and a Bayesian solution to the resulting SPARse Factor Analysis (SPARFA) problem. We also incorporate user-defined tags on questions to facilitate the interpretability of the estimated factors. Experiments with synthetic and real-world data demonstrate the efficacy of our approach. Finally, we make a connection between SPARFA and noisy, binary-valued (1-bit) dictionary learning that is of independent interest.},
  added-at             = {2018-03-19T12:24:51.000+0100},
  biburl               = {https://www.bibsonomy.org/bibtex/28fc024b20b21f32dc83723acb39cf91b/aho},
  citeulike-article-id = {14074449},
  citeulike-linkout-0  = {http://portal.acm.org/citation.cfm?id=2670314},
  interhash            = {bc9ba29cf0db08c5aab7f16e6e7bb65a},
  intrahash            = {8fc024b20b21f32dc83723acb39cf91b},
  posted-at            = {2016-06-23 02:00:59},
  priority             = {2},
  timestamp            = {2018-03-19T12:24:51.000+0100},
}