Instructors routinely use automated assessment methods to evaluate the semantic qualities of student implementations and, sometimes, test suites. In this work, we distill a variety of automated assessment methods in the literature down to a pair of assessment models. We identify pathological assessment outcomes in each model that point to underlying methodological flaws. These theoretical flaws broadly threaten the validity of the techniques, and we actually observe them in multiple assignments of an introductory programming course. We propose adjustments that remedy these flaws and then demonstrate, on these same assignments, that our interventions improve the accuracy of assessment. We believe that with these adjustments, instructors can greatly improve the accuracy of automated assessment.
Description
Interesting discussion on how to ensure that the test suite is correct and how to test test suites in courses where the test suite is part of the assignment
%0 Conference Paper
%1 Wrenn:2018:TT:3230977.3230999
%A Wrenn, John
%A Krishnamurthi, Shriram
%A Fisler, Kathi
%B Proceedings of the 2018 ACM Conference on International Computing Education Research
%C New York, NY, USA
%D 2018
%I ACM
%K automatic-assessment programming
%P 51--59
%R 10.1145/3230977.3230999
%T Who Tests the Testers?
%U https://doi.org/10.1145/3230977.3230999
%X Instructors routinely use automated assessment methods to evaluate the semantic qualities of student implementations and, sometimes, test suites. In this work, we distill a variety of automated assessment methods in the literature down to a pair of assessment models. We identify pathological assessment outcomes in each model that point to underlying methodological flaws. These theoretical flaws broadly threaten the validity of the techniques, and we actually observe them in multiple assignments of an introductory programming course. We propose adjustments that remedy these flaws and then demonstrate, on these same assignments, that our interventions improve the accuracy of assessment. We believe that with these adjustments, instructors can greatly improve the accuracy of automated assessment.
%@ 978-1-4503-5628-2
@inproceedings{Wrenn:2018:TT:3230977.3230999,
  abstract    = {Instructors routinely use automated assessment methods to evaluate the semantic qualities of student implementations and, sometimes, test suites. In this work, we distill a variety of automated assessment methods in the literature down to a pair of assessment models. We identify pathological assessment outcomes in each model that point to underlying methodological flaws. These theoretical flaws broadly threaten the validity of the techniques, and we actually observe them in multiple assignments of an introductory programming course. We propose adjustments that remedy these flaws and then demonstrate, on these same assignments, that our interventions improve the accuracy of assessment. We believe that with these adjustments, instructors can greatly improve the accuracy of automated assessment.},
  acmid       = {3230999},
  added-at    = {2018-08-13T13:20:28.000+0200},
  address     = {New York, NY, USA},
  author      = {Wrenn, John and Krishnamurthi, Shriram and Fisler, Kathi},
  biburl      = {https://www.bibsonomy.org/bibtex/2b24dc9cde92b25c8ac8d9a46c7029a22/brusilovsky},
  booktitle   = {Proceedings of the 2018 {ACM} Conference on International Computing Education Research},
  description = {Interesting discussion on how to ensure that the test suite is correct and how to test test suites in courses where the test suite is part of the assignment},
  doi         = {10.1145/3230977.3230999},
  interhash   = {5c2bcf061f34c28138d9e6b79269186e},
  intrahash   = {b24dc9cde92b25c8ac8d9a46c7029a22},
  isbn        = {978-1-4503-5628-2},
  keywords    = {automatic-assessment programming},
  location    = {Espoo, Finland},
  numpages    = {9},
  pages       = {51--59},
  publisher   = {ACM},
  series      = {ICER '18},
  timestamp   = {2018-08-13T13:20:28.000+0200},
  title       = {Who Tests the Testers?},
  url         = {https://doi.org/10.1145/3230977.3230999},
  year        = {2018}
}