Text retrieval (TR) techniques have been widely used to support concept and bug location. When locating bugs, developers often formulate queries based on the bug descriptions. More than that, a large body of research uses bug descriptions to evaluate bug location techniques using TR. The implicit assumption is that the bug descriptions and the relevant source code files share important words. In this paper, we present an empirical study that explores this conjecture. We found that bug reports share more terms with the patched classes than with the other classes in the system. Furthermore, we found that the class names are more likely to share terms with the bug descriptions than other code locations, while more verbose parts of the code (e.g., comments) will share more words. We also found that the shared terms may be better predictors for bug location than some TR techniques.
%0 Conference Paper
%1 Moreno2013
%A Moreno, Laura
%A Bandara, Wathsala
%A Haiduc, Sonia
%A Marcus, Andrian
%B 29th IEEE International Conference on Software Maintenance
%D 2013
%K vocabulary identifier
%P 452--455
%T On the Relationship between the Vocabulary of Bug Reports and Source Code
%X Text retrieval (TR) techniques have been widely used to support concept and bug location. When locating bugs, developers often formulate queries based on the bug descriptions. More than that, a large body of research uses bug descriptions to evaluate bug location techniques using TR. The implicit assumption is that the bug descriptions and the relevant source code files share important words. In this paper, we present an empirical study that explores this conjecture. We found that bug reports share more terms with the patched classes than with the other classes in the system. Furthermore, we found that the class names are more likely to share terms with the bug descriptions than other code locations, while more verbose parts of the code (e.g., comments) will share more words. We also found that the shared terms may be better predictors for bug location than some TR techniques.
@comment{Moreno2013: conference-paper record. Bibliographic fields come first;
the trailing group (file, added-at, timestamp, biburl, interhash, intrahash)
is reference-manager bookkeeping that standard bibliography styles ignore.}
@inproceedings{Moreno2013,
  author    = {Moreno, Laura and Bandara, Wathsala and Haiduc, Sonia and Marcus, Andrian},
  title     = {On the Relationship between the Vocabulary of Bug Reports and Source Code},
  booktitle = {29th IEEE International Conference on Software Maintenance},
  pages     = {452--455},
  year      = {2013},
  keywords  = {vocabulary identifier},
  abstract  = {Text retrieval (TR) techniques have been widely used to support concept and bug location. When locating bugs, developers often formulate queries based on the bug descriptions. More than that, a large body of research uses bug descriptions to evaluate bug location techniques using TR. The implicit assumption is that the bug descriptions and the relevant source code files share important words. In this paper, we present an empirical study that explores this conjecture. We found that bug reports share more terms with the patched classes than with the other classes in the system. Furthermore, we found that the class names are more likely to share terms with the bug descriptions than other code locations, while more verbose parts of the code (e.g., comments) will share more words. We also found that the shared terms may be better predictors for bug location than some TR techniques.},
  file      = {:Moreno 2013 Vocabulary of bug reports and source code.pdf:PDF},
  added-at  = {2014-01-08T12:16:44.000+0100},
  timestamp = {2014-01-08T12:16:44.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2f883d3f3fdfd47f26037563ac58c9a27/sjbutler},
  interhash = {42f94a8882e6c3034fec3640d3bb5d7b},
  intrahash = {f883d3f3fdfd47f26037563ac58c9a27},
}