Existing research is unclear on how to generate lessons learned for defect prediction and effort estimation. Should we seek lessons that are global to multiple projects or just local to particular projects? This paper aims to comparatively evaluate local versus global lessons learned for effort estimation and defect prediction. We applied automated clustering tools to effort and defect datasets from the PROMISE repository. Rule learners generated lessons learned from all the data, from local projects, or just from each cluster. The results indicate that the lessons learned after combining small parts of different data sources (i.e., the clusters) were superior to either generalizations formed over all the data or local lessons formed from particular projects. We conclude that when researchers attempt to draw lessons from some historical data source, they should 1) ignore any existing local divisions into multiple sources, 2) cluster across all available data, then 3) restrict the learning of lessons to the clusters from other sources that are nearest to the test data.
Description
Local versus Global Lessons for Defect Prediction and Effort Estimation
%0 Journal Article
%1 10.1109/TSE.2012.83
%A Menzies, Tim
%A Butcher, Andrew
%A Cok, David
%A Marcus, Andrian
%A Layman, Lucas
%A Shull, Forrest
%A Turhan, Burak
%A Zimmermann, Thomas
%C Los Alamitos, CA, USA
%D 2013
%I IEEE Computer Society
%J IEEE Transactions on Software Engineering
%K myown
%N 6
%P 822-834
%R 10.1109/TSE.2012.83
%T Local versus Global Lessons for Defect Prediction and Effort Estimation
%U http://doi.ieeecomputersociety.org/10.1109/TSE.2012.83
%V 39
%X Existing research is unclear on how to generate lessons learned for defect prediction and effort estimation. Should we seek lessons that are global to multiple projects or just local to particular projects? This paper aims to comparatively evaluate local versus global lessons learned for effort estimation and defect prediction. We applied automated clustering tools to effort and defect datasets from the PROMISE repository. Rule learners generated lessons learned from all the data, from local projects, or just from each cluster. The results indicate that the lessons learned after combining small parts of different data sources (i.e., the clusters) were superior to either generalizations formed over all the data or local lessons formed from particular projects. We conclude that when researchers attempt to draw lessons from some historical data source, they should 1) ignore any existing local divisions into multiple sources, 2) cluster across all available data, then 3) restrict the learning of lessons to the clusters from other sources that are nearest to the test data.
@comment{
  Journal article in IEEE Transactions on Software Engineering, vol. 39, no. 6.
  The doi field holds the bare DOI; the publisher resolver link is kept in url.
  added-at, biburl, interhash, intrahash and timestamp are bookkeeping fields
  (biburl points at bibsonomy.org) and are not part of the citation itself.
}
@article{10.1109/TSE.2012.83,
  author      = {Menzies, Tim and Butcher, Andrew and Cok, David and Marcus, Andrian and Layman, Lucas and Shull, Forrest and Turhan, Burak and Zimmermann, Thomas},
  title       = {Local versus Global Lessons for Defect Prediction and Effort Estimation},
  journal     = {IEEE Transactions on Software Engineering},
  volume      = {39},
  number      = {6},
  pages       = {822--834},
  year        = {2013},
  publisher   = {IEEE Computer Society},
  address     = {Los Alamitos, CA, USA},
  issn        = {0098-5589},
  doi         = {10.1109/TSE.2012.83},
  url         = {http://doi.ieeecomputersociety.org/10.1109/TSE.2012.83},
  abstract    = {Existing research is unclear on how to generate lessons learned for defect prediction and effort estimation. Should we seek lessons that are global to multiple projects or just local to particular projects? This paper aims to comparatively evaluate local versus global lessons learned for effort estimation and defect prediction. We applied automated clustering tools to effort and defect datasets from the PROMISE repository. Rule learners generated lessons learned from all the data, from local projects, or just from each cluster. The results indicate that the lessons learned after combining small parts of different data sources (i.e., the clusters) were superior to either generalizations formed over all the data or local lessons formed from particular projects. We conclude that when researchers attempt to draw lessons from some historical data source, they should 1) ignore any existing local divisions into multiple sources, 2) cluster across all available data, then 3) restrict the learning of lessons to the clusters from other sources that are nearest to the test data.},
  keywords    = {myown},
  description = {Local versus Global Lessons for Defect Prediction and Effort Estimation},
  added-at    = {2015-09-17T19:26:51.000+0200},
  timestamp   = {2015-09-17T19:26:51.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/28f1443d25a0b519472c247fefdbf6c1d/burak.turhan},
  interhash   = {e540ae7d58c1c7aef1b4b536cdcd6790},
  intrahash   = {8f1443d25a0b519472c247fefdbf6c1d},
}