Effective methods for identification of software defects help minimize the business costs of software development. Classification methods can be used to perform software defect prediction. When cost-sensitive methods are used, the predictions are optimized for business cost. The data sets used as input for these methods typically suffer from the class imbalance problem. That is, there are many more defect-free code examples than defective code examples to learn from. This negatively impacts the classifier’s ability to correctly predict defective code examples. Cost-sensitive classification can also be used to mitigate the effects of the class imbalance problem by setting the costs to reflect the level of imbalance in the training data set. Through an experimental process, we have developed a method for combining these two different types of costs. We demonstrate that by using our proposed approach, we can produce more cost effective predictions than several recent cost-sensitive methods used for software defect prediction. Furthermore, we examine the software defect prediction models built by our method and present the discovered insights.
%0 Conference Paper
%1 noauthororeditor
%A Siers, Michael J.
%A Islam, Md Zahidul
%B Proceedings of the 12th International Conference on Advanced Data Mining and Applications
%D 2016
%I Springer International Publishing
%K class-imbalance classification cost-sensitive forest myown software
%P 156-171
%T Addressing Class Imbalance and Cost Sensitivity in Software Defect Prediction by Combining Domain Costs and Balancing Costs
%U http://link.springer.com/chapter/10.1007/978-3-319-49586-6_11
%X Effective methods for identification of software defects help minimize the business costs of software development. Classification methods can be used to perform software defect prediction. When cost-sensitive methods are used, the predictions are optimized for business cost. The data sets used as input for these methods typically suffer from the class imbalance problem. That is, there are many more defect-free code examples than defective code examples to learn from. This negatively impacts the classifier’s ability to correctly predict defective code examples. Cost-sensitive classification can also be used to mitigate the effects of the class imbalance problem by setting the costs to reflect the level of imbalance in the training data set. Through an experimental process, we have developed a method for combining these two different types of costs. We demonstrate that by using our proposed approach, we can produce more cost effective predictions than several recent cost-sensitive methods used for software defect prediction. Furthermore, we examine the software defect prediction models built by our method and present the discovered insights.
@comment{Conference paper exported from BibSonomy (see biburl). The citation key is left unchanged so that existing citations of it keep resolving.}
@inproceedings{noauthororeditor,
  author    = {Siers, Michael J. and Islam, Md Zahidul},
  title     = {Addressing Class Imbalance and Cost Sensitivity in Software Defect Prediction by Combining Domain Costs and Balancing Costs},
  booktitle = {Proceedings of the 12th International Conference on Advanced Data Mining and Applications},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer International Publishing},
  year      = {2016},
  month     = dec,
  pages     = {156--171},
  doi       = {10.1007/978-3-319-49586-6_11},
  url       = {http://link.springer.com/chapter/10.1007/978-3-319-49586-6_11},
  abstract  = {Effective methods for identification of software defects help minimize the business costs of software development. Classification methods can be used to perform software defect prediction. When cost-sensitive methods are used, the predictions are optimized for business cost. The data sets used as input for these methods typically suffer from the class imbalance problem. That is, there are many more defect-free code examples than defective code examples to learn from. This negatively impacts the classifier’s ability to correctly predict defective code examples. Cost-sensitive classification can also be used to mitigate the effects of the class imbalance problem by setting the costs to reflect the level of imbalance in the training data set. Through an experimental process, we have developed a method for combining these two different types of costs. We demonstrate that by using our proposed approach, we can produce more cost effective predictions than several recent cost-sensitive methods used for software defect prediction. Furthermore, we examine the software defect prediction models built by our method and present the discovered insights.},
  keywords  = {class-imbalance classification cost-sensitive forest myown software},
  added-at  = {2017-01-06T05:09:22.000+0100},
  timestamp = {2017-01-06T05:09:22.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2aa17e4324ccb052c4410c193654f37af/mikesiers},
  interhash = {9a83a6ce3dcb8a6423fdb05c4f10331a},
  intrahash = {aa17e4324ccb052c4410c193654f37af},
}