High accuracy in cancer prediction is important to improve the quality of the treatment and to improve the
rate of survivability of patients. As the data volume is increasing rapidly in the healthcare research, the
analytical challenge exists in double. The use of effective sampling technique in classification algorithms
always yields good prediction accuracy. The SEER public use cancer database provides various prominent
class labels for prognosis prediction. The main objective of this paper is to find the effect of sampling
techniques in classifying the prognosis variable and propose an ideal sampling method based on the
outcome of the experimentation. In the first phase of this work the traditional random sampling and
stratified sampling techniques have been used. At the next level the balanced stratified sampling with
variations as per the choice of the prognosis class labels have been tested. Much of the initial time has been
focused on performing the pre-processing of the SEER data set. The classification model for
experimentation has been built using the breast cancer, respiratory cancer and mixed cancer data sets with
three traditional classifiers namely Decision Tree, Naïve Bayes and K-Nearest Neighbour. The three
prognosis factors survival, stage and metastasis have been used as class labels for experimental
comparisons. The results show a steady increase in the prediction accuracy of the balanced stratified model
as the sample size increases, but the traditional approach fluctuates before the optimum results.
%0 Journal Article
%1 noauthororeditor
%A Saleema, J. S.
%A Bhagawathi, N.
%A Monica, S.
%A Shenoy, P. Deepa
%A Venugopal, K. R.
%A Patnaik, L. M.
%D 2014
%J International Journal on Soft Computing, Artificial Intelligence and Applications (IJSCAI)
%K Cancer Classification Pre-processing Sampling
%N 1
%P 10
%R 10.5121/ijscai.2014.3102
%T Cancer Prognosis Prediction Using Balanced Stratified Sampling
%U http://airccse.org/journal/ijscai/papers/3114ijscai02.pdf
%V 3
%X High accuracy in cancer prediction is important to improve the quality of the treatment and to improve the
rate of survivability of patients. As the data volume is increasing rapidly in the healthcare research, the
analytical challenge exists in double. The use of effective sampling technique in classification algorithms
always yields good prediction accuracy. The SEER public use cancer database provides various prominent
class labels for prognosis prediction. The main objective of this paper is to find the effect of sampling
techniques in classifying the prognosis variable and propose an ideal sampling method based on the
outcome of the experimentation. In the first phase of this work the traditional random sampling and
stratified sampling techniques have been used. At the next level the balanced stratified sampling with
variations as per the choice of the prognosis class labels have been tested. Much of the initial time has been
focused on performing the pre-processing of the SEER data set. The classification model for
experimentation has been built using the breast cancer, respiratory cancer and mixed cancer data sets with
three traditional classifiers namely Decision Tree, Naïve Bayes and K-Nearest Neighbour. The three
prognosis factors survival, stage and metastasis have been used as class labels for experimental
comparisons. The results shows a steady increase in the prediction accuracy of balanced stratified model
as the sample size increases, but the traditional approach fluctuates before the optimum results
@comment{Journal article record exported from BibSonomy. The citation key is kept exactly as exported so that existing citations keep resolving. The BibSonomy bookkeeping fields (added-at, biburl, interhash, intrahash, timestamp) are retained unchanged; standard styles ignore them.}
@article{noauthororeditor,
  author    = {Saleema, J. S. and Bhagawathi, N. and Monica, S. and Shenoy, P. Deepa and Venugopal, K. R. and Patnaik, L. M.},
  title     = {Cancer Prognosis Prediction Using Balanced Stratified Sampling},
  journal   = {International Journal on Soft Computing, Artificial Intelligence and Applications (IJSCAI)},
  volume    = {3},
  number    = {1},
  pages     = {10},
  month     = feb,
  year      = {2014},
  issn      = {2319-1015},
  doi       = {10.5121/ijscai.2014.3102},
  url       = {http://airccse.org/journal/ijscai/papers/3114ijscai02.pdf},
  language  = {English},
  keywords  = {Cancer Classification Pre-processing Sampling},
  abstract  = {High accuracy in cancer prediction is important to improve the quality of the treatment and to improve the
rate of survivability of patients. As the data volume is increasing rapidly in the healthcare research, the
analytical challenge exists in double. The use of effective sampling technique in classification algorithms
always yields good prediction accuracy. The SEER public use cancer database provides various prominent
class labels for prognosis prediction. The main objective of this paper is to find the effect of sampling
techniques in classifying the prognosis variable and propose an ideal sampling method based on the
outcome of the experimentation. In the first phase of this work the traditional random sampling and
stratified sampling techniques have been used. At the next level the balanced stratified sampling with
variations as per the choice of the prognosis class labels have been tested. Much of the initial time has been
focused on performing the pre-processing of the SEER data set. The classification model for
experimentation has been built using the breast cancer, respiratory cancer and mixed cancer data sets with
three traditional classifiers namely Decision Tree, Naïve Bayes and K-Nearest Neighbour. The three
prognosis factors survival, stage and metastasis have been used as class labels for experimental
comparisons. The results shows a steady increase in the prediction accuracy of balanced stratified model
as the sample size increases, but the traditional approach fluctuates before the optimum results},
  added-at  = {2018-01-16T04:59:08.000+0100},
  timestamp = {2018-01-16T04:59:08.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2f549e6f7a444c9fead9a7624fa927c15/leninsha},
  interhash = {aeffa00a9d061ef679171bf9ae85bd47},
  intrahash = {f549e6f7a444c9fead9a7624fa927c15},
}