A. Sun, E. Lim, and W. Ng. Web classification using support vector machine. Proceedings of the 4th international workshop on Web information and data management, pages 96--99. New York, NY, USA, ACM, (2002)
DOI: 10.1145/584931.584952
Abstract
In web classification, web pages from one or more web sites are assigned to pre-defined categories according to their content. Since web pages are more than just plain text documents, web classification methods have to consider using other context features of web pages, such as hyperlinks and HTML tags. In this paper, we propose the use of Support Vector Machine (SVM) classifiers to classify web pages using both their text and context feature sets. We have experimented our web classification method on the WebKB data set. Compared with earlier Foil-Pilfs method on the same data set, our method has been shown to perform very well. We have also shown that the use of context features especially hyperlinks can improve the classification performance significantly.
%0 Conference Paper
%1 Sun:2002:WCU:584931.584952
%A Sun, Aixin
%A Lim, Ee-Peng
%A Ng, Wee-Keong
%B Proceedings of the 4th international workshop on Web information and data management
%C New York, NY, USA
%D 2002
%I ACM
%K anchor bachelor:2011:bachmann classification svm webpage
%P 96--99
%R 10.1145/584931.584952
%T Web classification using support vector machine
%U http://doi.acm.org/10.1145/584931.584952
%X In web classification, web pages from one or more web sites are assigned to pre-defined categories according to their content. Since web pages are more than just plain text documents, web classification methods have to consider using other context features of web pages, such as hyperlinks and HTML tags. In this paper, we propose the use of Support Vector Machine (SVM) classifiers to classify web pages using both their text and context feature sets. We have experimented our web classification method on the WebKB data set. Compared with earlier Foil-Pilfs method on the same data set, our method has been shown to perform very well. We have also shown that the use of context features especially hyperlinks can improve the classification performance significantly.
%@ 1-58113-593-9
% Conference paper from the WIDM '02 workshop proceedings (ACM, 2002).
% The citation key is kept exactly as exported so existing \cite commands still resolve.
% Titles are stored in Title Case; a style can downcase them but cannot restore capitals.
% address is the publisher's city; location and series carry the venue and short name.
% added-at, timestamp, biburl, interhash and intrahash look like BibSonomy bookkeeping
% (biburl points at bibsonomy.org); standard styles ignore unknown fields.
@inproceedings{Sun:2002:WCU:584931.584952,
  author      = {Sun, Aixin and Lim, Ee-Peng and Ng, Wee-Keong},
  title       = {Web Classification Using Support Vector Machine},
  booktitle   = {Proceedings of the 4th International Workshop on {Web} Information and Data Management},
  series      = {WIDM '02},
  location    = {McLean, Virginia, USA},
  pages       = {96--99},
  numpages    = {4},
  publisher   = {ACM},
  address     = {New York, NY, USA},
  year        = {2002},
  isbn        = {1-58113-593-9},
  doi         = {10.1145/584931.584952},
  url         = {https://doi.org/10.1145/584931.584952},
  acmid       = {584952},
  keywords    = {anchor bachelor:2011:bachmann classification svm webpage},
  abstract    = {In web classification, web pages from one or more web sites are assigned to pre-defined categories according to their content. Since web pages are more than just plain text documents, web classification methods have to consider using other context features of web pages, such as hyperlinks and HTML tags. In this paper, we propose the use of Support Vector Machine (SVM) classifiers to classify web pages using both their text and context feature sets. We have experimented our web classification method on the WebKB data set. Compared with earlier Foil-Pilfs method on the same data set, our method has been shown to perform very well. We have also shown that the use of context features especially hyperlinks can improve the classification performance significantly.},
  description = {Web classification using support vector machine},
  added-at    = {2011-11-30T17:32:28.000+0100},
  timestamp   = {2011-11-30T17:32:28.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2b09991a8b56bb1a3d1f96e9d4b44e221/telekoma},
  interhash   = {ef7f7833d89a558ddfa3423bb5af6451},
  intrahash   = {b09991a8b56bb1a3d1f96e9d4b44e221},
}