% Journal article: Pons-Porrata, Berlanga-Llavori, Ruiz-Shulcloper (2007), Information Processing and Management 43(3).
% Data is stored in canonical form: names as "Last, First", full journal name, bare DOI (no resolver prefix).
% The fields biburl, description and keywords are BibSonomy export metadata; standard styles ignore them.
@article{1224718,
  author      = {Pons-Porrata, Aurora and Berlanga-Llavori, Rafael and Ruiz-Shulcloper, Jos{\'e}},
  title       = {Topic discovery based on text mining techniques},
  journal     = {Information Processing \& Management},
  volume      = {43},
  number      = {3},
  pages       = {752--768},
  year        = {2007},
  publisher   = {Pergamon Press, Inc.},
  address     = {Tarrytown, NY, USA},
  issn        = {0306-4573},
  doi         = {10.1016/j.ipm.2006.06.001},
  url         = {http://portal.acm.org/citation.cfm?id=1224718},
  biburl      = {http://www.bibsonomy.org/bibtex/25095275ddb32f4138ee5018a6a4f3fc6/renew},
  keywords    = {detection mining text topic},
  description = {Topic discovery based on text mining techniques},
  abstract    = {In this paper, we present a topic discovery system aimed to reveal the implicit knowledge present in news streams. This knowledge is expressed as a hierarchy of topic/subtopics, where each topic contains the set of documents that are related to it and a summary extracted from these documents. Summaries so built are useful to browse and select topics of interest from the generated hierarchies. Our proposal consists of a new incremental hierarchical clustering algorithm, which combines both partitional and agglomerative approaches, taking the main benefits from them. Finally, a new summarization method based on Testor Theory has been proposed to build the topic summaries. Experimental results in the TDT2 collection demonstrate its usefulness and effectiveness not only as a topic detection system, but also as a classification and summarization tool.},
}