Topic modeling has gained a lot of popularity as a means for identifying and describing the topical structure of textual documents and whole corpora. There are, however, many document collections such as qualitative studies in the digital humanities that cannot easily benefit from this technology. The limited size of those corpora leads to poor quality topic models. Higher quality topic models can be learned by incorporating additional domain-specific documents with similar topical content. This, however, requires finding or even manually composing such corpora, requiring considerable effort. For solving this problem, we propose a fully automated adaptable process of
Beschreibung
Topic Cropping: Leveraging Latent Topics for the Analysis of Small Corpora - Springer
%0 Conference Paper
%1 tran2013topic
%A Tran, Nam Khanh
%A Zerr, Sergej
%A Bischoff, Kerstin
%A Niederée, Claudia
%A Krestel, Ralf
%B Research and Advanced Technology for Digital Libraries
%D 2013
%E Aalberg, Trond
%E Papatheodorou, Christos
%E Dobreva, Milena
%E Tsakonas, Giannis
%E Farrugia, Charles J.
%I Springer Berlin Heidelberg
%K gutearbeit myown
%P 297-308
%R 10.1007/978-3-642-40501-3_30
%T Topic Cropping: Leveraging Latent Topics for the Analysis of Small Corpora
%U http://dx.doi.org/10.1007/978-3-642-40501-3_30
%V 8092
%X Topic modeling has gained a lot of popularity as a means for identifying and describing the topical structure of textual documents and whole corpora. There are, however, many document collections such as qualitative studies in the digital humanities that cannot easily benefit from this technology. The limited size of those corpora leads to poor quality topic models. Higher quality topic models can be learned by incorporating additional domain-specific documents with similar topical content. This, however, requires finding or even manually composing such corpora, requiring considerable effort. For solving this problem, we propose a fully automated adaptable process of
%@ 978-3-642-40500-6
% Conference-paper entry as exported from BibSonomy (see biburl). The abstract is cut off mid-sentence in the export and is kept verbatim.
@inproceedings{tran2013topic,
  author      = {Tran, Nam Khanh and Zerr, Sergej and Bischoff, Kerstin and Niederée, Claudia and Krestel, Ralf},
  title       = {Topic Cropping: Leveraging Latent Topics for the Analysis of Small Corpora},
  booktitle   = {Research and Advanced Technology for Digital Libraries},
  editor      = {Aalberg, Trond and Papatheodorou, Christos and Dobreva, Milena and Tsakonas, Giannis and Farrugia, Charles J.},
  series      = {Lecture Notes in Computer Science},
  volume      = {8092},
  pages       = {297--308},
  publisher   = {Springer Berlin Heidelberg},
  year        = {2013},
  isbn        = {978-3-642-40500-6},
  doi         = {10.1007/978-3-642-40501-3_30},
  url         = {http://dx.doi.org/10.1007/978-3-642-40501-3_30},
  abstract    = {Topic modeling has gained a lot of popularity as a means for identifying and describing the topical structure of textual documents and whole corpora. There are, however, many document collections such as qualitative studies in the digital humanities that cannot easily benefit from this technology. The limited size of those corpora leads to poor quality topic models. Higher quality topic models can be learned by incorporating additional domain-specific documents with similar topical content. This, however, requires finding or even manually composing such corpora, requiring considerable effort. For solving this problem, we propose a fully automated adaptable process of},
  keywords    = {gutearbeit myown},
  description = {Topic Cropping: Leveraging Latent Topics for the Analysis of Small Corpora - Springer},
  added-at    = {2013-12-17T14:02:38.000+0100},
  timestamp   = {2013-12-17T14:02:38.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/24738e67b6aa750de5b5e59068cf6b4fa/ntran},
  interhash   = {dfa297ff328120800de2ec909127ab88},
  intrahash   = {4738e67b6aa750de5b5e59068cf6b4fa},
}