P. Kluegl, A. Hotho, and F. Puppe. KI 2010: Advances in Artificial Intelligence, 33rd Annual German Conference on AI, pages 40-47. Springer, (2010)
Abstract
The accurate extraction of scholarly reference information from scientific publications is essential for many useful applications like BibTeX management systems or citation analysis. Automatic extraction methods suffer from the heterogeneity of reference notation, no matter whether the extraction model was handcrafted or learnt from labeled data. However, references of the same paper or journal are usually homogeneous. We exploit this local consistency with a novel approach. Given some initial information from such a reference section, we try to derive generalized patterns. These patterns are used to create a local model of the current document. The local model helps to identify errors and to improve the extracted information incrementally during the extraction process. Our approach is implemented with handcrafted transformation rules working on a meta-level being able to correct the information independent of the applied layout style. The experimental results compete very well with the state-of-the-art methods and show an extremely high performance on consistent reference sections.
%0 Conference Paper
%1 2010-KI-KHP
%A Kluegl, Peter
%A Hotho, Andreas
%A Puppe, Frank
%B KI 2010: Advances in Artificial Intelligence, 33rd Annual German Conference on AI
%D 2010
%E Dillmann, Rüdiger
%E Beyerer, Jürgen
%E Hanebeck, Uwe D.
%E Schultz, Tanja
%I Springer
%K adaptive extraction ie information local metalevel myown references rule-based
%P 40-47
%T Local Adaptive Extraction of References
%U http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2010-KI-LAER.pdf
%X The accurate extraction of scholarly reference information from scientific publications is essential for many useful applications like BibTeX management systems or citation analysis. Automatic extraction methods suffer from the heterogeneity of reference notation, no matter whether the extraction model was handcrafted or learnt from labeled data. However, references of the same paper or journal are usually homogeneous. We exploit this local consistency with a novel approach. Given some initial information from such a reference section, we try to derive generalized patterns. These patterns are used to create a local model of the current document. The local model helps to identify errors and to improve the extracted information incrementally during the extraction process. Our approach is implemented with handcrafted transformation rules working on a meta-level being able to correct the information independent of the applied layout style. The experimental results compete very well with the state-of-the-art methods and show an extremely high performance on consistent reference sections.
% Conference paper "Local Adaptive Extraction of References" (KI 2010).
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
% The fields added-at, biburl, interhash, intrahash and timestamp look like
% bookkeeping data from the exporting service (see biburl); standard styles
% ignore unknown fields, so they are kept unchanged at the end of the entry.
@inproceedings{2010-KI-KHP,
  author    = {Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  title     = {Local Adaptive Extraction of References},
  booktitle = {{KI} 2010: Advances in Artificial Intelligence, 33rd Annual {German} Conference on {AI}},
  editor    = {Dillmann, R{\"u}diger and Beyerer, J{\"u}rgen and Hanebeck, Uwe D. and Schultz, Tanja},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {6359},
  publisher = {Springer},
  year      = {2010},
  pages     = {40--47},
  url       = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2010-KI-LAER.pdf},
  abstract  = {The accurate extraction of scholarly reference information from scientific publications is essential for many useful applications like BibTeX management systems or citation analysis. Automatic extraction methods suffer from the heterogeneity of reference notation, no matter whether the extraction model was handcrafted or learnt from labeled data. However, references of the same paper or journal are usually homogeneous. We exploit this local consistency with a novel approach. Given some initial information from such a reference section, we try to derive generalized patterns. These patterns are used to create a local model of the current document. The local model helps to identify errors and to improve the extracted information incrementally during the extraction process. Our approach is implemented with handcrafted transformation rules working on a meta-level being able to correct the information independent of the applied layout style. The experimental results compete very well with the state of the art methods and show an extremely high performance on consistent reference sections.},
  keywords  = {adaptive extraction ie information local metalevel myown references rule-based},
  added-at  = {2011-01-20T14:24:08.000+0100},
  timestamp = {2011-01-20T14:24:08.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2174791d9668705cbf0052224694f5366/pkluegl},
  interhash = {b6a5b2a32346b60eac912ee96e681dce},
  intrahash = {174791d9668705cbf0052224694f5366},
}