There is a huge amount of historical documents in libraries and in
various National Archives that have not been exploited electronically.
Although automatic reading of complete pages remains, in most cases,
a long-term objective, tasks such as word spotting, text/image alignment,
authentication and extraction of specific fields are in use today.
For all these tasks, a major step is document segmentation into text
lines. Because of the low quality and the complexity of these documents
(background noise, artifacts due to aging, interfering lines), automatic
text line segmentation remains an open research field. The objective
of this paper is to present a survey of existing methods, developed
during the last decade, and dedicated to documents of historical
interest.
%0 Journal Article
%1 Likforman-Sulem2007
%A Likforman-Sulem, Laurence
%A Zahour, Abderrazak
%A Taconet, Bruno
%D 2007
%J International Journal on Document Analysis and Recognition (IJDAR)
%K imported
%N 2-4
%P 123-138
%R 10.1007/s10032-006-0023-z
%T Text line segmentation of historical documents: a survey
%U http://arxiv.org/abs/0704.1267v1
%V 9
%X There is a huge amount of historical documents in libraries and in
various National Archives that have not been exploited electronically.
Although automatic reading of complete pages remains, in most cases,
a long-term objective, tasks such as word spotting, text/image alignment,
authentication and extraction of specific fields are in use today.
For all these tasks, a major step is document segmentation into text
lines. Because of the low quality and the complexity of these documents
(background noise, artifacts due to aging, interfering lines), automatic
text line segmentation remains an open research field. The objective
of this paper is to present a survey of existing methods, developed
during the last decade, and dedicated to documents of historical
interest.
@comment{Journal article (IJDAR vol. 9, 2007), exported from BibSonomy.
Page and issue ranges are written with a double hyphen so styles typeset
an en dash; the arXiv preprint is exposed through eprint/archiveprefix in
addition to the raw url.}
@article{Likforman-Sulem2007,
abstract = {There is a huge amount of historical documents in libraries and in
various National Archives that have not been exploited electronically.
Although automatic reading of complete pages remains, in most cases,
a long-term objective, tasks such as word spotting, text/image alignment,
authentication and extraction of specific fields are in use today.
For all these tasks, a major step is document segmentation into text
lines. Because of the low quality and the complexity of these documents
(background noise, artifacts due to aging, interfering lines), automatic
text line segmentation remains an open research field. The objective
of this paper is to present a survey of existing methods, developed
during the last decade, and dedicated to documents of historical
interest.},
added-at = {2011-03-27T19:47:06.000+0200},
archiveprefix = {arXiv},
author = {Likforman-Sulem, Laurence and Zahour, Abderrazak and Taconet, Bruno},
bibsource = {DBLP, http://dblp.uni-trier.de},
biburl = {https://www.bibsonomy.org/bibtex/25ca477bebf9df3de0d90bb00507023c4/cocus},
doi = {10.1007/s10032-006-0023-z},
ee = {http://dx.doi.org/10.1007/s10032-006-0023-z},
eprint = {0704.1267},
file = {:./sulem2006textlinesegmentationforhistoricaldocuments.pdf:PDF},
interhash = {d7329ddc699c7c77b1cad6854eee27db},
intrahash = {5ca477bebf9df3de0d90bb00507023c4},
journal = {International Journal on Document Analysis and Recognition ({IJDAR})},
keywords = {imported},
number = {2--4},
pages = {123--138},
timestamp = {2011-03-27T19:47:08.000+0200},
title = {Text line segmentation of historical documents: a survey},
url = {http://arxiv.org/abs/0704.1267v1},
volume = {9},
year = {2007}
}