% cha05: conference paper in the ECDL 2005 proceedings (LNCS volume 3652),
% imported from BibSonomy. The crossref field points at the ecdl2005 parent
% entry; under classic BibTeX that parent must appear after this entry in the file.
% NOTE(review): address holds "Vienna, Austria", presumably the conference venue
% rather than the publisher's city -- confirm before relying on it.
@inproceedings{cha05,
  author      = {Chanod, Jean-Pierre and Chidlovskii, Boris and D{\'e}jean, Herv{\'e} and Fambon, Olivier and Fuselier, J{\'e}r{\^o}me and Jacquin, Thierry and Meunier, Jean-Luc},
  title       = {From Legacy Documents to {XML}: A Conversion Framework},
  booktitle   = {Proceedings of the 9th European Conference on Digital Libraries},
  editor      = {Rauber, Andreas and Christodoulakis, Stavros and Tjoa, A Min},
  series      = {Lecture Notes in Computer Science},
  volume      = {3652},
  pages       = {92--103},
  publisher   = {Springer-Verlag},
  address     = {Vienna, Austria},
  month       = sep,
  year        = {2005},
  crossref    = {ecdl2005},
  uri         = {http://www.springerlink.com/link.asp?id=5xnqptg4hrdqmy3g},
  abstract    = {We present an integrated framework for the document conversion from legacy formats to XML format. We describe the LegDoC project, aimed at automating the conversion of layout annotations layout-oriented formats like PDF, PS and HTML to semantic-oriented annotations. A toolkit of different components covers complementary techniques the logical document analysis and semantic annotations with the methods of machine learning. We use a real case conversion project as a driving example to exemplify different techniques implemented in the project.},
  keywords    = {imported},
  index       = {ECDL 2005},
  description = {dret'd bibliography},
  biburl      = {http://www.bibsonomy.org/bibtex/2dc8f8e8078e4ee9552c95c682abca916/dret},
  interhash   = {cc35878f613128decd4a7d92c1208869},
  intrahash   = {dc8f8e8078e4ee9552c95c682abca916},
  added-at    = {2009-01-14T00:43:43.000+0100},
  timestamp   = {2009-01-14T00:43:43.000+0100},
}