% Bibliography database: phrase/tree alignment, LFG and statistical MT references.
%
% Conventions used in this file:
%   * One field per line; bibliographic fields first, then the bookkeeping
%     fields (keywords, added-at, timestamp, biburl, interhash, intrahash).
%     The bookkeeping fields are unknown to standard styles and are ignored
%     when typesetting. They look like a BibSonomy export (the biburl values
%     point at bibsonomy.org) -- presumably needed for re-import; confirm
%     before removing them.
%   * Non-ASCII letters are written as LaTeX accent groups such as {\'e} so
%     the file works with classic 8-bit BibTeX as well as Biber.
%   * Titles are not wrapped in a second pair of braces; only acronyms and
%     proper names are brace-protected, so styles may apply their own casing.
%   * Given names are kept as found in the source data (some are initials only).

% Shared publisher/organization name, defined once so it cannot drift.
@string{acl = {Association for Computational Linguistics}}

@mastersthesis{unhammer2010sf,
  author       = {Unhammer, Kevin Brubeck},
  title        = {Syntaktisk fraselenking},
  school       = {Universitetet i Bergen},
  address      = {Bergen, Norway},
  year         = 2010,
  url          = {https://github.com/unhammer/lfgalign/blob/master/thesis/lfgalign.pdf},
  keywords     = {LFG alignment master myown},
  added-at     = {2011-01-10T16:00:51.000+0100},
  timestamp    = {2011-01-10T16:00:51.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/23943fc2fc999c041fb34645a17578ef0/unhammer},
  interhash    = {c0631ef9f5b2938313d3b9b4a0bc7d50},
  intrahash    = {3943fc2fc999c041fb34645a17578ef0},
  abstract     = {This thesis describes a knowledge-based method of automatic
                  phrase alignment, with the aim of annotating a multilingual
                  treebank for linguistic studies. Most current phrase
                  alignment methods are based on extracting
                  many-to-many-links from N-gram tables, perhaps filtering out
                  true constituents or dependency links in a later step. Such
                  methods do not utilise the full information available in a
                  deep syntactic parse. Additionally, the goal is typically to
                  build a machine translation system; very few methods aim at
                  building treebanks for linguistic studies. Consequently,
                  there is in principle no reason to exclude links which are
                  not linguistically motivated. The method described in this
                  thesis, on the other hand, has the explicit goal of
                  annotating a parallel treebank for linguistic research. It
                  takes as input parallel sentences with deep, syntactic
                  analyses in Lexical-Functional Grammar. The grammars giving
                  rise to the analyses are assumed to follow common analysis
                  guidelines; if so, structural similarity in analyses gives
                  us evidence that constituents (syntactic phrases) or
                  functional elements (predicates, arguments, adjuncts) may be
                  linked. A set of principles for function and constituent
                  alignment are formulated (keeping our annotation goal in
                  mind), and an implementation of these principles is given.
                  Finally, the method is evaluated both manually and
                  automatically, and compared with methods based on N-gram
                  tables. The results suggest that the method seems promising,
                  but also show that there are specific possibilities for
                  improvement.},
}

@inproceedings{volk2008hjp,
  author       = {Volk, M. and Marek, T. and Samuelsson, Y.},
  title        = {Human Judgements in Parallel Treebank Alignment},
  booktitle    = {{Proceedings of the Workshop on Human Judgements in Computational Linguistics}},
  pages        = {51--57},
  organization = acl,
  publisher    = acl,
  address      = {Manchester},
  year         = 2008,
  url          = {http://www.aclweb.org/anthology-new/W/W08/W08-1208.pdf},
  keywords     = {Master SMULTRON alignment judgement parallel treebank},
  added-at     = {2010-11-21T20:15:05.000+0100},
  timestamp    = {2010-11-21T20:15:05.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2eb8a701c76979c5d0bb9fba8b73f672b/unhammer},
  interhash    = {fb06c22411e89d5aded4fb6235b746c9},
  intrahash    = {eb8a701c76979c5d0bb9fba8b73f672b},
}

@article{stodden2009err,
  author       = {Stodden, V.},
  title        = {Enabling Reproducible Research: Licensing for Scientific Innovation},
  journal      = {International Journal of Communications Law and Policy},
  number       = 13,
  year         = 2009,
  url          = {http://www.ijclp.net/issue_13.html},
  keywords     = {master open_source reproducible_research},
  added-at     = {2010-11-21T20:13:49.000+0100},
  timestamp    = {2010-11-21T20:13:49.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/283f66bfdbf46fc17629a4cd89cc1ca2b/unhammer},
  interhash    = {e5a0d3f78407929d1549a6b233c648cc},
  intrahash    = {83f66bfdbf46fc17629a4cd89cc1ca2b},
}

% The title carries no trailing period: styles add their own punctuation.
@article{och2003scv,
  author       = {Och, Franz Josef and Ney, Hermann},
  title        = {A Systematic Comparison of Various Statistical Alignment Models},
  journal      = {Computational Linguistics},
  volume       = 29,
  number       = 1,
  pages        = {19--51},
  year         = 2003,
  url          = {http://www.mt-archive.info/CL-2003-Och.pdf},
  keywords     = {EM MT Master SMT alignment apertium statistical},
  added-at     = {2010-11-21T20:12:13.000+0100},
  timestamp    = {2010-11-21T20:12:13.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2c8b8d8a407d6faa1594bfe2154ebe178/unhammer},
  interhash    = {0c6f7429fd4bb103c869316f9b606342},
  intrahash    = {c8b8d8a407d6faa1594bfe2154ebe178},
}

@inproceedings{graham2009fts,
  author       = {Graham, Yvette and Bryl, Anton and van Genabith, Josef},
  editor       = {Butt, Miriam and King, Tracy Holloway},
  title        = {F-structure Transfer-Based Statistical Machine Translation},
  booktitle    = {{Proceedings of LFG09}},
  pages        = {317--337},
  publisher    = {CSLI Publications},
  address      = {Trinity College, Cambridge},
  year         = 2009,
  url          = {http://pargram.b.uib.no/references/2009s/},
  keywords     = {LFG MT Master SMT transfer},
  added-at     = {2010-11-21T20:10:54.000+0100},
  timestamp    = {2010-11-21T20:10:54.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2e55a0a2cfe2dcd57a83d761f1b2d515e/unhammer},
  interhash    = {bca91dfb00e2bf0f7a2d7fbaba908faa},
  intrahash    = {e55a0a2cfe2dcd57a83d761f1b2d515e},
}

% Initials are separated by a space ("F. J.") so they parse as two tokens.
@inproceedings{koehn2003spb,
  author       = {Koehn, P. and Och, F. J. and Marcu, D.},
  title        = {Statistical Phrase-Based Translation},
  booktitle    = {{NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology}},
  pages        = {48--54},
  organization = acl,
  publisher    = acl,
  address      = {Morristown, NJ},
  year         = 2003,
  url          = {http://www.mt-archive.info/HLT-NAACL-2003-Koehn.pdf},
  keywords     = {MT Master PBSMT alignment haveread statistical},
  added-at     = {2010-11-21T20:10:43.000+0100},
  timestamp    = {2010-11-21T20:10:43.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/232e07ec025d7dc3c6c9693dcda9357bb/unhammer},
  interhash    = {2c2a74e4a5a79ab9b2d09caa87e93a4c},
  intrahash    = {32e07ec025d7dc3c6c9693dcda9357bb},
}

% The third author uses the "Last, Jr, First" form so that "III" is read as
% a suffix and not as the surname.
% NOTE(review): this entry and butt2002pgp both give pages 1--7 and volume 15
% for the same workshop proceedings -- possibly per-paper page numbering taken
% over from the source record; verify against the proceedings.
@inproceedings{kaplan2002aeg,
  author       = {Kaplan, Ronald M. and King, Tracy Holloway and Maxwell, III, John T.},
  title        = {Adapting Existing Grammars: the {XLE} Experience},
  booktitle    = {{COLING-GEE '02 Proceedings of the 2002 Workshop on Grammar Engineering and Evaluation}},
  volume       = 15,
  pages        = {1--7},
  organization = acl,
  publisher    = acl,
  address      = {Morristown, NJ},
  year         = 2002,
  url          = {http://acl.ldc.upenn.edu/coling2002/workshops/data/w06/w06-06.pdf},
  keywords     = {English LFG Master XLE},
  added-at     = {2010-11-21T20:09:44.000+0100},
  timestamp    = {2010-11-21T20:09:44.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2e93d7a9faa43e131de9d316192dd9dbd/unhammer},
  interhash    = {09bbace401efda4d1b798ccb43aa88f4},
  intrahash    = {e93d7a9faa43e131de9d316192dd9dbd},
}

% Compound surnames ("De Smedt", "Van Eynde") are braced so they stay one
% surname for sorting and abbreviation.
@inproceedings{dyvik2009lmp,
  author       = {Dyvik, Helge and Meurer, Paul and Ros{\'e}n, Victoria and {De Smedt}, Koenraad},
  editor       = {Passarotti, Marco and Przepi{\'o}rkowski, Adam and Raynaud, Savina and {Van Eynde}, Frank},
  title        = {Linguistically Motivated Parallel Parsebanks},
  booktitle    = {{Proceedings of the Eighth International Workshop on Treebanks and Linguistic Theories}},
  pages        = {71--82},
  publisher    = {EDUCatt},
  address      = {Milano},
  year         = 2009,
  url          = {http://tlt8.unicatt.it/allegati/Proceedings_TLT8.pdf#page=83},
  keywords     = {LFG Master XPAR alignment treebank},
  added-at     = {2010-11-21T20:07:43.000+0100},
  timestamp    = {2010-11-21T20:07:43.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/26ba1bef5f443503118e47b26ced8ca8a/unhammer},
  interhash    = {8f1c3a4761e9a7283164e6c1f3fed9b7},
  intrahash    = {6ba1bef5f443503118e47b26ced8ca8a},
}

@inproceedings{chen1993asb,
  author       = {Chen, Stanley F.},
  title        = {Aligning Sentences in Bilingual Corpora using Lexical Information},
  booktitle    = {{Proceedings of the 31st Annual Conference of the Association for Computational Linguistics}},
  pages        = {9--16},
  organization = acl,
  publisher    = acl,
  address      = {Columbus, Ohio},
  year         = 1993,
  url          = {http://portal.acm.org/citation.cfm?id=981576&dl=},
  keywords     = {Master alignment statistical},
  added-at     = {2010-11-21T20:06:29.000+0100},
  timestamp    = {2010-11-21T20:06:29.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2f7bc5bcf528b27e6354c8b23950f8f31/unhammer},
  interhash    = {0297d9839d29fe7fe620414285ed0756},
  intrahash    = {f7bc5bcf528b27e6354c8b23950f8f31},
}

% "Parameter" is brace-protected as a whole word; the source data explicitly
% protected its capital letter.
@article{brown1993msm,
  author       = {Brown, Peter F. and {Della Pietra}, Stephen A. and {Della Pietra}, Vincent J. and Mercer, Robert L.},
  title        = {The Mathematics of Statistical Machine Translation: {Parameter} Estimation},
  journal      = {Computational Linguistics},
  volume       = 19,
  number       = 2,
  pages        = {263--311},
  year         = 1993,
  url          = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.8919},
  keywords     = {EM MT Master alignment mathematics statistical},
  added-at     = {2010-11-21T20:03:46.000+0100},
  timestamp    = {2010-11-21T20:03:46.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/24d328348b40ce3e139de22b0d1a6dcaf/unhammer},
  interhash    = {f485596e97bb9fc546cf7ef7c53591c7},
  intrahash    = {4d328348b40ce3e139de22b0d1a6dcaf},
}

% NOTE(review): pages 1--7 and volume 15 are identical to kaplan2002aeg (same
% workshop proceedings); verify against the proceedings.
@inproceedings{butt2002pgp,
  author       = {Butt, Miriam and Dyvik, Helge and King, Tracy Holloway and Masuichi, Hiroshi and Rohrer, Christian},
  title        = {The {Parallel Grammar Project}},
  booktitle    = {{COLING-GEE '02 Proceedings of the 2002 Workshop on Grammar Engineering and Evaluation}},
  volume       = 15,
  pages        = {1--7},
  organization = acl,
  publisher    = acl,
  address      = {Morristown, NJ},
  year         = 2002,
  url          = {http://portal.acm.org/citation.cfm?id=1118783.1118786},
  keywords     = {LFG Master XPAR parallel syntax},
  added-at     = {2010-11-21T20:03:04.000+0100},
  timestamp    = {2010-11-21T20:03:04.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2a05c3f91c141ad62a6ad1245b70fc2c7/unhammer},
  interhash    = {997d2c029201d46b2a2b80eb8e122e19},
  intrahash    = {a05c3f91c141ad62a6ad1245b70fc2c7},
}

@inproceedings{tinsley2007ept,
  author       = {Tinsley, John and Hearne, Mary and Way, Andy},
  title        = {Exploiting Parallel Treebanks to Improve Phrase-Based Statistical Machine Translation},
  booktitle    = {{Proceedings of Treebanks and Linguistic Theories (TLT '07)}},
  publisher    = {NEALT},
  address      = {Bergen, Norway},
  year         = 2007,
  url          = {http://tlt07.uib.no/papers/12.pdf},
  keywords     = {Master PBSMT SMT alignment haveread statistical treebank},
  added-at     = {2010-11-21T09:57:24.000+0100},
  timestamp    = {2010-11-21T09:57:24.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2ab641a8c5c46efcb212639a15e09057d/unhammer},
  interhash    = {75bb398e94a2cbb0c8d965d979be0c37},
  intrahash    = {ab641a8c5c46efcb212639a15e09057d},
}

@inproceedings{tiedemann2009dat,
  author       = {Tiedemann, J. and Kotz{\'e}, G.},
  title        = {A Discriminative Approach to Tree Alignment},
  booktitle    = {{Proceedings of the Workshop on Natural Language Processing Methods and Corpora in Translation, Lexicography, and Language Learning}},
  pages        = {33--39},
  organization = acl,
  publisher    = acl,
  address      = {Borovets, Bulgaria},
  year         = 2009,
  url          = {http://acl.eldoc.ub.rug.nl/mirror/W/W09/W09-42.pdf#page=43},
  keywords     = {Master SMT alignment treebank},
  added-at     = {2010-11-21T09:56:00.000+0100},
  timestamp    = {2010-11-21T09:56:00.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/292726085ce4c80ba24421a863faacc57/unhammer},
  interhash    = {8b1794f4dc02256ab86f4a595a9ab6a0},
  intrahash    = {92726085ce4c80ba24421a863faacc57},
}

@inproceedings{azzam1998crm,
  author       = {Azzam, Saliha and Humphreys, Kevin and Gaizauskas, Robert},
  title        = {Coreference Resolution in a Multilingual Information Extraction System},
  booktitle    = {{Proceedings of the Workshop on Linguistic Coreference}},
  pages        = {74--78},
  publisher    = {LREC},
  address      = {Granada},
  year         = 1998,
  url          = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.5639&rep=rep1&type=pdf},
  keywords     = {IE Master anaphora argument_structure},
  added-at     = {2010-11-21T09:55:51.000+0100},
  timestamp    = {2010-11-21T09:55:51.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2863572676e2895c78515ab52c41eadcb/unhammer},
  interhash    = {f1adb1ebfb4c54da8d9813883d7ac3df},
  intrahash    = {863572676e2895c78515ab52c41eadcb},
}

% Unpublished work needs a note field. The description of the document kind,
% previously appended to the title text, is stored there.
@unpublished{unhammer2009aaa,
  author       = {Unhammer, Kevin Brubeck},
  title        = {Do arguments and adjuncts ever align?},
  year         = 2009,
  note         = {LINGMET semester assignment},
  url          = {https://github.com/downloads/unhammer/lfgalign/argstr.pdf},
  keywords     = {alignment argument_structure lfg master myown},
  added-at     = {2010-11-21T09:55:09.000+0100},
  timestamp    = {2010-11-21T09:55:09.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/214bf4d2e0538bf44e78434ca82df847c/unhammer},
  interhash    = {99e0a0a358f1d570619ae75fbcfc6906},
  intrahash    = {14bf4d2e0538bf44e78434ca82df847c},
}

@unpublished{unhammer2010lcf,
  author       = {Unhammer, Kevin Brubeck},
  title        = {{LFG-based} Constituent and Function Alignment for Parallel Treebanking},
  year         = 2010,
  note         = {accepted},
  url          = {http://github.com/unhammer/lfgalign/raw/master/article/lfgalign-art.pdf},
  keywords     = {LFG Master XPAR alignment myown treebank},
  added-at     = {2010-11-20T23:53:04.000+0100},
  timestamp    = {2010-11-20T23:53:04.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/29db1fe2661323b25f1a4b30ffb4bd6bf/unhammer},
  interhash    = {99a645ab49b04a217ddbf20a520c6c8b},
  intrahash    = {9db1fe2661323b25f1a4b30ffb4bd6bf},
}

@inproceedings{zhechev2008agp,
  author       = {Zhechev, Ventsislav and Way, Andy},
  title        = {Automatic Generation of Parallel Treebanks},
  booktitle    = {{Proceedings of the 22nd International Conference on Computational Linguistics}},
  volume       = 1,
  pages        = {1105--1112},
  organization = acl,
  publisher    = acl,
  address      = {Manchester},
  year         = 2008,
  url          = {http://www.nltg.brighton.ac.uk/home/Roger.Evans/private/coling2008/cdrom/PAPERS/pdf/PAPERS139.pdf},
  keywords     = {Master SMT parallel treebank},
  added-at     = {2010-11-20T23:42:53.000+0100},
  timestamp    = {2010-11-20T23:42:53.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/206fd9cf6db23cd4e1bc010d37bccd770/unhammer},
  interhash    = {4485f3c3d9def59a67c33a57d1688c17},
  intrahash    = {06fd9cf6db23cd4e1bc010d37bccd770},
}

@inproceedings{surdeanu2003upa,
  author       = {Surdeanu, Mihai and Harabagiu, Sanda and Williams, John and Aarseth, Paul},
  title        = {Using Predicate-Argument Structures for Information Extraction},
  booktitle    = {{Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics}},
  volume       = 1,
  pages        = {8--15},
  organization = acl,
  publisher    = acl,
  address      = {Sapporo, Japan},
  year         = 2003,
  url          = {http://acl.ldc.upenn.edu/acl2003/main/pdfs/Surdeanu.pdf},
  keywords     = {IE Master argument_structure},
  added-at     = {2010-11-10T16:01:27.000+0100},
  timestamp    = {2010-11-10T16:01:27.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/227ae32c31cd0f514bbb59f6ad4c0c493/unhammer},
  interhash    = {3f05b9a23eb84234d7720e3dda18f953},
  intrahash    = {27ae32c31cd0f514bbb59f6ad4c0c493},
}

@inproceedings{samuelsson2007apa,
  author       = {Samuelsson, Y. and Volk, M.},
  title        = {Automatic Phrase Alignment: Using Statistical {N-Gram} Alignment for Syntactic Phrase Alignment},
  booktitle    = {{Proceedings of Treebanks and Linguistic Theories (TLT '07)}},
  publisher    = {NEALT},
  address      = {Bergen, Norway},
  year         = 2007,
  url          = {http://tlt07.uib.no/papers/8.pdf},
  keywords     = {Master XPAR alignment haveread statistical syntax},
  added-at     = {2010-11-10T15:58:42.000+0100},
  timestamp    = {2010-11-10T15:58:42.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/22900500ffe841190abff885bd1cc595f/unhammer},
  interhash    = {7331861fceb9798e8fafca725602c05a},
  intrahash    = {2900500ffe841190abff885bd1cc595f},
}

@inproceedings{samuelsson2006pap,
  author       = {Samuelsson, Y. and Volk, M.},
  title        = {Phrase Alignment in Parallel Treebanks},
  booktitle    = {{Proceedings of Treebanks and Linguistic Theories (TLT '06)}},
  publisher    = {{\'U}FAL},
  address      = {Prague},
  year         = 2006,
  url          = {http://www.ling.su.se/staff/yvonne/pub/samuelsson_2006_align.pdf},
  keywords     = {Master alignment corpus haveread treebank},
  added-at     = {2010-11-10T15:57:34.000+0100},
  timestamp    = {2010-11-10T15:57:34.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/210ed3ee27f01459943dcfb9275b354d3/unhammer},
  interhash    = {a0c66e37ad933800de24074ee318a7cc},
  intrahash    = {10ed3ee27f01459943dcfb9275b354d3},
}