% Grover, Matheson, Mikheev and Moens (2000): conference paper describing the
% LT TTT tokenisation and mark-up tool. Bibliographic fields come first; the
% trailing fields (added-at, biburl, interhash, intrahash, timestamp) are
% BibSonomy export metadata that standard styles ignore.
@inproceedings{Grover:2000,
  author    = {Grover, Claire and Matheson, Colin and Mikheev, Andrei and Moens, Marc},
  title     = {{LT TTT} --- A Flexible Tokenisation Tool},
  booktitle = {Proc.\ {LREC} 2000},
  year      = {2000},
  url       = {http://www.ltg.ed.ac.uk/papers/},
  abstract  = {We describe LT TTT, a recently developed software system which provides tools to perform text tokenisation and mark-up. The system includes ready-made components to segment text into paragraphs, sentences, words and other kinds of token but, crucially, it also allows users to tailor rule-sets to produce mark-up appropriate for particular applications. We present three case studies of our use of LT TTT: named-entity recognition (MUC-7), citation recognition and mark-up and the preparation of a corpus in the medical domain. We conclude with a discussion of the use of browsers to visualise marked-up text.},
  keywords  = {tokeniser XML},
  added-at  = {2007-12-14T02:39:43.000+0100},
  timestamp = {2007-12-14T02:39:43.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/225d5028cf05ec729bf39533078381177/diego_ma},
  interhash = {d0e9898b2a17acf0de47e438317368ef},
  intrahash = {25d5028cf05ec729bf39533078381177},
}