A. Brunner, S. Engelberg, F. Jannidis, N. Tu, and L. Weimer. Proceedings of the 12th Language Resources and Evaluation Conference, pages 803--812. Marseille, France, European Language Resources Association (May 2020)
Abstract
This article presents corpus REDEWIEDERGABE, a German-language historical corpus with detailed annotations for speech, thought and writing representation (ST&WR). With approximately 490,000 tokens, it is the largest resource of its kind. It can be used to answer literary and linguistic research questions and serve as training material for machine learning. This paper describes the composition of the corpus and the annotation structure, discusses some methodological decisions and gives basic statistics about the forms of ST&WR found in this corpus.
%0 Conference Paper
%1 brunner-etal-2020-corpus
%A Brunner, Annelen
%A Engelberg, Stefan
%A Jannidis, Fotis
%A Tu, Ngoc Duyen Tanja
%A Weimer, Lukas
%B Proceedings of the 12th Language Resources and Evaluation Conference
%C Marseille, France
%D 2020
%I European Language Resources Association
%K corpus directspeech kallimachos nlp
%P 803--812
%T Corpus REDEWIEDERGABE
%U https://aclanthology.org/2020.lrec-1.100
%X This article presents corpus REDEWIEDERGABE, a German-language historical corpus with detailed annotations for speech, thought and writing representation (ST&WR). With approximately 490,000 tokens, it is the largest resource of its kind. It can be used to answer literary and linguistic research questions and serve as training material for machine learning. This paper describes the composition of the corpus and the annotation structure, discusses some methodological decisions and gives basic statistics about the forms of ST&WR found in this corpus.
%@ 979-10-95546-34-4
@comment{Conference paper record; fields run from core bibliographic data to identifiers, then descriptive text, then bookmarking-service metadata (added-at, timestamp, biburl, interhash, intrahash).}
@inproceedings{brunner-etal-2020-corpus,
  author    = {Brunner, Annelen and Engelberg, Stefan and Jannidis, Fotis and Tu, Ngoc Duyen Tanja and Weimer, Lukas},
  title     = {Corpus {REDEWIEDERGABE}},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  pages     = {803--812},
  publisher = {European Language Resources Association},
  address   = {Marseille, France},
  month     = may,
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  url       = {https://aclanthology.org/2020.lrec-1.100},
  language  = {English},
  keywords  = {corpus directspeech kallimachos nlp},
  abstract  = {This article presents corpus REDEWIEDERGABE, a German-language historical corpus with detailed annotations for speech, thought and writing representation (ST{\&}WR). With approximately 490,000 tokens, it is the largest resource of its kind. It can be used to answer literary and linguistic research questions and serve as training material for machine learning. This paper describes the composition of the corpus and the annotation structure, discusses some methodological decisions and gives basic statistics about the forms of ST{\&}WR found in this corpus.},
  added-at  = {2022-05-10T23:12:17.000+0200},
  timestamp = {2022-05-10T23:12:17.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/24ebfeec64a5b47c9630bf6c04827d86a/albinzehe},
  interhash = {726e090cb4ab8825b35fb4d4f3b9cbb0},
  intrahash = {4ebfeec64a5b47c9630bf6c04827d86a},
}