@inproceedings{schmidt-etal-2022-dont,
  title     = {Don{'}t Stop Fine-Tuning: On Training Regimes for Few-Shot Cross-Lingual Transfer with Multilingual Language Models},
  author    = {Schmidt, Fabian David and Vuli{\'c}, Ivan and Glava{\v{s}}, Goran},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = 2022,
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  pages     = {10725--10742},
  url       = {https://aclanthology.org/2022.emnlp-main.736},
  abstract  = {A large body of recent work highlights the fallacies of zero-shot cross-lingual transfer (ZS-XLT) with large multilingual language models. Namely, their performance varies substantially for different target languages and is the weakest where needed the most: for low-resource languages distant to the source language. One remedy is few-shot transfer (FS-XLT), where leveraging only a few task-annotated instances in the target language(s) may yield sizable performance gains. However, FS-XLT also succumbs to large variation, as models easily overfit to the small datasets. In this work, we present a systematic study focused on a spectrum of FS-XLT fine-tuning regimes, analyzing key properties such as effectiveness, (in)stability, and modularity. We conduct extensive experiments on both higher-level (NLI, paraphrasing) and lower-level tasks (NER, POS), presenting new FS-XLT strategies that yield both improved and more stable FS-XLT across the board. Our findings challenge established FS-XLT methods: e.g., we propose to replace sequential fine-tuning with joint fine-tuning on source and target language instances, offering consistent gains with different number of shots (including resource-rich scenarios). We also show that further gains can be achieved with multi-stage FS-XLT training in which joint multilingual fine-tuning precedes the bilingual source-target specialization.}
}