@inproceedings{schmidt-etal-2022-dont,
  title     = {Don{'}t Stop Fine-Tuning: On Training Regimes for Few-Shot Cross-Lingual Transfer with Multilingual Language Models},
  author    = {Schmidt, Fabian David and Vuli{\'c}, Ivan and Glava{\v{s}}, Goran},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = 2022,
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  pages     = {10725--10742},
  url       = {https://aclanthology.org/2022.emnlp-main.736},
  abstract  = {A large body of recent work highlights the fallacies of zero-shot cross-lingual transfer (ZS-XLT) with large multilingual language models. Namely, their performance varies substantially for different target languages and is the weakest where needed the most: for low-resource languages distant to the source language. One remedy is few-shot transfer (FS-XLT), where leveraging only a few task-annotated instances in the target language(s) may yield sizable performance gains. However, FS-XLT also succumbs to large variation, as models easily overfit to the small datasets. In this work, we present a systematic study focused on a spectrum of FS-XLT fine-tuning regimes, analyzing key properties such as effectiveness, (in)stability, and modularity. We conduct extensive experiments on both higher-level (NLI, paraphrasing) and lower-level tasks (NER, POS), presenting new FS-XLT strategies that yield both improved and more stable FS-XLT across the board. Our findings challenge established FS-XLT methods: e.g., we propose to replace sequential fine-tuning with joint fine-tuning on source and target language instances, offering consistent gains with different number of shots (including resource-rich scenarios). We also show that further gains can be achieved with multi-stage FS-XLT training in which joint multilingual fine-tuning precedes the bilingual source-target specialization.}
}