|
% ClonEval voice cloning benchmark paper, arXiv preprint 2504.20581 (cs.CL).
% Only the coined name is brace-protected so styles may recase the rest.
@misc{christop2025clonevalopenvoicecloning,
  author        = {Christop, Iwona and Kuczy{\'n}ski, Tomasz and Kubis, Marek},
  title         = {{ClonEval}: An Open Voice Cloning Benchmark},
  year          = {2025},
  eprint        = {2504.20581},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2504.20581},
}
|
|
|
% CREMA-D dataset paper, IEEE Transactions on Affective Computing 5(4), 2014.
% Only the acronym is brace-protected so styles may recase the rest.
@article{crema-d,
  author  = {Cao, Houwei and Cooper, David G. and Keutmann, Michael K. and Gur, Ruben C. and Nenkova, Ani and Verma, Ragini},
  title   = {{CREMA-D}: Crowd-Sourced Emotional Multimodal Actors Dataset},
  journal = {IEEE Transactions on Affective Computing},
  year    = {2014},
  volume  = {5},
  number  = {4},
  pages   = {377--390},
  doi     = {10.1109/TAFFC.2014.2336244},
}
|
|
|
% Librispeech corpus paper, ICASSP 2015.
% Page range uses an en-dash (double hyphen); acronyms are brace-protected.
@inproceedings{librispeech2015,
  author    = {Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
  title     = {{Librispeech}: An {ASR} corpus based on public domain audio books},
  booktitle = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2015},
  pages     = {5206--5210},
  keywords  = {Resource description framework;Genomics;Bioinformatics;Blogs;Information services;Electronic publishing;Speech Recognition;Corpus;LibriVox},
  doi       = {10.1109/ICASSP.2015.7178964},
}
|
|
|
% RAVDESS database paper, PLOS ONE 13(5), 2018.
% Proper nouns and the acronym are brace-protected; the month uses the
% predefined macro so styles can localise it.
@article{ravdess,
  author    = {Livingstone, Steven R. and Russo, Frank A.},
  title     = {The {Ryerson} Audio-Visual Database of Emotional Speech and Song ({RAVDESS}): A dynamic, multimodal set of facial and vocal expressions in {North American English}},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2018},
  month     = may,
  volume    = {13},
  number    = {5},
  pages     = {1--35},
  doi       = {10.1371/journal.pone.0196391},
  url       = {https://doi.org/10.1371/journal.pone.0196391},
}
|
|
|
% Chapter by Haq and Jackson in the edited volume Machine Audition
% (IGI Global, 2010). Entered as incollection because the chapter authors
% differ from the volume editor and the entry carries its own booktitle.
@incollection{savee,
  author    = {Haq, S. and Jackson, P. J. B.},
  title     = {Multimodal Emotion Recognition},
  booktitle = {Machine Audition: Principles, Algorithms and Systems},
  editor    = {Wang, W.},
  publisher = {IGI Global},
  address   = {Hershey, PA},
  year      = {2010},
  month     = aug,
  pages     = {398--423},
}
|
|
|
% Toronto emotional speech set (TESS) record, published by Borealis, 2020.
% The proper noun and the acronym are brace-protected against recasing.
@misc{tess,
  author    = {Pichora-Fuller, M. Kathleen and Dupuis, Kate},
  title     = {{Toronto} emotional speech set ({TESS})},
  publisher = {Borealis},
  year      = {2020},
  version   = {DRAFT VERSION},
  doi       = {10.5683/SP2/E8H2MF},
  url       = {https://doi.org/10.5683/SP2/E8H2MF},
}