File size: 2,319 Bytes
f715ec8
 
 
08d5a49
f715ec8
 
 
 
08d5a49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11ccbb2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
% ClonEval voice cloning benchmark paper; arXiv preprint 2504.20581 (cs.CL).
@misc{christop2025clonevalopenvoicecloning,
  author        = {Iwona Christop and Tomasz Kuczyński and Marek Kubis},
  title         = {{ClonEval: An Open Voice Cloning Benchmark}},
  year          = {2025},
  eprint        = {2504.20581},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2504.20581},
}

% CREMA-D dataset paper, IEEE Transactions on Affective Computing 5(4), 2014.
@article{crema-d,
  author  = {Cao, Houwei and Cooper, David G. and Keutmann, Michael K. and Gur, Ruben C. and Nenkova, Ani and Verma, Ragini},
  title   = {{CREMA-D: Crowd-Sourced Emotional Multimodal Actors Dataset}},
  journal = {IEEE Transactions on Affective Computing},
  year    = {2014},
  volume  = {5},
  number  = {4},
  pages   = {377--390},
  doi     = {10.1109/TAFFC.2014.2336244},
}

% LibriSpeech ASR corpus paper, ICASSP 2015.
% Page range uses the double-hyphen form so styles typeset an en dash.
@inproceedings{librispeech2015,
    author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
    title={{Librispeech: An ASR corpus based on public domain audio books}},
    booktitle={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
    year={2015},
    pages={5206--5210},
    keywords={Resource description framework;Genomics;Bioinformatics;Blogs;Information services;Electronic publishing;Speech Recognition;Corpus;LibriVox},
    doi={10.1109/ICASSP.2015.7178964},
}

% RAVDESS database paper, PLOS ONE 13(5), May 2018.
% The title value is closed by its braces only; a stray double quote after
% the closing braces would be a syntax error and abort parsing of the entry.
@article{ravdess,
    author={Livingstone, Steven R. and Russo, Frank A.},
    title={{The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English}},
    journal={PLOS ONE},
    publisher={Public Library of Science},
    year={2018},
    month=may,
    volume={13},
    number={5},
    pages={1--35},
    doi={10.1371/journal.pone.0196391},
    url={https://doi.org/10.1371/journal.pone.0196391},
}

% Chapter by Haq and Jackson in the edited book "Machine Audition" (editor: Wang).
% Typed as incollection: the chapter has its own author and title, while the
% containing book has a separate title (booktitle) and editor.
@incollection{savee,
    author={Haq, S. and Jackson, P. J. B.},
    title={{Multimodal Emotion Recognition}},
    booktitle={{Machine Audition: Principles, Algorithms and Systems}},
    editor={Wang, W.},
    publisher={IGI Global},
    address={Hershey PA},
    year={2010},
    month=aug,
    pages={398--423},
}

% Toronto emotional speech set (TESS) dataset record, published via Borealis.
@misc{tess,
  author    = {Pichora-Fuller, M. Kathleen and Dupuis, Kate},
  title     = {{Toronto emotional speech set (TESS)}},
  publisher = {Borealis},
  year      = {2020},
  version   = {DRAFT VERSION},
  doi       = {10.5683/SP2/E8H2MF},
  url       = {https://doi.org/10.5683/SP2/E8H2MF},
}