File size: 2,002 Bytes
75ec748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from src.task.task_factory import Task
from tests.tasks.evaluation.task_test_case import TaskTest


class TaskSTS22Test(TaskTest):
    """Tests for ``Task.compute`` on the 'sts22' task with the Pearson metric.

    Covers three cases: predictions shorter than the corpus (result plus a
    truncation warning), predictions matching the corpus size (result with no
    warning), and predictions longer than the corpus (raises ``ValueError``).
    """

    def setUp(self) -> None:
        # Chain to the base fixture so any TaskTest initialization still runs.
        super().setUp()
        # Size of the sts22 ground-truth corpus used by these tests.
        self.dataset_size = 72

    def test_given_a_prediction_smaller_than_corpus_when_compute_then_return_expected_result_and_warning(
        self,
    ):
        """A short prediction list is scored on its prefix and emits a warning."""
        # Predictions need at least two distinct values, otherwise the Pearson
        # correlation is undefined (nan).
        a_predictions = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2]
        task = Task(
            task_name="sts22",
            metric="pearson",
        )

        expected_results = {"pearsonr": -0.2273955}
        expected_warning = (
            f"Your prediction size is of '{len(a_predictions)}', while the ground truths size is "
            f"of '{self.dataset_size}'. We computed the metric over the first {len(a_predictions)}"
            f" elements."
        )

        actual_result, actual_warning = task.compute(predictions=a_predictions)

        self.assertEvalDictEqual(expected_results, actual_result)
        self.assertEqual(expected_warning, actual_warning)

    def test_given_a_prediction_when_compute_then_return_expected_result_no_warnings(
        self,
    ):
        """A full-size prediction list is scored without any warning."""
        # Half zeros, half ones: two distinct values so Pearson is defined.
        a_predictions = [0] * (self.dataset_size // 2) + [1] * (self.dataset_size // 2)
        task = Task(
            task_name="sts22",
            metric="pearson",
        )

        expected_results = {"pearsonr": 0.29421966}
        expected_warning = None

        actual_result, actual_warning = task.compute(predictions=a_predictions)

        self.assertEvalDictEqual(expected_results, actual_result)
        self.assertEqual(expected_warning, actual_warning)

    def test_given_a_prediction_larger_than_ground_truth_raise_error(self):
        """A prediction list longer than the corpus raises ``ValueError``."""
        a_predictions = [1] * (self.dataset_size + 1)
        task = Task(
            task_name="sts22",
            metric="pearson",
        )

        self.assertRaises(ValueError, task.compute, predictions=a_predictions)