File size: 1,370 Bytes
913c94a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3
"""Quick multilingual title smoke tests (LLM)."""
from __future__ import annotations
import os, sys, pytest
from pathlib import Path

# Opt-out switch: when VOXSUM_SKIP_LLM_TESTS is exactly "1", the whole module
# is skipped here, before the project imports further down are attempted.
if os.getenv("VOXSUM_SKIP_LLM_TESTS") == "1":
    pytest.skip("LLM tests skipped (unset VOXSUM_SKIP_LLM_TESTS to run)", allow_module_level=True)

# ROOT is the directory one level above this file's own directory. It is
# placed at the front of sys.path (only if not already present), presumably so
# that the `src.*` imports further down resolve when run from any directory.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from src.summarization import generate_title  # noqa: E402
from src.utils import available_gguf_llms  # noqa: E402

def _select_model():
    """Pick the GGUF model name used by the title smoke test.

    Selection order:
      1. ``VOXSUM_GGUF_MODEL`` from the environment, when it is set and is
         contained in ``available_gguf_llms``.
      2. The first entry of a fixed list of Gemma-3 candidates that is
         contained in ``available_gguf_llms``.
      3. The first item produced by iterating ``available_gguf_llms``.

    Returns:
        The selected entry of ``available_gguf_llms``.

    If ``available_gguf_llms`` is empty, the calling test is skipped with an
    explicit reason rather than failing with a bare ``StopIteration``.
    """
    env_choice = os.getenv("VOXSUM_GGUF_MODEL")
    if env_choice and env_choice in available_gguf_llms:
        return env_choice
    for cand in ("Gemma-3-270M", "Gemma-3-3N-E2B", "Gemma-3-3N-E4B", "Gemma-3-1B"):
        if cand in available_gguf_llms:
            return cand
    # Default of None avoids StopIteration when nothing is registered.
    fallback = next(iter(available_gguf_llms), None)
    if fallback is None:
        pytest.skip("No GGUF models available for LLM title tests")
    return fallback

# Short sample transcripts keyed by a language label. The label is only used
# to identify the failing case in assertion messages.
TEST_TRANSCRIPTS = {
    "english": "Hello everyone, today we're going to discuss artificial intelligence and its impact.",
    "french": "Bonjour à tous, aujourd'hui nous allons discuter de l'intelligence artificielle.",
}

def test_multilingual_titles():
    """Smoke-test title generation for every sample transcript.

    For each entry of ``TEST_TRANSCRIPTS`` a title is generated with the model
    chosen by ``_select_model()``. The title must be non-blank and contain at
    most 15 whitespace-separated words.
    """
    model_name = _select_model()
    for language, transcript in TEST_TRANSCRIPTS.items():
        title = generate_title(transcript, model_name)
        # A whitespace-only string is truthy, so also check the stripped text.
        assert title and title.strip(), f"Empty title for {language}"
        word_count = len(title.split())
        assert word_count <= 15, (
            f"Title for {language} has {word_count} words (max 15): {title!r}"
        )