File size: 1,395 Bytes
7024e68
 
 
 
a76c0df
7024e68
a76c0df
7024e68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c742fc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# summarization.py
from llama_cpp import Llama
from utils import available_gguf_llms, s2tw_converter
import time
from functools import lru_cache

@lru_cache(maxsize=1)
def get_model(gguf_repo_id, gguf_filename):
    """Load a GGUF model from the Hugging Face hub, memoizing the result.

    With ``maxsize=1`` only the most recently requested
    (repo_id, filename) pair stays cached: asking for a different model
    evicts the previous one, so at most one LLM is resident at a time.
    """
    model = Llama.from_pretrained(
        repo_id=gguf_repo_id,
        filename=gguf_filename,
        verbose=False,
        n_ctx=32768,         # context window, in tokens
        n_threads=4,         # CPU threads used for inference
        repeat_penalty=1.2,  # discourage repetitive completions
    )
    return model

def summarize_transcript(transcript, selected_gguf_model, prompt_input):
    """Stream a summary of *transcript* from the selected GGUF model.

    After every token received from the model, yields the cumulative
    summary so far, converted with ``s2tw_converter`` (Simplified →
    Traditional Chinese). When the stream ends, yields the complete
    converted summary one final time. Prints the time-to-first-token
    to stdout as a side effect.
    """
    gguf_repo, gguf_file = available_gguf_llms[selected_gguf_model]
    model = get_model(gguf_repo, gguf_file)

    chat_messages = [
        {"role": "system", "content": "You are an expert in transcript summarization."},
        {"role": "user", "content": f'{prompt_input} \n{transcript}'},
    ]

    pieces = []
    first_token_seen = False
    start = time.time()

    for chunk in model.create_chat_completion(messages=chat_messages, stream=True):
        delta = chunk['choices'][0]['delta']
        if 'content' not in delta:
            # Some stream chunks carry only role/finish metadata — skip them.
            continue
        if not first_token_seen:
            print(f"Time to 1st Token: {time.time()-start:.1f} sec")
            first_token_seen = True
        pieces.append(str(delta['content']))
        yield s2tw_converter.convert("".join(pieces)) #, "Summarizing"
    yield s2tw_converter.convert("".join(pieces)) #, "Summary complete"