# VoxSum — src/summarization.py
# (Hugging Face Space file by Luigi; last update commit a76c0df, verified)
# summarization.py
from llama_cpp import Llama
from utils import available_gguf_llms, s2tw_converter
import time
from functools import lru_cache
@lru_cache(maxsize=1)
def get_model(gguf_repo_id, gguf_filename):
    """Download (if needed) and load a GGUF model from the Hugging Face Hub.

    The result is memoized with ``maxsize=1``, so only the most recently
    requested model stays in memory — asking for a different model evicts
    and replaces the previous one.
    """
    load_kwargs = dict(
        repo_id=gguf_repo_id,
        filename=gguf_filename,
        verbose=False,
        n_ctx=32768,          # context window size in tokens
        n_threads=4,
        repeat_penalty=1.2,
    )
    return Llama.from_pretrained(**load_kwargs)
def summarize_transcript(transcript, selected_gguf_model, prompt_input):
    """Stream a summary of *transcript* from the selected GGUF chat model.

    Yields the Simplified→Traditional Chinese (s2tw) conversion of the
    summary text accumulated so far — one yield per streamed token —
    followed by one final yield of the complete converted summary.
    """
    repo_id, filename = available_gguf_llms[selected_gguf_model]
    model = get_model(repo_id, filename)

    pieces = []                  # content tokens received so far
    awaiting_first_token = True
    start_time = time.time()

    token_stream = model.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are an expert in transcript summarization."},
            {"role": "user", "content": f'{prompt_input} \n{transcript}'}
        ],
        stream=True,
    )

    for event in token_stream:
        delta = event['choices'][0]['delta']
        if 'content' not in delta:
            # Role/empty deltas carry no text — skip them.
            continue
        if awaiting_first_token:
            # Report time-to-first-token latency once.
            print(f"Time to 1st Token: {time.time()-start_time:.1f} sec")
            awaiting_first_token = False
        pieces.append(str(delta['content']))
        yield s2tw_converter.convert("".join(pieces))  # , "Summarizing"

    # Emit the finished summary one last time after the stream ends.
    yield s2tw_converter.convert("".join(pieces))  # , "Summary complete"