# summarization.py
from functools import lru_cache
import time

from llama_cpp import Llama

from utils import available_gguf_llms, s2tw_converter


@lru_cache  # cache loaded models so re-selecting one doesn't reload it from disk
def get_model(gguf_repo_id, gguf_filename):
    # Download (or reuse a cached copy of) the GGUF model from the Hugging Face Hub.
    return Llama.from_pretrained(
        repo_id=gguf_repo_id,
        filename=gguf_filename,
        verbose=False,
        n_ctx=32768,         # context window sized for long transcripts
        n_threads=4,
        repeat_penalty=1.2,  # discourage repetitive output
    )


def summarize_transcript(transcript, selected_gguf_model, prompt_input):
    """Stream a summary of `transcript`, yielding the partial text as it grows."""
    repo_id, filename = available_gguf_llms[selected_gguf_model]
    llm = get_model(repo_id, filename)

    full_summary = []
    is_1st_token = True
    t1 = time.time()

    stream = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are an expert in transcript summarization."},
            {"role": "user", "content": f"{prompt_input} \n{transcript}"},
        ],
        stream=True,
    )

    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            if is_1st_token:
                print(f"Time to 1st Token: {time.time() - t1:.1f} sec")
                is_1st_token = False
            token = delta["content"]
            full_summary.append(str(token))
            # Convert Simplified Chinese to Traditional (Taiwan) before each update.
            yield s2tw_converter.convert("".join(full_summary))  # , "Summarizing"

    yield s2tw_converter.convert("".join(full_summary))  # , "Summary complete"
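
# --- Usage sketch (not part of the original file) ---
# A minimal example of consuming the streaming generator from the command line.
# The transcript and prompt text below are hypothetical placeholders; the model
# key is taken from whatever utils.available_gguf_llms actually defines.
if __name__ == "__main__":
    demo_transcript = "Speaker 1: Welcome everyone to today's meeting..."
    for partial in summarize_transcript(
        demo_transcript,
        selected_gguf_model=next(iter(available_gguf_llms)),  # pick any configured model
        prompt_input="Summarize the following transcript in bullet points:",
    ):
        # Each yield is the full summary so far; show its tail as it grows.
        print(partial[-80:], end="\r")
    print()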