# summarization.py
from functools import lru_cache
import time

from llama_cpp import Llama

from utils import available_gguf_llms, s2tw_converter


@lru_cache  # cache loaded models so re-selecting one doesn't reload it from disk
def get_model(gguf_repo_id, gguf_filename):
    # Download (or reuse a cached copy of) the GGUF model from the Hugging Face Hub.
    return Llama.from_pretrained(
        repo_id=gguf_repo_id,
        filename=gguf_filename,
        verbose=False,
        n_ctx=32768,         # context window sized for long transcripts
        n_threads=4,
        repeat_penalty=1.2,  # discourage repetitive output
    )


def summarize_transcript(transcript, selected_gguf_model, prompt_input):
    """Stream a summary of `transcript`, yielding the partial text as it grows."""
    repo_id, filename = available_gguf_llms[selected_gguf_model]
    llm = get_model(repo_id, filename)

    full_summary = []
    is_1st_token = True
    t1 = time.time()

    stream = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are an expert in transcript summarization."},
            {"role": "user", "content": f"{prompt_input} \n{transcript}"},
        ],
        stream=True,
    )

    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            if is_1st_token:
                print(f"Time to 1st Token: {time.time() - t1:.1f} sec")
                is_1st_token = False
            token = delta["content"]
            full_summary.append(str(token))
            # Convert Simplified Chinese to Traditional (Taiwan) before each update.
            yield s2tw_converter.convert("".join(full_summary))  # , "Summarizing"

    yield s2tw_converter.convert("".join(full_summary))  # , "Summary complete"
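
# --- Usage sketch (not part of the original file) ---
# A minimal example of consuming the streaming generator from the command line.
# The transcript and prompt text below are hypothetical placeholders; the model
# key is taken from whatever utils.available_gguf_llms actually defines.
if __name__ == "__main__":
    demo_transcript = "Speaker 1: Welcome everyone to today's meeting..."
    for partial in summarize_transcript(
        demo_transcript,
        selected_gguf_model=next(iter(available_gguf_llms)),  # pick any configured model
        prompt_input="Summarize the following transcript in bullet points:",
    ):
        # Each yield is the full summary so far; show its tail as it grows.
        print(partial[-80:], end="\r")
    print()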