import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import requests
from bs4 import BeautifulSoup  # used by the commented-out HTML extraction below
import os
import huggingface_hub as hf_hub
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi
import jieba

# Download the source web pages first and save them as .txt files
os.makedirs("./data", exist_ok=True)

urls = [
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
    #"https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/SoushenJi.txt",  # too many tokens
    "https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/mirrorstory.txt",
]

for i, url in enumerate(urls):
    resp = requests.get(url)
    resp.encoding = 'utf-8'
    with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(resp.text)
    # For HTML sources, extract visible text instead of saving raw markup:
    #soup = BeautifulSoup(resp.text, "html.parser")
    #text = soup.get_text(separator="\n", strip=True)
    #with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
    #    f.write(text)

response = requests.get(urls[0])
response.encoding = 'utf-8'  # force UTF-8 decoding
story_default_text = response.text.strip()

# Initialize the OpenVINO model
model_id = "hsuwill000/BitCPM4-1B_int4_ov"
#model_id = "hsuwill000/MiniCPM4-0.5B_int4_ov"  # can't finish
#model_id = "OpenVINO/Qwen3-0.6B-int4-ov"       # can't finish
model_path = "ov"

hf_hub.snapshot_download(model_id, local_dir=model_path)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024
# Note: top_p/top_k only take effect when sampling is enabled (do_sample=True);
# as written, decoding is greedy with a repetition penalty.
config.top_p = 0.9
config.top_k = 40
config.repetition_penalty = 1.2

pipe = ov_genai.LLMPipeline(model_path, "CPU")
# (Re)apply the tokenizer's built-in chat template
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)

# Load the .txt files saved above
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
print("==================")
print(texts[0][:500])
print("==================")

# Build the BM25 index over jieba-tokenized documents
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)

def start_chat():
    pipe.start_chat()
    return "✅ 開始對話!"

def finish_chat():
    pipe.finish_chat()
    return "🛑 結束對話!"
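# The SoushenJi.txt source above is disabled because whole-document retrieval feeds
# far too many tokens into the prompt. A minimal chunking sketch (hypothetical helper,
# not used by the script as written; chunk_size/overlap values are assumptions) that
# would let BM25 index overlapping passages instead of whole files:
def chunk_text(text, chunk_size=500, overlap=50):
    """Split text into overlapping character windows for finer-grained BM25 retrieval."""
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
# Usage sketch: texts = [c for t in texts for c in chunk_text(t)] before building BM25Okapi.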
def generate_stream(prompt):
    # BM25 retrieval: score every document against the jieba-tokenized query
    tokenized_query = list(jieba.cut(prompt))
    top_k = 1
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]

    print("=== Retrieved passages ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- Passage {i} ---\n{txt}\n")

    # Build the final prompt: retrieved context followed by the user's question
    context = "\n\n".join(retrieved_texts)
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
    print("=== Final prompt ===")
    print(final_prompt)

    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # end-of-stream sentinel

    # Generate on a background thread; the main thread drains the queue and yields
    # the growing answer so Gradio can stream partial output to the textbox.
    threading.Thread(target=worker).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""
    yield result, tps_result

with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        with gr.Column():
            start_btn = gr.Button("開始對話")
            end_btn = gr.Button("結束對話")
            status_box = gr.Textbox(label="狀態", interactive=False)
            TPS_box = gr.Textbox(label="TPS", interactive=False)
    with gr.Row():
        with gr.Column():
            textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
            button = gr.Button("Submit")
            textbox_output = gr.Textbox(label="robot answer:", lines=20, elem_id="scroll_output")
        with gr.Column():
            StoryBox = gr.Textbox(label="Story", lines=50, placeholder="Story here...", value=story_default_text)

    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])

demo.launch()
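# Headless smoke test (a sketch, not part of the original app; assumes demo.launch()
# above is commented out, and the sample question is a hypothetical placeholder).
# Drains the generator once to exercise retrieval + streaming without the UI:
#if __name__ == "__main__":
#    final, tps = "", ""
#    for partial, tps in generate_stream("故事的主角是誰?"):
#        final = partial
#    print("\nTPS:", tps)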