# maxtest01 / app.py — Hugging Face Space by hsuwill000 (commit 6affb07)
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import time
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
import nncf
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi
import jieba
import requests
from bs4 import BeautifulSoup
import os
import huggingface_hub as hf_hub
# Download the source documents and save each one as ./data/doc_<i>.txt
# so SimpleDirectoryReader can pick them up for BM25 retrieval below.
os.makedirs("./data", exist_ok=True)
urls = [
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
    #"https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/SoushenJi.txt",  # too many tokens
    "https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/mirrorstory.txt",
]
# Keep the downloaded bodies so the first one can seed the Story textbox
# without a second network round-trip (previously urls[0] was fetched twice).
downloaded = []
for i, url in enumerate(urls):
    resp = requests.get(url, timeout=30)  # bound the wait instead of hanging indefinitely
    resp.raise_for_status()  # fail loudly rather than silently indexing an HTTP error page
    resp.encoding = 'utf-8'  # force UTF-8: the sources are UTF-8 text
    downloaded.append(resp.text)
    with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(resp.text)

# Default content for the "Story" textbox: the first downloaded document.
story_default_text = downloaded[0].strip()
# Initialize the OpenVINO text-generation pipeline.
model_id = "hsuwill000/BitCPM4-1B_int4_ov"
#model_id = "hsuwill000/MiniCPM4-0.5B_int4_ov" # can't finish generation
#model_id = "OpenVINO/Qwen3-0.6B-int4-ov" # can't finish generation
model_path = "ov"
# Download the pre-converted int4 OpenVINO model snapshot into ./ov.
hf_hub.snapshot_download(model_id, local_dir=model_path)
# Sampling configuration shared by every generate() call.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024  # hard cap on generated tokens per answer
config.top_p = 0.9
config.top_k = 40
config.repetition_penalty = 1.2  # >1 penalizes repeated phrases
pipe = ov_genai.LLMPipeline(model_path, "CPU")
# NOTE(review): setting the chat template to its current value looks like a
# no-op used to force chat-template activation — confirm against ov_genai docs.
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
# Load the .txt files saved above and keep their raw contents for retrieval.
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
print("==================")
print(texts[0][:500])
print("==================")
# Segment with jieba (Chinese word segmentation) and build the BM25 index
# that generate_stream() queries for relevant passages.
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)
def start_chat():
    """Open a stateful chat session on the shared pipeline; return a status string."""
    status = "✅ 開始對話!"
    pipe.start_chat()
    return status
def finish_chat():
    """Close the current chat session on the shared pipeline; return a status string."""
    status = "🛑 結束對話!"
    pipe.finish_chat()
    return status
def generate_stream(prompt, top_k=1):
    """Stream an answer to *prompt* grounded in BM25-retrieved context.

    Retrieves the ``top_k`` highest-scoring documents (jieba-tokenized BM25
    over the module-level corpus), builds a RAG-style prompt, and runs the
    OpenVINO pipeline on a worker thread. Yields ``(partial_answer, tps)``
    tuples for the two Gradio outputs; the throughput string is only filled
    in on the final yield.
    """
    # --- retrieval ------------------------------------------------------
    tokenized_query = list(jieba.cut(prompt))
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]
    print("=== 檢索到的相關段落 ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- 段落 {i} ---\n{txt}\n")
    context = "\n\n".join(retrieved_texts)
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
    print("=== 最終 prompt ===")
    print(final_prompt)

    # --- generation on a worker thread ----------------------------------
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        # Invoked by the pipeline per generated piece; forward to the queue.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        nonlocal tps_result
        try:
            gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        finally:
            # Always post the sentinel, even if generate() raises — otherwise
            # the consumer loop below would block forever on q.get().
            q.put(None)

    # daemon=True so a stuck generation cannot keep the process alive on exit.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""
    # Final yield carries the measured throughput (set before the sentinel,
    # so it is visible here on the success path).
    yield result, tps_result
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    # Top row: chat-session controls plus status / throughput readouts.
    with gr.Row():
        with gr.Column():
            btn_start = gr.Button("開始對話")
            btn_end = gr.Button("結束對話")
            box_status = gr.Textbox(label="狀態", interactive=False)
            box_tps = gr.Textbox(label="TPS", interactive=False)
    # Bottom row: prompt + streamed answer (left), source story (right).
    with gr.Row():
        with gr.Column():
            box_prompt = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
            btn_submit = gr.Button("Submit")
            box_answer = gr.Textbox(label="robot answer:", lines=20, elem_id="scroll_output")
        with gr.Column():
            box_story = gr.Textbox(label="Story", lines=50, placeholder="Story here...", value=story_default_text)
    # Wire the buttons to their handlers.
    btn_start.click(fn=start_chat, outputs=box_status)
    btn_end.click(fn=finish_chat, outputs=box_status)
    btn_submit.click(fn=generate_stream, inputs=box_prompt, outputs=[box_answer, box_tps])
demo.launch()