# NOTE(review): the lines originally here were Hugging Face Spaces web-page
# residue (run status, file size, commit-hash gutter, line-number gutter)
# picked up during extraction — not Python source. Commented out so the file
# parses as Python.
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import time
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
import nncf
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi
import jieba
import requests
from bs4 import BeautifulSoup
import os
import huggingface_hub as hf_hub
# Download the reference document(s) and store them as .txt files in ./data,
# where SimpleDirectoryReader picks them up below.
os.makedirs("./data", exist_ok=True)
urls = [
    # Alternative sources kept for reference:
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
    #"https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/SoushenJi.txt",#too much token
    "https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/mirrorstory.txt",
]
for i, url in enumerate(urls):
    resp = requests.get(url, timeout=30)
    # Fail fast on HTTP errors so an error page never becomes the corpus.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(resp.text)
    #soup = BeautifulSoup(resp.text, "html.parser")
    #text = soup.get_text(separator="\n", strip=True)
    #with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
    #    f.write(text)
# Default text shown in the Story box: reuse the file just written instead of
# downloading urls[0] a second time (the original issued a duplicate request).
with open("./data/doc_0.txt", "r", encoding="utf-8") as f:
    story_default_text = f.read().strip()
# Initialize the OpenVINO GenAI model: fetch the quantized snapshot from the
# Hugging Face Hub into a local directory and build a CPU inference pipeline.
model_id = "hsuwill000/BitCPM4-1B_int4_ov"
#model_id = "hsuwill000/MiniCPM4-0.5B_int4_ov" #can't finish.
#model_id = "OpenVINO/Qwen3-0.6B-int4-ov" #can't finish.
model_path = "ov"  # local directory the model snapshot is downloaded into
hf_hub.snapshot_download(model_id, local_dir=model_path)
# Sampling configuration shared by every pipe.generate() call below.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024
config.top_p = 0.9
config.top_k = 40
config.repetition_penalty = 1.2  # discourage repeated phrases in the answer
pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Re-applies the tokenizer's own chat template; presumably this forces chat
# formatting to be active — NOTE(review): looks like a no-op re-set, confirm.
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
# Load the .txt files saved above and keep their raw contents in `texts`
# (indexed the same way as the BM25 corpus, so a BM25 hit maps back to text).
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
print("==================")
print(texts[0][:500])  # debug: preview the first 500 chars of document 0
print("==================")
# Build the BM25 index; jieba performs Chinese word segmentation so that
# scoring works on words rather than raw characters.
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)
def start_chat():
    """Open a chat session on the shared LLM pipeline and report success."""
    status_message = "✅ 開始對話!"
    pipe.start_chat()
    return status_message
def finish_chat():
    """Close the current chat session on the shared LLM pipeline."""
    status_message = "🛑 結束對話!"
    pipe.finish_chat()
    return status_message
def generate_stream(prompt, top_k=1):
    """Stream an answer to *prompt*, grounded in BM25-retrieved context.

    Yields ``(partial_answer, tps_text)`` tuples so Gradio can refresh the
    answer textbox token by token; the final yield carries the measured
    throughput string.

    Args:
        prompt: the user's question.
        top_k: number of highest-scoring documents to include in the
            context (default 1, matching the original hard-coded value).
    """
    # --- retrieval: score every document against the segmented query ---
    tokenized_query = list(jieba.cut(prompt))
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]
    print("=== 檢索到的相關段落 ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- 段落 {i} ---\n{txt}\n")
    context = "\n\n".join(retrieved_texts)
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
    print("=== 最終 prompt ===")
    print(final_prompt)
    # --- generation: producer thread feeds subwords through a queue ---
    q = queue.Queue()
    tps_result = ""
    def streamer(subword):
        # Runs inside pipe.generate(); forwards each subword to the consumer.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING
    def worker():
        nonlocal tps_result
        try:
            gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        finally:
            # Always unblock the consumer, even if generate() raises —
            # otherwise q.get() below would deadlock forever (original bug).
            q.put(None)  # end-of-stream sentinel
    gen_thread = threading.Thread(target=worker, daemon=True)
    gen_thread.start()
    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""
    # Join so tps_result is fully written before the final yield reads it
    # (the original read it without synchronization).
    gen_thread.join()
    yield result, tps_result
# --- Gradio UI wiring -----------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        with gr.Column():
            # Session controls plus read-only status / throughput displays.
            start_btn = gr.Button("開始對話")
            end_btn = gr.Button("結束對話")
            status_box = gr.Textbox(label="狀態", interactive=False)
            TPS_box = gr.Textbox(label="TPS", interactive=False)
    with gr.Row():
        with gr.Column():
            # Left column: prompt input and the streamed model answer.
            textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
            button = gr.Button("Submit")
            textbox_output = gr.Textbox(label="robot answer:",lines=20, elem_id="scroll_output")
        with gr.Column():
            # Right column: the source story text, pre-filled from doc 0.
            StoryBox = gr.Textbox(label="Story", lines=50, placeholder="Story here...", value=story_default_text)
    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    # generate_stream is a generator, so Gradio streams each yielded tuple.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])
demo.launch()
# (end of file — stray extraction character removed)