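# Gradio + OpenVINO GenAI demo: download text documents, index them with BM25
# (jieba tokenization), retrieve the best-matching passage for each question,
# and stream the answer from an int4 OpenVINO model through a simple chat UI.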
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi
import jieba
import requests
from bs4 import BeautifulSoup
import os

import huggingface_hub as hf_hub

# Download the source documents first and save them as .txt files
os.makedirs("./data", exist_ok=True)
urls = [
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
    #"https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/SoushenJi.txt",#too much token
    "https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/mirrorstory.txt",
]
for i, url in enumerate(urls):
    resp = requests.get(url)
    resp.encoding = 'utf-8'
    with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(resp.text)
    #soup = BeautifulSoup(resp.text, "html.parser")
    #text = soup.get_text(separator="\n", strip=True)
    #with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
    #    f.write(text)

# Fetch the first document once (outside the loop) to pre-fill the Story textbox.
response = requests.get(urls[0])
response.encoding = 'utf-8'  # force UTF-8 encoding
story_default_text = response.text.strip()

# Initialize the OpenVINO model
model_id = "hsuwill000/BitCPM4-1B_int4_ov" 
#model_id = "hsuwill000/MiniCPM4-0.5B_int4_ov" #can't finish.
#model_id = "OpenVINO/Qwen3-0.6B-int4-ov" #can't finish.
model_path = "ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

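# Generation settings shared by all requests: up to 1024 new tokens,
# top-p/top-k sampling values and a mild repetition penalty.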
config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024
config.top_p = 0.9
config.top_k = 40
config.repetition_penalty = 1.2

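# Build the LLM pipeline on CPU and re-apply the tokenizer's built-in chat template.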
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)

# Load the .txt files that were just saved
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
print("==================")
print(texts[0][:500])
print("==================")
# Tokenize with jieba and build the BM25 index
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)

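# Chat-session helpers wired to the start/end buttons in the UI below.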
def start_chat():
    pipe.start_chat()
    return "✅ Chat started!"

def finish_chat():
    pipe.finish_chat()
    return "🛑 Chat finished!"

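# RAG + streaming generation: score every document with BM25 against the
# jieba-tokenized query, keep the top hit as context, then generate in a worker
# thread and forward tokens to the UI through a queue.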
def generate_stream(prompt):
    tokenized_query = list(jieba.cut(prompt))
    top_k = 1
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]

    print("=== 檢索到的相關段落 ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- 段落 {i} ---\n{txt}\n")

    context = "\n\n".join(retrieved_texts)
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"

    print("=== 最終 prompt ===")
    print(final_prompt)

    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # end-of-stream sentinel

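    # The worker produces tokens into the queue; the loop below consumes them,
    # yielding the partial answer (and finally the measured TPS) to Gradio.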
    threading.Thread(target=worker).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""
    yield result, tps_result

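# Gradio layout: session controls and status/TPS on top; prompt, submit button
# and streamed answer on the left; the downloaded story pre-loaded on the right.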
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        with gr.Column():
            start_btn = gr.Button("開始對話")
            end_btn = gr.Button("結束對話")
        status_box = gr.Textbox(label="狀態", interactive=False)
        TPS_box = gr.Textbox(label="TPS", interactive=False)
    with gr.Row():
        with gr.Column():
            textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
            button = gr.Button("Submit")
            textbox_output = gr.Textbox(label="robot answer:",lines=20, elem_id="scroll_output")
        with gr.Column():
            StoryBox = gr.Textbox(label="Story", lines=50, placeholder="Story here...", value=story_default_text)

    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)

    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])

demo.launch()