# maxtest01 / app.py — Hugging Face Space by hsuwill000 (commit 6affb07)
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import time
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
import nncf
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi
import jieba
import requests
from bs4 import BeautifulSoup
import os
import huggingface_hub as hf_hub
# Download the source documents and save each one as ./data/doc_<i>.txt
# so SimpleDirectoryReader can pick them up for BM25 retrieval below.
os.makedirs("./data", exist_ok=True)
urls = [
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
    #"https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/SoushenJi.txt",  # too many tokens
    "https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/mirrorstory.txt",
]
# Keep the downloaded bodies so the first one can seed the Story textbox
# without a second network round-trip (previously urls[0] was fetched twice).
downloaded = []
for i, url in enumerate(urls):
    resp = requests.get(url, timeout=30)  # bound the wait instead of hanging indefinitely
    resp.raise_for_status()  # fail loudly rather than silently indexing an HTTP error page
    resp.encoding = 'utf-8'  # force UTF-8: the sources are UTF-8 text
    downloaded.append(resp.text)
    with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(resp.text)

# Default content for the "Story" textbox: the first downloaded document.
story_default_text = downloaded[0].strip()
# Initialize the OpenVINO text-generation pipeline.
model_id = "hsuwill000/BitCPM4-1B_int4_ov"
#model_id = "hsuwill000/MiniCPM4-0.5B_int4_ov" # can't finish generation
#model_id = "OpenVINO/Qwen3-0.6B-int4-ov" # can't finish generation
model_path = "ov"
# Download the pre-converted int4 OpenVINO model snapshot into ./ov.
hf_hub.snapshot_download(model_id, local_dir=model_path)
# Sampling configuration shared by every generate() call.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024  # hard cap on generated tokens per answer
config.top_p = 0.9
config.top_k = 40
config.repetition_penalty = 1.2  # >1 penalizes repeated phrases
pipe = ov_genai.LLMPipeline(model_path, "CPU")
# NOTE(review): setting the chat template to its current value looks like a
# no-op used to force chat-template activation — confirm against ov_genai docs.
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
# Load the .txt files saved above and keep their raw contents for retrieval.
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
print("==================")
print(texts[0][:500])
print("==================")
# Segment with jieba (Chinese word segmentation) and build the BM25 index
# that generate_stream() queries for relevant passages.
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)
def start_chat():
    """Open a stateful chat session on the shared pipeline; return a status string."""
    status = "✅ 開始對話!"
    pipe.start_chat()
    return status
def finish_chat():
    """Close the current chat session on the shared pipeline; return a status string."""
    status = "🛑 結束對話!"
    pipe.finish_chat()
    return status
def generate_stream(prompt, top_k=1):
    """Stream an answer to *prompt* grounded in BM25-retrieved context.

    Retrieves the ``top_k`` highest-scoring documents (jieba-tokenized BM25
    over the module-level corpus), builds a RAG-style prompt, and runs the
    OpenVINO pipeline on a worker thread. Yields ``(partial_answer, tps)``
    tuples for the two Gradio outputs; the throughput string is only filled
    in on the final yield.
    """
    # --- retrieval ------------------------------------------------------
    tokenized_query = list(jieba.cut(prompt))
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]
    print("=== 檢索到的相關段落 ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- 段落 {i} ---\n{txt}\n")
    context = "\n\n".join(retrieved_texts)
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
    print("=== 最終 prompt ===")
    print(final_prompt)

    # --- generation on a worker thread ----------------------------------
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        # Invoked by the pipeline per generated piece; forward to the queue.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        nonlocal tps_result
        try:
            gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        finally:
            # Always post the sentinel, even if generate() raises — otherwise
            # the consumer loop below would block forever on q.get().
            q.put(None)

    # daemon=True so a stuck generation cannot keep the process alive on exit.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""
    # Final yield carries the measured throughput (set before the sentinel,
    # so it is visible here on the success path).
    yield result, tps_result
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    # Top row: chat-session controls plus status / throughput readouts.
    with gr.Row():
        with gr.Column():
            btn_start = gr.Button("開始對話")
            btn_end = gr.Button("結束對話")
            box_status = gr.Textbox(label="狀態", interactive=False)
            box_tps = gr.Textbox(label="TPS", interactive=False)
    # Bottom row: prompt + streamed answer (left), source story (right).
    with gr.Row():
        with gr.Column():
            box_prompt = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
            btn_submit = gr.Button("Submit")
            box_answer = gr.Textbox(label="robot answer:", lines=20, elem_id="scroll_output")
        with gr.Column():
            box_story = gr.Textbox(label="Story", lines=50, placeholder="Story here...", value=story_default_text)
    # Wire the buttons to their handlers.
    btn_start.click(fn=start_chat, outputs=box_status)
    btn_end.click(fn=finish_chat, outputs=box_status)
    btn_submit.click(fn=generate_stream, inputs=box_prompt, outputs=[box_answer, box_tps])
demo.launch()