import gradio as gr
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning, message="__array__ implementation doesn't accept a copy keyword")
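# (Assumption: this filter targets a NumPy 2.x "__array__ ... copy keyword"
# DeprecationWarning that the faiss / sentence-transformers stack can emit;
# it is harmless to leave in place on older NumPy versions.)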
# Load the OpenVINO language model
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
# Load the sentence-embedding model (used to convert text into vectors)
encoder = SentenceTransformer("all-MiniLM-L6-v2")
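# Note: "all-MiniLM-L6-v2" produces 384-dimensional embeddings, but the exact
# dimension is never hard-coded here: the FAISS index dimension is derived
# from the encoder output below, so swapping in another SentenceTransformer
# model should work unchanged.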
# FAQ knowledge base (question + answer pairs)
faq_data = [
    ("What is FAISS?", "FAISS is a library for efficient similarity search and clustering of dense vectors."),
    ("How does FAISS work?", "FAISS uses indexing structures to quickly retrieve the nearest neighbors of a query vector."),
    ("Can FAISS run on GPU?", "Yes, FAISS supports GPU acceleration for faster computation."),
    ("What is OpenVINO?", "OpenVINO is an inference engine optimized for Intel hardware."),
    ("How to fine-tune a model?", "Fine-tuning involves training a model on a specific dataset to adapt it to a particular task."),
    ("What is the best way to optimize inference speed?", "Using quantization and model distillation can significantly improve inference speed.")
]
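# New question/answer pairs can simply be appended to faq_data; the FAISS index
# below is rebuilt from this list at startup, so additions take effect on the
# next launch.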
# Encode the FAQ questions into vectors
faq_questions = [q for q, _ in faq_data]
faq_answers = [a for _, a in faq_data]
faq_vectors = np.array(encoder.encode(faq_questions)).astype("float32")
# Build the FAISS index
d = faq_vectors.shape[1]  # embedding dimension
index = faiss.IndexFlatL2(d)
index.add(faq_vectors)
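# Optional variant (a sketch, not part of the original script): IndexFlatL2
# ranks by exact squared-L2 distance. For cosine similarity instead, normalize
# the vectors and use an inner-product index:
#
#   faiss.normalize_L2(faq_vectors)
#   index = faiss.IndexFlatIP(d)
#   index.add(faq_vectors)
#
# Query vectors would then need the same faiss.normalize_L2 call before
# searching, and the threshold check below would flip (higher = more similar).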
# Conversation history
history = []
# Query function: search the FAQ first; fall back to the language model if no match
def respond(prompt):
    global history
    # Encode the input as a vector and query FAISS
    query_vector = np.array(encoder.encode([prompt])).astype("float32")
    D, I = index.search(query_vector, 1)  # retrieve the single nearest FAQ question
    if D[0][0] < 1.0:  # distance threshold: lower = more similar (5.0 was too loose and routed every question to the FAQ)
        response = faq_answers[I[0][0]]  # answer directly from the FAQ
    else:
        # No FAQ match, so generate a reply with the language model
        messages = [{"role": "system", "content": "Answer the question in English only."}]
        for user_text, assistant_text in history:
            messages.append({"role": "user", "content": user_text})
            messages.append({"role": "assistant", "content": assistant_text})
        messages.append({"role": "user", "content": prompt})
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        # Slice off the prompt tokens so only the newly generated reply is decoded
        generated_ids = generated_ids[:, model_inputs.input_ids.shape[1]:]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    history.append((prompt, response))
    return response
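# Quick usage check (a sketch, not part of the app): a prompt identical to an
# FAQ question should score well under the 1.0 distance threshold and return
# the stored answer directly, e.g.:
#   print(respond("What is FAISS?"))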
# Clear the conversation history
def clear_history():
    global history
    history = []
    return "History cleared!"
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino with history + FAISS")
    with gr.Tabs():
        with gr.TabItem("Chat"):
            chat_if = gr.Interface(
                fn=respond,
                inputs=gr.Textbox(label="Prompt", placeholder="Enter a message..."),
                outputs=gr.Textbox(label="Response", interactive=False),
                api_name="hchat",
                title="DeepSeek-R1 with FAISS FAQ",
                description="This chatbot first searches an FAQ database using FAISS, then responds using a language model if no match is found."
            )
            with gr.Row():
                clear_button = gr.Button("🧹 Clear History")
                clear_button.click(fn=clear_history, inputs=[], outputs=[])
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)