Panyun committed on
Commit
0626db3
·
verified ·
1 Parent(s): c6f96db

Delete app.py.py

Browse files
Files changed (1) hide show
  1. app.py.py +0 -34
app.py.py DELETED
@@ -1,34 +0,0 @@
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and model (GPTQ Int8 quantized checkpoint to reduce VRAM usage).
model_name = "Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",   # let transformers/accelerate place layers on GPU/CPU
    torch_dtype="auto",  # use the dtype the checkpoint was saved with
)
# Chat callback wired into gr.ChatInterface below.
def generate_response(message, history):
    """Generate one assistant reply for the Gradio chat UI.

    Args:
        message: The latest user message (str).
        history: Prior conversation turns supplied by gr.ChatInterface.
            The original code ignored it, so the model had no multi-turn
            memory; we now replay it into the chat template.

    Returns:
        The decoded assistant reply (str) with special tokens stripped.
    """
    # Rebuild the full conversation: past turns first, then the new message.
    messages = []
    for turn in history or []:
        # gr.ChatInterface historically passes (user, assistant) pairs;
        # newer versions may pass {"role": ..., "content": ...} dicts.
        # NOTE(review): confirm against the installed gradio version.
        if isinstance(turn, dict):
            messages.append(turn)
        else:
            user_msg, bot_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if bot_msg is not None:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    # BUG FIX: temperature only takes effect when sampling is enabled;
    # without do_sample=True transformers decodes greedily and the
    # temperature=0.7 setting was silently ignored (with a warning).
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
    )
    # Slice off the prompt tokens so only the newly generated reply is decoded.
    response = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    return response
# Build and start the Gradio chat front-end for the model.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Qwen2.5-7B大模型在线演示",
    description="输入问题后按回车开始对话",
)
demo.launch()