File size: 2,007 Bytes
96e1c92
 
 
 
 
 
 
 
 
 
 
 
 
877f8f8
 
 
 
 
 
 
 
 
a181a06
877f8f8
a181a06
71bc555
 
 
 
 
40c3b4e
 
 
 
877f8f8
a181a06
71bc555
877f8f8
 
 
 
 
a181a06
71bc555
877f8f8
 
71bc555
 
 
 
877f8f8
96e1c92
877f8f8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# tokenizer = AutoTokenizer.from_pretrained("IDEA-CCNL/Randeng-Pegasus-523M-Summary-Chinese")
# model = AutoModelForSeq2SeqLM.from_pretrained("IDEA-CCNL/Randeng-Pegasus-523M-Summary-Chinese")

# def summarize(text):
#     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
#     summary_ids = model.generate(inputs["input_ids"], max_length=128)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# demo = gr.Interface(fn=summarize, inputs="text", outputs="text", title="中文文本摘要 Demo")
# demo.launch()
# import gradio as gr

# def greet():
#     return "你好,世界!这是你第一个成功运行的 Hugging Face Space 🎉"

# demo = gr.Interface(fn=greet, inputs=[], outputs="text")

# demo.launch()

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint shared by the tokenizer and the model; named once so the two
# cannot drift apart.
MODEL_NAME = "IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese"

# Force the slow tokenizer to avoid tiktoken/SentencePiece errors.
# The slow class is loaded directly, and only once: a preceding
# AutoTokenizer.from_pretrained(..., use_fast=False) call used to run here,
# but its result was immediately overwritten by this assignment, so it only
# added startup work.
from transformers import PegasusTokenizer  # not PegasusTokenizerFast

tokenizer = PegasusTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Summarization function
def summarize(text):
    """Return a short summary of ``text`` produced by the module-level model.

    Args:
        text: The article to summarize. The tokenizer truncates it to at
            most 512 tokens.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when ``text`` is ``None``, empty, or whitespace only.
    """
    # Nothing to summarize: skip the model call instead of asking it to
    # produce at least ``min_length`` tokens from no content.
    if not text or not text.strip():
        return ""

    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Forward the tokenizer's mask (when it produced one) rather than
        # leaving generate() to infer it.
        attention_mask=inputs.get("attention_mask"),
        max_length=64,
        min_length=20,
        length_penalty=2.0,
        num_beams=4,
    )
    # generate() returns a batch; there is exactly one input, so take row 0.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# UI: a multi-line input box for the article and a single output box for the
# generated summary, both wired to summarize().
article_box = gr.Textbox(lines=10, label="请输入中文文章")
summary_box = gr.Textbox(label="自动生成的摘要")

demo = gr.Interface(
    fn=summarize,
    inputs=article_box,
    outputs=summary_box,
    title="中文文本摘要 Demo",
    description="使用 Randeng Pegasus 238M 模型生成简洁摘要",
)

# Start the web server for the interface.
demo.launch()