"""Gradio demo that summarizes Chinese text with a Randeng Pegasus checkpoint.

Loading this module loads the tokenizer and the seq2seq model named by
``MODEL_NAME`` and builds the Gradio interface ``demo``. The web server is
only started when the file is executed as a script.
"""

import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, PegasusTokenizer

# Checkpoint shared by the tokenizer and the model.
MODEL_NAME = "IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese"

# Tokenizer / generation settings used by ``summarize``.
MAX_INPUT_TOKENS = 512    # longer inputs are truncated by the tokenizer
MAX_SUMMARY_TOKENS = 64
MIN_SUMMARY_TOKENS = 20
LENGTH_PENALTY = 2.0
NUM_BEAMS = 4

# Use the slow ``PegasusTokenizer`` class explicitly (not
# ``PegasusTokenizerFast``): the original author switched to the slow
# tokenizer to avoid tiktoken/SentencePiece errors. The tokenizer is loaded
# exactly once; an earlier ``AutoTokenizer`` load was immediately overwritten
# by this one and has been removed.
tokenizer = PegasusTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def summarize(text: str) -> str:
    """Return a generated summary of ``text``.

    Args:
        text: The article to summarize. Input longer than
            ``MAX_INPUT_TOKENS`` tokens is truncated by the tokenizer.

    Returns:
        The decoded summary with special tokens stripped, or an empty string
        when ``text`` is empty or contains only whitespace.
    """
    # Without this guard, ``min_length`` would force the model to produce a
    # summary even though there is nothing to summarize.
    if not text or not text.strip():
        return ""

    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=MAX_INPUT_TOKENS,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        # Forward the mask produced by the tokenizer instead of dropping it.
        attention_mask=inputs.get("attention_mask"),
        max_length=MAX_SUMMARY_TOKENS,
        min_length=MIN_SUMMARY_TOKENS,
        length_penalty=LENGTH_PENALTY,
        num_beams=NUM_BEAMS,
    )
    # Only one sequence is generated per request; decode the first result.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# UI definition.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=10, label="请输入中文文章"),
    outputs=gr.Textbox(label="自动生成的摘要"),
    title="中文文本摘要 Demo",
    description="使用 Randeng Pegasus 238M 模型生成简洁摘要",
)

if __name__ == "__main__":
    demo.launch()