ar08 committed on
Commit dcd6dd2 · verified · 1 Parent(s): 3c0c86a

Update app.py

Files changed (1)
  1. app.py +13 -8
app.py CHANGED
@@ -10,20 +10,25 @@ model_name = "Rahmat82/t5-small-finetuned-summarization-xsum"
 model = ORTModelForSeq2SeqLM.from_pretrained(model_name, export=True)
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 
-# Create summarizer pipeline with Optimum
+# Create summarizer pipeline
 summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device_map="auto", batch_size=12)
 
-# Define summarization function with 1024 token cap
+# Summarization function with max input tokens and medium summary length
 def summarize_text(text):
     if not text.strip():
         return "Please enter some text."
 
-    # Tokenize and truncate to max 1024 tokens
+    # Tokenize and truncate to 1024 tokens
     inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
     input_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
 
-    # Summarize the truncated text
-    result = summarizer(input_text)
+    # Generate medium-length summary
+    result = summarizer(
+        input_text,
+        min_length=30,   # 👈 medium minimum length
+        max_length=100,  # 👈 medium maximum length
+        do_sample=False
+    )
     return result[0]["summary_text"]
 
 # Gradio app
@@ -31,9 +36,9 @@ app = gr.Interface(
     fn=summarize_text,
     inputs=gr.Textbox(lines=15, placeholder="Paste your text here...", label="Input Text"),
     outputs=gr.Textbox(label="Summary"),
-    title="🚀 ONNX-Powered T5 Summarizer (1024 tokens)",
-    description="Summarize long text using a fine-tuned ONNX-accelerated T5-small model (max input: 1024 tokens)"
+    title="🚀 ONNX-Powered T5 Summarizer (Medium Summary)",
+    description="Summarize long text into a medium-length summary using an ONNX-accelerated T5-small model (max input: 1024 tokens)"
 )
 
-# Launch the app
+# Launch
 app.launch()
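
For reference, below is a minimal standalone sketch of the generation settings this commit introduces. The diff does not show app.py's import block, so the transformers/optimum imports are assumptions inferred from the identifiers in the changed lines (ORTModelForSeq2SeqLM, AutoTokenizer, pipeline); min_length, max_length, and do_sample are standard pipeline generation parameters, and the Gradio interface is omitted.

# Sketch only, not the full app.py: reproduces the new summarizer call with
# the medium-length settings from this commit. Imports and sample text are
# assumptions; the Gradio interface is omitted.
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSeq2SeqLM

model_name = "Rahmat82/t5-small-finetuned-summarization-xsum"
model = ORTModelForSeq2SeqLM.from_pretrained(model_name, export=True)  # export weights to ONNX on load
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

long_text = "Paste a long article here. It will be truncated before summarization."  # hypothetical input
# Truncate to the 1024-token input cap, as summarize_text() does
inputs = tokenizer(long_text, return_tensors="pt", max_length=1024, truncation=True)
truncated = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)

# New in this commit: bound the summary to a medium length
result = summarizer(truncated, min_length=30, max_length=100, do_sample=False)
print(result[0]["summary_text"])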