import gradio as gr
from transformers import pipeline
import fitz  # PyMuPDF
import re
import pandas as pd

# Load detection models
bias_detector = pipeline("text-classification", model="himel7/bias-detector")
bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")
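
# The detector is assumed to report "LABEL_1" for biased text (see analyze_sentence below);
# if the model card defines different label names, adjust that check accordingly.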

# Load neutralizer models (lazy load for speed)
neutralizer_models = {
    "BART Neutralizer": "himel7/bias-neutralizer-bart",
    "T5 Small Neutralizer": "himel7/bias-neutralizer-t5s"
}
neutralizers = {}

def get_neutralizer(model_name):
    if model_name not in neutralizers:
        neutralizers[model_name] = pipeline("text2text-generation", model=neutralizer_models[model_name])
    return neutralizers[model_name]
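
# The first request for a model downloads it and caches the pipeline in `neutralizers`;
# later calls reuse the cached object, e.g. get_neutralizer("BART Neutralizer") (illustrative call).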

# Utils
def extract_text_from_pdf(pdf_file):
    text = ""
    with fitz.open(pdf_file) as pdf:
        for page in pdf:
            text += page.get_text("text")
    return text

def split_into_sentences(text):
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s for s in sentences if s]
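
# Note: splitting on whitespace after ., ! or ? is a rough heuristic; abbreviations
# such as "U.S. officials" will also be broken into separate "sentences".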

def analyze_sentence(sentence):
    detection_result = bias_detector(sentence)[0]
    label = detection_result['label']
    score = detection_result['score']
    if label == "LABEL_1":  # Biased
        type_result = bias_type_classifier(sentence)[0]
        return {
            "sentence": sentence,
            "bias": "Biased",
            "bias_score": round(score, 2),
            "bias_type": type_result['label'],
            "bias_type_score": round(type_result['score'], 2)
        }
    else:
        return {
            "sentence": sentence,
            "bias": "Unbiased",
            "bias_score": round(score, 2),
            "bias_type": "-",
            "bias_type_score": "-"
        }
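
# Each result dict uses the keys sentence, bias, bias_score, bias_type and bias_type_score;
# they become the DataFrame columns shown in the "Analyze PDF" tab.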

def analyze_pdf(pdf_file):
    text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(text)
    results = [analyze_sentence(s) for s in sentences]

    # Stats
    total = len(results)
    if total == 0:
        # Guard against PDFs with no extractable text (avoids division by zero below)
        return "### 📊 Bias Statistics\nNo sentences could be extracted from this PDF.", pd.DataFrame()
    biased = sum(1 for r in results if r["bias"] == "Biased")
    unbiased = total - biased
    stats_md = f"""
### 📊 Bias Statistics
- **Total Sentences:** {total}
- **Biased Sentences:** {biased} ({(biased/total)*100:.1f}%)
- **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
"""
    df = pd.DataFrame(results)
    return stats_md, df

def analyze_text(text):
    return analyze_sentence(text)

# New: Neutralize Bias
def neutralize_text(text, model_choice):
    neutralizer = get_neutralizer(model_choice)
    result = neutralizer(text, max_length=512, do_sample=False)
    return result[0]["generated_text"]

def neutralize_pdf(pdf_file, model_choice):
    text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(text)
    neutralizer = get_neutralizer(model_choice)
    neutralized_sentences = [neutralizer(s, max_length=512, do_sample=False)[0]["generated_text"] for s in sentences]
    neutralized_text = " ".join(neutralized_sentences)
    return neutralized_text
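
# Sentences are neutralized one at a time and rejoined with spaces; max_length=512
# caps only the length of each generated rewrite.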

# Top badges
badges_html = """
<p align="center">
  <a href="https://huggingface.co/himel7/bias-detector">
    <img src="https://img.shields.io/badge/🤗-Hugging%20Face-yellow.svg">
  </a>
  <a href="https://huggingface.co/himel7/bias-detector">
    <img src="https://img.shields.io/badge/Model-Homepage-purple.svg">
  </a>
  <a href="https://github.com/Himel1996/NewsBiasDetector/">
    <img src="https://img.shields.io/badge/GitHub-Repo-orange.svg">
  </a>
  <a href="https://arxiv.org/abs/2505.13010v1">
    <img src="https://img.shields.io/badge/arXiv-2505.13010-red.svg">
  </a>
</p>
"""

# Build UI
with gr.Blocks() as demo:
    gr.HTML(badges_html)
    gr.Markdown("## Bias Analyzer & Neutralizer")
    gr.Markdown("### This app detects bias in sentences, classifies the bias type, and neutralizes biased wording.")
with gr.Tab("Single Sentence"): | |
text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...") | |
output = gr.JSON() | |
btn = gr.Button("Analyze") | |
btn.click(analyze_text, inputs=text_input, outputs=output) | |
gr.Markdown("### Neutralize Bias") | |
model_choice = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer") | |
neutral_output = gr.Textbox(label="Neutralized Sentence", lines=3) | |
neutral_btn = gr.Button("Neutralize") | |
neutral_btn.click(neutralize_text, inputs=[text_input, model_choice], outputs=neutral_output) | |
with gr.Tab("Analyze PDF"): | |
pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"]) | |
stats_output = gr.Markdown() | |
table_output = gr.Dataframe(headers=["Sentence", "Bias", "Bias Score", "Bias Type", "Bias Type Score"]) | |
analyze_btn = gr.Button("Analyze PDF") | |
analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output]) | |
gr.Markdown("### Neutralize Entire PDF") | |
model_choice_pdf = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer") | |
neutral_pdf_output = gr.Textbox(label="Neutralized PDF Text", lines=15) | |
neutral_pdf_btn = gr.Button("Neutralize PDF") | |
neutral_pdf_btn.click(neutralize_pdf, inputs=[pdf_input, model_choice_pdf], outputs=neutral_pdf_output) | |

if __name__ == "__main__":
    demo.launch()