Spaces:
Sleeping
Sleeping
File size: 5,300 Bytes
6da6acb 0f32d66 6da6acb 208382e 6da6acb 208382e 0f32d66 6da6acb 0f32d66 6da6acb 0f32d66 6da6acb 0f32d66 208382e 7540157 208382e 86d3754 208382e 86d3754 0f32d66 86d3754 208382e 0f32d66 6da6acb 208382e 6da6acb 86d3754 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import gradio as gr
from transformers import pipeline
import fitz # PyMuPDF
import re
import pandas as pd
# Detection pipelines, built eagerly when the module is imported:
#   * bias_detector        - labels a sentence (analyze_sentence treats
#                            "LABEL_1" as biased)
#   * bias_type_classifier - names the bias type of a sentence already
#                            flagged as biased
bias_detector = pipeline(
    "text-classification",
    model="himel7/bias-detector",
)
bias_type_classifier = pipeline(
    "text-classification",
    model="maximuspowers/bias-type-classifier",
)
# Neutralizer checkpoints, keyed by the display name offered in the UI
# dropdowns. Nothing is loaded here: get_neutralizer() builds a pipeline the
# first time a name is requested and keeps it in `neutralizers`.
neutralizer_models = {
    "BART Neutralizer": "himel7/bias-neutralizer-bart",
    "T5 Small Neutralizer": "himel7/bias-neutralizer-t5s",
}

# display name -> already-built text2text-generation pipeline
neutralizers = {}
def get_neutralizer(model_name):
    """Return the neutralizer pipeline for *model_name*, building it on first use.

    Args:
        model_name: A key of ``neutralizer_models``.

    Returns:
        The ``text2text-generation`` pipeline cached in ``neutralizers``.

    Raises:
        KeyError: If *model_name* is not a key of ``neutralizer_models``.
    """
    # Fast path: this model was already built by an earlier call.
    if model_name in neutralizers:
        return neutralizers[model_name]

    checkpoint = neutralizer_models[model_name]
    built = pipeline("text2text-generation", model=checkpoint)
    neutralizers[model_name] = built
    return built
# Utils
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Passed straight to ``fitz.open``.

    Returns:
        One string: each page's ``get_text("text")`` output joined with no
        separator (an empty string when the document has no pages).
    """
    with fitz.open(pdf_file) as document:
        page_texts = [page.get_text("text") for page in document]
    return "".join(page_texts)
def split_into_sentences(text):
    """Split *text* into sentences at whitespace that follows ``.``, ``!`` or ``?``.

    The terminating punctuation stays attached to its sentence. Leading and
    trailing whitespace of the whole text is removed first, and empty
    fragments are dropped.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty sentence strings, in their original order.
    """
    trimmed = text.strip()
    fragments = re.split(r'(?<=[.!?])\s+', trimmed)
    # filter(None, ...) discards the empty string produced by empty input.
    return list(filter(None, fragments))
def analyze_sentence(sentence):
    """Classify one sentence as biased or unbiased and, if biased, by bias type.

    Args:
        sentence: The text handed to the detection pipeline.

    Returns:
        A dict with the keys ``sentence``, ``bias``, ``bias_score``,
        ``bias_type`` and ``bias_type_score`` (in that order).

        * ``bias`` is ``"Biased"`` when the detector's label is ``"LABEL_1"``,
          otherwise ``"Unbiased"``.
        * ``bias_score`` is the detector's score for the label it predicted,
          rounded to two decimals.
        * ``bias_type`` / ``bias_type_score`` hold the type classifier's first
          result for biased sentences, and ``"-"`` for unbiased ones.
    """
    # truncation=True lets the tokenizer clip inputs longer than the model's
    # maximum length. Without it, one over-long "sentence" (the splitter only
    # breaks on punctuation followed by whitespace) can raise inside the model
    # and abort the analysis of an entire document.
    detection = bias_detector(sentence, truncation=True)[0]
    is_biased = detection["label"] == "LABEL_1"

    report = {
        "sentence": sentence,
        "bias": "Biased" if is_biased else "Unbiased",
        "bias_score": round(detection["score"], 2),
        "bias_type": "-",
        "bias_type_score": "-",
    }

    # The type classifier only runs for sentences flagged as biased.
    if is_biased:
        bias_type = bias_type_classifier(sentence, truncation=True)[0]
        report["bias_type"] = bias_type["label"]
        report["bias_type_score"] = round(bias_type["score"], 2)

    return report
def analyze_pdf(pdf_file):
    """Run bias analysis on every sentence of a PDF.

    Args:
        pdf_file: Passed to ``extract_text_from_pdf``.

    Returns:
        A ``(stats_md, df)`` tuple: a Markdown summary of total, biased and
        unbiased sentence counts with percentages, and a DataFrame with one
        row per sentence as produced by ``analyze_sentence``.
    """
    text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(text)
    results = [analyze_sentence(s) for s in sentences]

    # Stats
    total = len(results)
    biased = sum(1 for r in results if r["bias"] == "Biased")
    unbiased = total - biased

    # A document with no extractable sentences gives total == 0; report 0.0%
    # instead of raising ZeroDivisionError.
    biased_pct = (biased / total) * 100 if total else 0.0
    unbiased_pct = (unbiased / total) * 100 if total else 0.0

    stats_md = (
        "\n### 📊 Bias Statistics\n"
        f"- **Total Sentences:** {total}\n"
        f"- **Biased Sentences:** {biased} ({biased_pct:.1f}%)\n"
        f"- **Unbiased Sentences:** {unbiased} ({unbiased_pct:.1f}%)\n"
    )

    # Naming the columns keeps the table schema stable when `results` is empty.
    columns = ["sentence", "bias", "bias_score", "bias_type", "bias_type_score"]
    df = pd.DataFrame(results, columns=columns)
    return stats_md, df
def analyze_text(text):
    """Analyze *text* as one sentence; handler for the "Analyze" button.

    The input is forwarded to ``analyze_sentence`` unchanged (it is not split
    into sentences first), and that function's result dict is returned.
    """
    report = analyze_sentence(text)
    return report
# New: Neutralize Bias
def neutralize_text(text, model_choice):
    """Rewrite *text* with the selected neutralizer model.

    Args:
        text: The text to rewrite.
        model_choice: A key of ``neutralizer_models`` selecting the model.

    Returns:
        The ``generated_text`` of the pipeline's first output.
    """
    generate = get_neutralizer(model_choice)
    # Greedy decoding (no sampling), output capped at 512 tokens.
    generation_options = {"max_length": 512, "do_sample": False}
    outputs = generate(text, **generation_options)
    return outputs[0]["generated_text"]
def neutralize_pdf(pdf_file, model_choice):
    """Neutralize a PDF sentence by sentence and return the rewritten text.

    Args:
        pdf_file: Passed to ``extract_text_from_pdf``.
        model_choice: A key of ``neutralizer_models`` selecting the model.

    Returns:
        The neutralized sentences joined by single spaces (an empty string
        when the PDF yields no sentences).
    """
    raw_text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(raw_text)
    generate = get_neutralizer(model_choice)

    def rewrite(sentence):
        # One generation call per sentence, same settings as neutralize_text.
        outputs = generate(sentence, max_length=512, do_sample=False)
        return outputs[0]["generated_text"]

    return " ".join(map(rewrite, sentences))
# Top badges
# (link target, shields.io badge image) pairs, rendered in this order inside
# one centered paragraph.
_BADGE_LINKS = (
    (
        "https://huggingface.co/himel7/bias-detector",
        "https://img.shields.io/badge/🤗-Hugging%20Face-yellow.svg",
    ),
    (
        "https://huggingface.co/himel7/bias-detector",
        "https://img.shields.io/badge/Model-Homepage-purple.svg",
    ),
    (
        "https://github.com/Himel1996/NewsBiasDetector/",
        "https://img.shields.io/badge/GitHub-Repo-orange.svg",
    ),
    (
        "https://arxiv.org/abs/2505.13010v1",
        "https://img.shields.io/badge/arXiv-2505.13010-red.svg",
    ),
)

badges_html = (
    '\n<p align="center">\n'
    + "".join(
        f'<a href="{href}">\n<img src="{image}">\n</a>\n'
        for href, image in _BADGE_LINKS
    )
    + "</p>\n"
)
# Build UI
# Components are created in display order; each button is wired to its
# handler right after the components it reads from and writes to exist.
with gr.Blocks() as demo:
    gr.HTML(badges_html)
    gr.Markdown("## Bias Analyzer & Neutralizer")
    gr.Markdown("### This app helps you to detect biases in sentences, analyse them, and neutralize sentences.")

    # Tab 1: analyze and/or neutralize the text typed into one textbox.
    with gr.Tab("Single Sentence"):
        text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...")
        output = gr.JSON()
        btn = gr.Button("Analyze")
        btn.click(fn=analyze_text, inputs=text_input, outputs=output)

        gr.Markdown("### Neutralize Bias")
        model_choice = gr.Dropdown(
            choices=list(neutralizer_models),
            label="Neutralizer Model",
            value="BART Neutralizer",
        )
        neutral_output = gr.Textbox(label="Neutralized Sentence", lines=3)
        neutral_btn = gr.Button("Neutralize")
        neutral_btn.click(
            fn=neutralize_text,
            inputs=[text_input, model_choice],
            outputs=neutral_output,
        )

    # Tab 2: per-sentence analysis and neutralization of an uploaded PDF.
    with gr.Tab("Analyze PDF"):
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        stats_output = gr.Markdown()
        table_output = gr.Dataframe(
            headers=["Sentence", "Bias", "Bias Score", "Bias Type", "Bias Type Score"],
        )
        analyze_btn = gr.Button("Analyze PDF")
        analyze_btn.click(
            fn=analyze_pdf,
            inputs=pdf_input,
            outputs=[stats_output, table_output],
        )

        gr.Markdown("### Neutralize Entire PDF")
        model_choice_pdf = gr.Dropdown(
            choices=list(neutralizer_models),
            label="Neutralizer Model",
            value="BART Neutralizer",
        )
        neutral_pdf_output = gr.Textbox(label="Neutralized PDF Text", lines=15)
        neutral_pdf_btn = gr.Button("Neutralize PDF")
        neutral_pdf_btn.click(
            fn=neutralize_pdf,
            inputs=[pdf_input, model_choice_pdf],
            outputs=neutral_pdf_output,
        )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|