File size: 5,300 Bytes
6da6acb
 
0f32d66
 
 
6da6acb
208382e
6da6acb
 
 
208382e
 
 
 
 
 
 
 
 
 
 
 
 
0f32d66
 
 
 
 
 
 
 
 
 
 
 
 
6da6acb
 
 
 
0f32d66
 
 
 
 
 
 
 
6da6acb
0f32d66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6da6acb
0f32d66
 
 
 
 
 
208382e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7540157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208382e
86d3754
 
208382e
 
86d3754
0f32d66
 
 
 
 
86d3754
208382e
 
 
 
 
 
0f32d66
 
 
 
 
 
6da6acb
208382e
 
 
 
 
 
 
6da6acb
86d3754
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import gradio as gr
from transformers import pipeline
import fitz  # PyMuPDF
import re
import pandas as pd

# Load detection models
# Both classifiers are instantiated eagerly at import time, so importing this
# module blocks until the two checkpoints below have been loaded.
bias_detector = pipeline("text-classification", model="himel7/bias-detector")
bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")

# Load neutralizer models (lazy load for speed)
# Maps the display name shown in the UI dropdowns to the checkpoint id.
# The pipelines themselves are only built on first use by get_neutralizer().
neutralizer_models = {
    "BART Neutralizer": "himel7/bias-neutralizer-bart",
    "T5 Small Neutralizer": "himel7/bias-neutralizer-t5s"
}
# Cache of already-built neutralizer pipelines, keyed by display name.
neutralizers = {}

def get_neutralizer(model_name):
    """Return the neutralizer pipeline for *model_name*, building it on first use.

    Args:
        model_name: A display name that is a key of ``neutralizer_models``.

    Returns:
        The ``text2text-generation`` pipeline cached in ``neutralizers``.

    Raises:
        KeyError: If *model_name* is not a key of ``neutralizer_models``.
    """
    loaded = neutralizers.get(model_name)
    if loaded is None:
        # First request for this model: build the pipeline and remember it.
        checkpoint = neutralizer_models[model_name]
        loaded = pipeline("text2text-generation", model=checkpoint)
        neutralizers[model_name] = loaded
    return loaded

# Utils
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``fitz.open`` accepts as its first argument.

    Returns:
        A single string made of each page's ``get_text("text")`` output, with
        no separator added between pages.
    """
    with fitz.open(pdf_file) as document:
        page_texts = [page.get_text("text") for page in document]
    return "".join(page_texts)

def split_into_sentences(text):
    """Split *text* into sentences at whitespace that follows ``.``, ``!`` or ``?``.

    Leading and trailing whitespace is removed first. The terminating
    punctuation stays attached to its sentence, and empty fragments are
    dropped, so an empty or whitespace-only input yields ``[]``.
    """
    trimmed = text.strip()
    # The look-behind keeps the punctuation mark on the left-hand piece.
    pieces = re.split(r'(?<=[.!?])\s+', trimmed)
    return list(filter(None, pieces))

def analyze_sentence(sentence):
    """Classify one sentence as biased or unbiased and, if biased, label the bias type.

    Args:
        sentence: The text to classify.

    Returns:
        A dict with the keys ``sentence``, ``bias`` ("Biased" or "Unbiased"),
        ``bias_score`` (detector score rounded to 2 decimals), ``bias_type``
        and ``bias_type_score``. The last two are ``"-"`` for unbiased
        sentences, because the bias-type classifier is only run on sentences
        the detector flags.
    """
    # truncation=True asks the tokenizer to cut over-long inputs down to its
    # configured maximum instead of feeding the model more tokens than it can
    # handle. "Sentences" extracted from PDFs (tables, headings, text without
    # punctuation) can be arbitrarily long.
    detection = bias_detector(sentence, truncation=True)[0]
    bias_score = round(detection['score'], 2)

    if detection['label'] != "LABEL_1":  # anything but LABEL_1 is treated as unbiased
        return {
            "sentence": sentence,
            "bias": "Unbiased",
            "bias_score": bias_score,
            "bias_type": "-",
            "bias_type_score": "-"
        }

    # LABEL_1 == biased: ask the second model which kind of bias it is.
    type_result = bias_type_classifier(sentence, truncation=True)[0]
    return {
        "sentence": sentence,
        "bias": "Biased",
        "bias_score": bias_score,
        "bias_type": type_result['label'],
        "bias_type_score": round(type_result['score'], 2)
    }

def analyze_pdf(pdf_file):
    """Run bias analysis on every sentence of a PDF and summarize the results.

    Args:
        pdf_file: The uploaded PDF as delivered by the UI file input, or
            ``None`` when nothing has been uploaded.

    Returns:
        A ``(stats_markdown, dataframe)`` tuple. The dataframe has one row per
        sentence with the columns produced by ``analyze_sentence``. When no
        file was given, or the PDF yields no sentences, the markdown is a short
        notice and the dataframe is empty (same columns), instead of failing
        with a division by zero while computing percentages.
    """
    columns = ["sentence", "bias", "bias_score", "bias_type", "bias_type_score"]

    if pdf_file is None:
        return "**Please upload a PDF file first.**", pd.DataFrame(columns=columns)

    text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(text)

    if not sentences:
        # E.g. a scanned/image-only PDF: nothing to classify, nothing to divide by.
        return (
            "**No extractable text was found in this PDF.**",
            pd.DataFrame(columns=columns),
        )

    results = [analyze_sentence(s) for s in sentences]

    # Stats
    total = len(results)
    biased = sum(1 for r in results if r["bias"] == "Biased")
    unbiased = total - biased

    stats_md = f"""
    ### 📊 Bias Statistics
    - **Total Sentences:** {total}
    - **Biased Sentences:** {biased} ({(biased/total)*100:.1f}%)
    - **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
    """

    df = pd.DataFrame(results, columns=columns)
    return stats_md, df

def analyze_text(text):
    """Analyze the text typed into the UI as one single sentence.

    The input is forwarded unchanged to ``analyze_sentence``; no sentence
    splitting is performed here.
    """
    report = analyze_sentence(text)
    return report

# New: Neutralize Bias
def neutralize_text(text, model_choice):
    """Rewrite *text* with the neutralizer model selected by *model_choice*.

    Args:
        text: The text to neutralize.
        model_choice: Display name of the neutralizer, passed to
            ``get_neutralizer``.

    Returns:
        The ``generated_text`` of the first result, produced with
        ``max_length=512`` and sampling disabled.
    """
    outputs = get_neutralizer(model_choice)(text, max_length=512, do_sample=False)
    first = outputs[0]
    return first["generated_text"]

def neutralize_pdf(pdf_file, model_choice):
    """Neutralize a whole PDF sentence by sentence and return the joined result.

    Args:
        pdf_file: The PDF to read, passed to ``extract_text_from_pdf``.
        model_choice: Display name of the neutralizer, passed to
            ``get_neutralizer``.

    Returns:
        The neutralized sentences joined by single spaces (an empty string
        when the PDF yields no sentences).
    """
    sentences = split_into_sentences(extract_text_from_pdf(pdf_file))

    neutralizer = get_neutralizer(model_choice)
    rewritten = []
    for sentence in sentences:
        # One generation call per sentence; keep only the first candidate.
        generated = neutralizer(sentence, max_length=512, do_sample=False)
        rewritten.append(generated[0]["generated_text"])
    return " ".join(rewritten)


# Top badges
# Static HTML snippet rendered at the very top of the page via gr.HTML():
# a centered row of shields.io badges linking to the model page, the GitHub
# repository and the arXiv paper.
# NOTE(review): the first two badges ("Hugging Face" and "Model Homepage")
# point to the same URL — confirm whether that is intentional.
badges_html = """
<p align="center">
  <a href="https://huggingface.co/himel7/bias-detector">
    <img src="https://img.shields.io/badge/🤗-Hugging%20Face-yellow.svg">
  </a>
  <a href="https://huggingface.co/himel7/bias-detector">
    <img src="https://img.shields.io/badge/Model-Homepage-purple.svg">
  </a>
  <a href="https://github.com/Himel1996/NewsBiasDetector/">
    <img src="https://img.shields.io/badge/GitHub-Repo-orange.svg">
  </a>
  <a href="https://arxiv.org/abs/2505.13010v1">
    <img src="https://img.shields.io/badge/arXiv-2505.13010-red.svg">
  </a>
</p>
"""

# Build UI
# Two tabs: one for a single typed sentence, one for an uploaded PDF. Each tab
# offers an "analyze" action and a "neutralize" action that share the same input.
with gr.Blocks() as demo:
    gr.HTML(badges_html)
    gr.Markdown("## Bias Analyzer & Neutralizer")
    gr.Markdown("### This app helps you to detect biases in sentences, analyse them, and neutralize sentences.")

    with gr.Tab("Single Sentence"):
        # text_input feeds both the Analyze and the Neutralize buttons below.
        text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...")
        output = gr.JSON()
        btn = gr.Button("Analyze")
        btn.click(analyze_text, inputs=text_input, outputs=output)

        gr.Markdown("### Neutralize Bias")
        # Dropdown choices are the display names (keys) of neutralizer_models.
        model_choice = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer")
        neutral_output = gr.Textbox(label="Neutralized Sentence", lines=3)
        neutral_btn = gr.Button("Neutralize")
        neutral_btn.click(neutralize_text, inputs=[text_input, model_choice], outputs=neutral_output)

    with gr.Tab("Analyze PDF"):
        # pdf_input feeds both the Analyze PDF and the Neutralize PDF buttons below.
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        stats_output = gr.Markdown()
        # NOTE(review): these display headers differ from the snake_case column
        # names of the DataFrame returned by analyze_pdf — confirm which set the
        # rendered table ends up showing.
        table_output = gr.Dataframe(headers=["Sentence", "Bias", "Bias Score", "Bias Type", "Bias Type Score"])
        analyze_btn = gr.Button("Analyze PDF")
        # analyze_pdf returns (markdown, dataframe), matching the two outputs in order.
        analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output])

        gr.Markdown("### Neutralize Entire PDF")
        model_choice_pdf = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer")
        neutral_pdf_output = gr.Textbox(label="Neutralized PDF Text", lines=15)
        neutral_pdf_btn = gr.Button("Neutralize PDF")
        neutral_pdf_btn.click(neutralize_pdf, inputs=[pdf_input, model_choice_pdf], outputs=neutral_pdf_output)


# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()