File size: 4,566 Bytes
1ebdf28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e2bd12
1ebdf28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from transformers import pipeline
import gradio as gr
import nltk

from nltk.tokenize import sent_tokenize
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import gradio as gr


# Fetch the NLTK sentence-splitter models required by sent_tokenize() below.
nltk.download("punkt")
nltk.download('punkt_tab')

# NLI model; the commented alternative is a smaller 2-class variant.
model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli" #"MoritzLaurer/DeBERTa-v3-base-mnli-fever-docnli-ling-2c" 
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# NOTE(review): `device` is computed but never used — there is no
# model.to(device) and inputs are not moved either, so inference below
# runs on CPU regardless of CUDA availability. Confirm intent.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Order must match the model's logit order (standard MNLI head layout
# for this checkpoint); zipped against softmax output in nli().
labels = ["entailment", "neutral", "contradiction"]


def nli(hypothesis, premise):
    """Score a (premise, hypothesis) pair with the NLI model.

    Returns a dict mapping each name in the module-level ``labels``
    ("entailment" / "neutral" / "contradiction") to its softmax probability.
    """
    encoded = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
    output = model(**encoded)
    scores = torch.softmax(output.logits[0], dim=-1).tolist()
    return {name: prob for name, prob in zip(labels, scores)}


def get_labels(result):
    """Return the name of the highest-probability NLI label.

    Parameters
    ----------
    result : dict
        Mapping of label name -> probability, as produced by ``nli``.

    Returns
    -------
    str
        The label with the maximum probability (ties broken by the
        dict's insertion order, i.e. entailment > neutral > contradiction).

    Fix: the original chain of strict ``>`` comparisons mishandled ties —
    e.g. {"entailment": 0.4, "contradiction": 0.4, "neutral": 0.2} fell
    through to the ``else`` branch and returned "contradiction" even
    though entailment was a joint maximum. A plain argmax is correct.
    """
    return max(result, key=result.get)





def detect_hallucinations(generated_text, source_text):
    """Detect intrinsic and extrinsic hallucinations in the generated text.

    Each generated sentence is NLI-scored against every source sentence:

    * "contradiction" -> recorded as an intrinsic hallucination (the
      generated sentence conflicts with the source).
    * "entailment"    -> the sentence is supported; stop checking it.
    * Neither entailment nor contradiction against any source sentence
      -> recorded as an extrinsic hallucination (unsupported claim).

    Parameters
    ----------
    generated_text : str
    source_text : str

    Returns
    -------
    dict
        {"intrinsic": [...], "extrinsic": [...]} — lists of per-pair
        detail dicts.

    Fixes over the original:
    * No more NameError when ``source_text`` tokenizes to zero sentences
      (``result`` was read after a loop that never ran).
    * The extrinsic check previously sat after the inner loop and only
      inspected the *last* pair's label, so unsupported sentences whose
      final comparison happened to be "contradiction" were never flagged,
      and the reported source sentence / confidence were arbitrary. We now
      track the most confident neutral verdict across all source sentences.
    * A sentence is no longer reported as both intrinsic and extrinsic.
    """
    generated_sentences = sent_tokenize(generated_text)
    source_sentences = sent_tokenize(source_text)

    intrinsic = []
    extrinsic = []

    for gen_sent in generated_sentences:
        entailed = False
        contradicted = False
        best_neutral = None  # highest-confidence "not supported" candidate

        for src_sent in source_sentences:
            prediction = nli(gen_sent, src_sent)
            label = get_labels(prediction)
            score = prediction[label]

            if label == "contradiction":
                contradicted = True
                intrinsic.append({
                    "generated_sentence": gen_sent,
                    "source_sentence": src_sent,
                    "contradiction_score": score,
                })
            elif label == "entailment":
                entailed = True
                break  # supported by the source; no need to keep checking
            else:  # neutral
                if best_neutral is None or score > best_neutral["confidence"]:
                    best_neutral = {
                        "claim": gen_sent,
                        "source_sentence": src_sent,
                        "status": "not_supported",
                        "confidence": score,
                    }

        # Unsupported by every source sentence and not contradicted by any:
        # extrinsic hallucination.
        if not entailed and not contradicted and best_neutral is not None:
            extrinsic.append(best_neutral)

    return {
        "intrinsic": intrinsic,
        "extrinsic": extrinsic
    }

def gradio_interface(generated_text, source_text):
    """Thin adapter exposing detect_hallucinations to the Gradio UI."""
    return detect_hallucinations(generated_text, source_text)

# Soft light theme with dark text, passed to gr.Blocks below.
theme = gr.themes.Soft(primary_hue="teal", secondary_hue="blue", neutral_hue="gray").set(
    body_text_color="*neutral_900",
    block_label_text_color="*neutral_900",
    block_title_text_color="*neutral_900"
)


# Light-mode stylesheet. NOTE(review): currently unused — the app is
# constructed with `dark_css`; kept for switching back to light mode.
# Fix: the original `#header_text` rule was missing its closing brace,
# making the stylesheet invalid CSS.
custom_css = """
.gradio-container { background-color: #ffffff !important; }
.gradio-json { font-family: 'Fira Code', monospace; font-size: 14px; color: #1f2937 !important; }
#header_text {
  color: #111 !important;
}
"""


# Dark-mode stylesheet actually applied to the app (css=dark_css below):
# black page/background, light text, dark inputs and JSON panel.
dark_css = """
.gradio-container {
  background-color: #000 !important;
  color: #eee !important;
}
.gradio-container .gr-block {
  background-color: #000 !important;
}
.gradio-container textarea, .gradio-container input {
  background-color: #111 !important;
  color: #eee !important;
}
.gradio-json {
  background-color: #111 !important;
  color: #eee !important;
}
#header_text {
  color: #eee !important;
}
"""

demo = gr.Blocks(theme=theme, css=dark_css)

with demo:
    # Fix: Markdown requires a space after '#' for a heading —
    # "#Hallucination Detector" rendered as literal text, not an <h1>.
    gr.Markdown("# Hallucination Detector", elem_id="header_text")
    # NOTE(review): elem_id "header_text" is reused on two components;
    # element ids should be unique per page — confirm intent.
    gr.Markdown(
        "Detects **intrinsic** (internal contradictions) and **extrinsic** "
        "(source unsupported) hallucinations",
        elem_id="header_text"
    )
    gen = gr.Textbox(lines=8, label="Generated Text")
    src = gr.Textbox(lines=8, label="Source Text")
    out = gr.JSON(label="🔍 Analysis Result (JSON)")

    # Run analysis on Enter in either textbox, or via the button.
    gen.submit(detect_hallucinations, inputs=[gen, src], outputs=out)
    src.submit(detect_hallucinations, inputs=[gen, src], outputs=out)
    gr.Button("Run Analysis").click(detect_hallucinations, inputs=[gen, src], outputs=out)

demo.launch()