File size: 4,566 Bytes
1ebdf28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e2bd12
1ebdf28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from transformers import pipeline
import gradio as gr
import nltk

from nltk.tokenize import sent_tokenize
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import gradio as gr


# Fetch the NLTK sentence-splitter models required by sent_tokenize() below.
nltk.download("punkt")
nltk.download('punkt_tab')

# NLI model; the commented alternative is a smaller 2-class variant.
model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli" #"MoritzLaurer/DeBERTa-v3-base-mnli-fever-docnli-ling-2c" 
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# NOTE(review): `device` is computed but never used — there is no
# model.to(device) and inputs are not moved either, so inference below
# runs on CPU regardless of CUDA availability. Confirm intent.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Order must match the model's logit order (standard MNLI head layout
# for this checkpoint); zipped against softmax output in nli().
labels = ["entailment", "neutral", "contradiction"]


def nli(hypothesis, premise):
    """Score a (premise, hypothesis) pair with the NLI model.

    Returns a dict mapping each name in the module-level ``labels``
    ("entailment" / "neutral" / "contradiction") to its softmax probability.
    """
    encoded = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
    output = model(**encoded)
    scores = torch.softmax(output.logits[0], dim=-1).tolist()
    return {name: prob for name, prob in zip(labels, scores)}


def get_labels(result):
    """Return the name of the highest-probability NLI label.

    Parameters
    ----------
    result : dict
        Mapping of label name -> probability, as produced by ``nli``.

    Returns
    -------
    str
        The label with the maximum probability (ties broken by the
        dict's insertion order, i.e. entailment > neutral > contradiction).

    Fix: the original chain of strict ``>`` comparisons mishandled ties —
    e.g. {"entailment": 0.4, "contradiction": 0.4, "neutral": 0.2} fell
    through to the ``else`` branch and returned "contradiction" even
    though entailment was a joint maximum. A plain argmax is correct.
    """
    return max(result, key=result.get)





def detect_hallucinations(generated_text, source_text):
    """Detect intrinsic and extrinsic hallucinations in the generated text.

    Each generated sentence is NLI-scored against every source sentence:

    * "contradiction" -> recorded as an intrinsic hallucination (the
      generated sentence conflicts with the source).
    * "entailment"    -> the sentence is supported; stop checking it.
    * Neither entailment nor contradiction against any source sentence
      -> recorded as an extrinsic hallucination (unsupported claim).

    Parameters
    ----------
    generated_text : str
    source_text : str

    Returns
    -------
    dict
        {"intrinsic": [...], "extrinsic": [...]} — lists of per-pair
        detail dicts.

    Fixes over the original:
    * No more NameError when ``source_text`` tokenizes to zero sentences
      (``result`` was read after a loop that never ran).
    * The extrinsic check previously sat after the inner loop and only
      inspected the *last* pair's label, so unsupported sentences whose
      final comparison happened to be "contradiction" were never flagged,
      and the reported source sentence / confidence were arbitrary. We now
      track the most confident neutral verdict across all source sentences.
    * A sentence is no longer reported as both intrinsic and extrinsic.
    """
    generated_sentences = sent_tokenize(generated_text)
    source_sentences = sent_tokenize(source_text)

    intrinsic = []
    extrinsic = []

    for gen_sent in generated_sentences:
        entailed = False
        contradicted = False
        best_neutral = None  # highest-confidence "not supported" candidate

        for src_sent in source_sentences:
            prediction = nli(gen_sent, src_sent)
            label = get_labels(prediction)
            score = prediction[label]

            if label == "contradiction":
                contradicted = True
                intrinsic.append({
                    "generated_sentence": gen_sent,
                    "source_sentence": src_sent,
                    "contradiction_score": score,
                })
            elif label == "entailment":
                entailed = True
                break  # supported by the source; no need to keep checking
            else:  # neutral
                if best_neutral is None or score > best_neutral["confidence"]:
                    best_neutral = {
                        "claim": gen_sent,
                        "source_sentence": src_sent,
                        "status": "not_supported",
                        "confidence": score,
                    }

        # Unsupported by every source sentence and not contradicted by any:
        # extrinsic hallucination.
        if not entailed and not contradicted and best_neutral is not None:
            extrinsic.append(best_neutral)

    return {
        "intrinsic": intrinsic,
        "extrinsic": extrinsic
    }

def gradio_interface(generated_text, source_text):
    """Thin adapter exposing detect_hallucinations to the Gradio UI."""
    return detect_hallucinations(generated_text, source_text)

# Soft light theme with dark text, passed to gr.Blocks below.
theme = gr.themes.Soft(primary_hue="teal", secondary_hue="blue", neutral_hue="gray").set(
    body_text_color="*neutral_900",
    block_label_text_color="*neutral_900",
    block_title_text_color="*neutral_900"
)


# Light-mode stylesheet. NOTE(review): currently unused — the app is
# constructed with `dark_css`; kept for switching back to light mode.
# Fix: the original `#header_text` rule was missing its closing brace,
# making the stylesheet invalid CSS.
custom_css = """
.gradio-container { background-color: #ffffff !important; }
.gradio-json { font-family: 'Fira Code', monospace; font-size: 14px; color: #1f2937 !important; }
#header_text {
  color: #111 !important;
}
"""


# Dark-mode stylesheet actually applied to the app (css=dark_css below):
# black page/background, light text, dark inputs and JSON panel.
dark_css = """
.gradio-container {
  background-color: #000 !important;
  color: #eee !important;
}
.gradio-container .gr-block {
  background-color: #000 !important;
}
.gradio-container textarea, .gradio-container input {
  background-color: #111 !important;
  color: #eee !important;
}
.gradio-json {
  background-color: #111 !important;
  color: #eee !important;
}
#header_text {
  color: #eee !important;
}
"""

demo = gr.Blocks(theme=theme, css=dark_css)

with demo:
    # Fix: Markdown requires a space after '#' for a heading —
    # "#Hallucination Detector" rendered as literal text, not an <h1>.
    gr.Markdown("# Hallucination Detector", elem_id="header_text")
    # NOTE(review): elem_id "header_text" is reused on two components;
    # element ids should be unique per page — confirm intent.
    gr.Markdown(
        "Detects **intrinsic** (internal contradictions) and **extrinsic** "
        "(source unsupported) hallucinations",
        elem_id="header_text"
    )
    gen = gr.Textbox(lines=8, label="Generated Text")
    src = gr.Textbox(lines=8, label="Source Text")
    out = gr.JSON(label="🔍 Analysis Result (JSON)")

    # Run analysis on Enter in either textbox, or via the button.
    gen.submit(detect_hallucinations, inputs=[gen, src], outputs=out)
    src.submit(detect_hallucinations, inputs=[gen, src], outputs=out)
    gr.Button("Run Analysis").click(detect_hallucinations, inputs=[gen, src], outputs=out)

demo.launch()