Spaces:

kishanmaharaj
/

cfilt-reference-based-hallucination-detection

Sleeping

App Files Files Community

cfilt-reference-based-hallucination-detection / app.py

kishanmaharaj

Update app.py

5e2bd12 verified about 1 month ago

raw

history blame contribute delete

4.57 kB

	from transformers import pipeline
	import gradio as gr
	import nltk

	from nltk.tokenize import sent_tokenize
	import torch
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import gradio as gr


	# NLTK data packages fetched at start-up (sent_tokenize is imported from nltk above).
	for _nltk_resource in ("punkt", "punkt_tab"):
	    nltk.download(_nltk_resource)

	# NLI checkpoint used for every comparison.
	# Alternative checkpoint the author left commented out:
	#   "MoritzLaurer/DeBERTa-v3-base-mnli-fever-docnli-ling-2c"
	model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSequenceClassification.from_pretrained(model_name)

	# NOTE(review): `device` is not referenced by any other visible statement.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Class names zipped with the model's output probabilities in nli();
	# assumed to follow the checkpoint's logit order - TODO confirm against
	# model.config.id2label.
	labels = ["entailment", "neutral", "contradiction"]


	def nli(hypothesis, premise):
	    """Score how *premise* relates to *hypothesis* with the NLI model.

	    Args:
	        hypothesis: Statement to be checked.
	        premise: Text the statement is checked against.

	    Returns:
	        dict mapping each entry of ``labels`` to its softmax probability.
	    """
	    # Encoded as a (premise, hypothesis) pair, truncated to 512 tokens, and
	    # placed on whatever device the model currently lives on.
	    inputs = tokenizer(
	        premise, hypothesis, return_tensors="pt", truncation=True, max_length=512
	    ).to(model.device)

	    # Inference only: skip autograd bookkeeping for the forward pass.
	    with torch.no_grad():
	        logits = model(**inputs).logits[0]

	    probs = torch.softmax(logits, dim=-1).tolist()
	    return dict(zip(labels, probs))


	def get_labels(result):
	    """Pick the winning NLI class from a mapping of class name to score.

	    Args:
	        result: Mapping with "entailment", "neutral" and "contradiction" scores.

	    Returns:
	        "entailment" or "neutral" when that score is strictly greater than
	        both others; "contradiction" in every other case, ties included.
	    """
	    entail_score = result["entailment"]
	    neutral_score = result["neutral"]

	    if entail_score > neutral_score and entail_score > result["contradiction"]:
	        return "entailment"

	    if neutral_score > entail_score and neutral_score > result["contradiction"]:
	        return "neutral"

	    return "contradiction"





	def detect_hallucinations(generated_text, source_text):
	    """Detect intrinsic and extrinsic hallucinations in the generated text.

	    Both texts are split into sentences. Each generated sentence is compared
	    with the source sentences, in order, until one of them entails it.

	    Args:
	        generated_text: Text whose sentences are checked.
	        source_text: Reference text the sentences are checked against.

	    Returns:
	        dict with two lists:

	        * ``"intrinsic"``: one entry per (generated, source) sentence pair
	          labelled "contradiction", with keys ``generated_sentence``,
	          ``source_sentence`` and ``contradiction_score``. Pairs seen before
	          an entailing source sentence is reached are still reported.
	        * ``"extrinsic"``: one entry per generated sentence that no compared
	          source sentence entailed or contradicted, with keys ``claim``,
	          ``source_sentence``, ``status`` and ``confidence``. The source
	          sentence and confidence come from the last comparison made; both
	          are ``None`` when the source text contains no sentences.
	    """
	    generated_sentences = sent_tokenize(generated_text)
	    source_sentences = sent_tokenize(source_text)

	    intrinsic = []
	    extrinsic = []

	    for claim in generated_sentences:
	        # Per-claim state, reset for every generated sentence so nothing
	        # leaks over from the previous one (or is unbound when there are
	        # no source sentences at all).
	        supported = False
	        contradicted = False
	        last_source = None
	        last_score = None

	        for source in source_sentences:
	            prediction = nli(claim, source)
	            label = get_labels(prediction)
	            score = prediction[label]

	            if label == "entailment":
	                # One supporting sentence is enough; stop comparing.
	                supported = True
	                break

	            if label == "contradiction":
	                contradicted = True
	                intrinsic.append({
	                    "generated_sentence": claim,
	                    "source_sentence": source,
	                    "contradiction_score": score,
	                })
	            else:
	                # Neutral: remember it in case nothing better turns up.
	                last_source = source
	                last_score = score

	        # Unsupported and uncontradicted by the whole source => extrinsic.
	        if not (supported or contradicted):
	            extrinsic.append({
	                "claim": claim,
	                "source_sentence": last_source,
	                "status": "not_supported",
	                "confidence": last_score,
	            })

	    return {
	        "intrinsic": intrinsic,
	        "extrinsic": extrinsic,
	    }

	def gradio_interface(generated_text, source_text):
	    """Run detect_hallucinations on the two texts and return its result unchanged."""
	    return detect_hallucinations(generated_text, source_text)

	# Soft theme with teal/blue/gray hues; body, block-label and block-title
	# text colours are all pinned to "*neutral_900".
	theme = gr.themes.Soft(
	    primary_hue="teal",
	    secondary_hue="blue",
	    neutral_hue="gray",
	).set(
	    body_text_color="*neutral_900",
	    block_label_text_color="*neutral_900",
	    block_title_text_color="*neutral_900",
	)



	# Light stylesheet (white background, dark text). Not referenced by any
	# other visible statement: the Blocks app is built with dark_css.
	# The "#header_text" rule is closed with "}" so the stylesheet is well-formed.
	custom_css = """
	.gradio-container { background-color: #ffffff !important; }
	.gradio-json { font-family: 'Fira Code', monospace; font-size: 14px; color: #1f2937 !important; }
	#header_text {
	color: #111 !important;
	}
	"""


	# Dark stylesheet passed to gr.Blocks below: black container/blocks,
	# near-black inputs and JSON panel, light (#eee) text throughout.
	dark_css = """
	.gradio-container {
	background-color: #000 !important;
	color: #eee !important;
	}
	.gradio-container .gr-block {
	background-color: #000 !important;
	}
	.gradio-container textarea, .gradio-container input {
	background-color: #111 !important;
	color: #eee !important;
	}
	.gradio-json {
	background-color: #111 !important;
	color: #eee !important;
	}
	#header_text {
	color: #eee !important;
	}
	"""

	# Build the UI with the theme defined above and the dark stylesheet.
	demo = gr.Blocks(theme=theme, css=dark_css)

	with demo:
	    # The space after "#" is required for Markdown to render this as a
	    # heading rather than as literal text.
	    gr.Markdown("# Hallucination Detector", elem_id="header_text")
	    gr.Markdown(
	        "Detects intrinsic (internal contradictions) and extrinsic "
	        "(source unsupported) hallucinations",
	        elem_id="header_text",
	    )
	    gen = gr.Textbox(lines=8, label="Generated Text")
	    src = gr.Textbox(lines=8, label="Source Text")
	    out = gr.JSON(label="🔍 Analysis Result (JSON)")
	    run_button = gr.Button("Run Analysis")

	    # Submitting either textbox or clicking the button runs the same
	    # analysis on both fields and shows the result in the JSON panel.
	    for _trigger in (gen.submit, src.submit, run_button.click):
	        _trigger(detect_hallucinations, inputs=[gen, src], outputs=out)

	demo.launch()