import streamlit as st
import zipfile
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
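
# Dataset archive expected next to this script, and the folder it is unpacked into.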
ZIP_FILE = "xnli-multilingual-nli-dataset.zip"
EXTRACT_DIR = "extracted_data"
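
# Unzip the archive on first use and return the per-language CSV file names.
# st.cache_data keeps the returned list, so extraction and the directory scan run only once.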
@st.cache_data
def extract_and_list_csv_files():
    if not os.path.exists(EXTRACT_DIR):
        if not os.path.exists(ZIP_FILE):
            return []
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            zip_ref.extractall(EXTRACT_DIR)
    return [f for f in os.listdir(EXTRACT_DIR) if f.endswith('.csv')]
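
# Build the NLI pipeline once; st.cache_resource keeps the model in memory
# across Streamlit reruns instead of reloading the weights on every interaction.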
@st.cache_resource
def load_nli_model():
    tokenizer = AutoTokenizer.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
    model = AutoModelForSequenceClassification.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
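
# Illustrative use of the cached pipeline (hypothetical premise/hypothesis pair;
# the model's labels are entailment, neutral, and contradiction):
#   nli = load_nli_model()
#   nli({"text": "The cat is sleeping.", "text_pair": "An animal is resting."})

# --- Streamlit UI ---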
st.set_page_config(page_title="Multilingual NLI Explorer", layout="centered")
st.title("Multilingual NLI (Natural Language Inference) Explorer")
st.markdown("Upload premise & hypothesis pairs or use the dataset to explore entailment, contradiction, or neutrality.")
csv_files = extract_and_list_csv_files()
if not csv_files:
    st.warning("⚠️ No CSV files found. Please make sure `xnli-multilingual-nli-dataset.zip` is uploaded.")
else:
    selected_csv = st.selectbox("Choose a language CSV file:", csv_files)
    if selected_csv:
        file_path = os.path.join(EXTRACT_DIR, selected_csv)
        try:
            df = pd.read_csv(file_path).dropna()
            sample_df = df.sample(min(5, len(df))).reset_index(drop=True)
            st.subheader("Sample from Dataset")
            st.dataframe(sample_df[['premise', 'hypothesis', 'label']])
            st.subheader("Run Inference")
            index = st.number_input("Select Sample Index", min_value=0, max_value=len(sample_df)-1, value=0, step=1)
            premise = sample_df.loc[index, 'premise']
            hypothesis = sample_df.loc[index, 'hypothesis']
            st.markdown(f"**Premise:** {premise}")
            st.markdown(f"**Hypothesis:** {hypothesis}")
            if st.button("Run NLI Prediction"):
                nli_pipeline = load_nli_model()
                # Pass the pair as text/text_pair so the tokenizer inserts the
                # model's own separator token instead of a literal "</s>" string.
                result = nli_pipeline({"text": premise, "text_pair": hypothesis})
                prediction = result[0] if isinstance(result, list) else result
                st.success(f"**Prediction:** {prediction['label']} (Score: {prediction['score']:.2f})")
        except Exception as e:
            st.error(f"❌ Error reading CSV file: {e}")
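
# To run locally (assuming this file is saved as app.py and that streamlit,
# transformers, torch, and sentencepiece are installed):
#   streamlit run app.py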