import streamlit as st
import zipfile
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

ZIP_FILE = "xnli-multilingual-nli-dataset.zip"
EXTRACT_DIR = "extracted_data"
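# Expected archive layout: one CSV per language, each with 'premise',
# 'hypothesis', and 'label' columns (inferred from how they are used below).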

# Unzip the dataset once (if needed) and cache the list of per-language CSV files.
@st.cache_data
def extract_and_list_csv_files():
    if not os.path.exists(EXTRACT_DIR):
        if not os.path.exists(ZIP_FILE):
            return []
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            zip_ref.extractall(EXTRACT_DIR)
    return [f for f in os.listdir(EXTRACT_DIR) if f.endswith('.csv')]

# Cache the tokenizer and model as a resource so they are loaded only once per session.
@st.cache_resource
def load_nli_model():
    tokenizer = AutoTokenizer.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
    model = AutoModelForSequenceClassification.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
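    # Per the MoritzLaurer/mDeBERTa-v3-base-mnli-xnli model card, the classifier
    # predicts entailment / neutral / contradiction.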
    return pipeline("text-classification", model=model, tokenizer=tokenizer)

st.set_page_config(page_title="Multilingual NLI Explorer", layout="centered")
st.title("🌍 Multilingual NLI (Natural Language Inference) Explorer")
st.markdown("Pick a language CSV from the XNLI dataset and explore whether sampled premise–hypothesis pairs show entailment, contradiction, or neutrality.")

csv_files = extract_and_list_csv_files()

if not csv_files:
    st.warning("⚠️ No CSV files found. Please make sure `xnli-multilingual-nli-dataset.zip` is uploaded.")
else:
    selected_csv = st.selectbox("Choose a language CSV file:", csv_files)

    if selected_csv:
        file_path = os.path.join(EXTRACT_DIR, selected_csv)

        try:
            df = pd.read_csv(file_path).dropna()
            # Fix the random seed: Streamlit reruns this script on every interaction,
            # so an unseeded sample would change between display and prediction.
            sample_df = df.sample(min(5, len(df)), random_state=42).reset_index(drop=True)

            st.subheader("πŸ“„ Sample from Dataset")
            st.dataframe(sample_df[['premise', 'hypothesis', 'label']])

            st.subheader("πŸ” Run Inference")
            index = st.number_input("Select Sample Index", min_value=0, max_value=len(sample_df)-1, value=0, step=1)
            premise = sample_df.loc[index, 'premise']
            hypothesis = sample_df.loc[index, 'hypothesis']

            st.markdown(f"**Premise:** {premise}")
            st.markdown(f"**Hypothesis:** {hypothesis}")

            if st.button("Run NLI Prediction"):
                nli_pipeline = load_nli_model()
                # Pass the pair explicitly so the tokenizer inserts its own separator
                # tokens ("</s>" is the XLM-R/RoBERTa convention, not DeBERTa's).
                result = nli_pipeline([{"text": str(premise), "text_pair": str(hypothesis)}])
                st.success(f"**Prediction:** {result[0]['label']} (Score: {result[0]['score']:.2f})")
        except Exception as e:
            st.error(f"❌ Error reading CSV file: {e}")
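

# ---------------------------------------------------------------------------
# Optional sketch (not wired into the UI above): the same prediction without
# the pipeline wrapper, encoding premise/hypothesis as a true sentence pair.
# This follows the standard Hugging Face tokenizer/model pattern; the function
# name and the use of torch.softmax here are illustrative, not part of the app.
# ---------------------------------------------------------------------------
def predict_nli_scores(premise: str, hypothesis: str) -> dict:
    import torch  # local import so the Streamlit app's startup is unchanged

    nli = load_nli_model()
    tokenizer, model = nli.tokenizer, nli.model
    # Encode as a pair so the correct separator/special tokens are added.
    inputs = tokenizer(premise, hypothesis, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    # Map each class index to its human-readable label from the model config.
    return {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}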