Spaces:

MuzzammilShah
/

NLP-Playground

Sleeping

File size: 8,224 Bytes

3ab6535

import streamlit as st
import torch
from utils.ui_helpers import plot_similarity_heatmap

def show_semantic_search(nlp_engine):
    """Display the semantic search UI component"""
    #st.markdown("🔍🧠")
    st.title("Semantic Search🔍🧠")
    st.markdown("""
    Search for semantically similar texts using sentence embeddings.
    This tool converts text to vector representations and computes similarity.
    """)
    
    # Create tabs for different modes
    tab1, tab2 = st.tabs(["Semantic Search", "Text Similarity"])
    
    # TAB 1: Semantic Search
    with tab1:
        st.markdown("### Search in a Corpus of Texts")
        
        # Corpus input
        corpus_text = st.text_area(
            "Enter corpus texts (one text per line)",
            "The weather is sunny today.\nI enjoy walking in the park on a beautiful day.\nAI is transforming many industries.",
            height=150,
            help="Enter multiple sentences or paragraphs, one per line"
        )
        
        # Convert to list
        corpus = [text.strip() for text in corpus_text.split('\n') if text.strip()]
        
        # Query input
        query = st.text_input(
            "Enter your search query",
            "What is the forecast for today?"
        )
        
        # Process button
        if st.button("Search", key="search_button"):
            if not corpus or not query:
                st.error("Please provide both corpus texts and a search query.")
            else:
                with st.spinner("Computing similarities..."):
                    # Get embeddings
                    query_embedding = nlp_engine.get_embeddings(query)
                    corpus_embeddings = nlp_engine.get_embeddings(corpus)
                    
                    # Ensure query embedding is 2D for comparison
                    if query_embedding.ndim == 1:
                        query_embedding = query_embedding.unsqueeze(0)
                    
                    # Compute similarities
                    similarities = torch.nn.functional.cosine_similarity(
                        query_embedding, 
                        corpus_embeddings, 
                        dim=1
                    ).tolist()
                    
                    # Display results
                    st.markdown("### Search Results")
                    
                    # Plot similarities
                    fig = plot_similarity_heatmap(query, corpus, similarities)
                    st.plotly_chart(fig, use_container_width=True)
                    
                    # Show sorted results in a table
                    results = []
                    for i, (text, score) in enumerate(sorted(zip(corpus, similarities), key=lambda x: x[1], reverse=True)):
                        results.append({
                            "Rank": i + 1,
                            "Text": text,
                            "Similarity": f"{score:.4f}"
                        })
                    
                    st.table(results)
    
    # TAB 2: Text Similarity
    with tab2:
        st.markdown("### Compare Two Texts")
        
        # Text inputs
        col1, col2 = st.columns(2)
        
        with col1:
            text1 = st.text_area(
                "Text 1",
                "The weather forecast predicts rain tomorrow.",
                height=150
            )
        
        with col2:
            text2 = st.text_area(
                "Text 2",
                "According to meteorologists, precipitation is expected the following day.",
                height=150
            )
        
        # Process button
        if st.button("Compare", key="compare_button"):
            if not text1 or not text2:
                st.error("Please provide both texts to compare.")
            else:
                with st.spinner("Computing similarity..."):
                    # Get embeddings
                    embedding1 = nlp_engine.get_embeddings(text1)
                    embedding2 = nlp_engine.get_embeddings(text2)
                    
                    # Ensure embeddings are 2D for comparison
                    if embedding1.ndim == 1:
                        embedding1 = embedding1.unsqueeze(0)
                    if embedding2.ndim == 1:
                        embedding2 = embedding2.unsqueeze(0)
                    
                    # Compute similarity
                    similarity = torch.nn.functional.cosine_similarity(
                        embedding1, 
                        embedding2, 
                        dim=1
                    ).item()
                    
                    # Display result
                    st.markdown("### Similarity Result")
                    
                    # Create a visual representation of similarity
                    similarity_percentage = round(similarity * 100, 2)
                    st.progress(similarity)
                    
                    # Show the similarity score
                    if similarity_percentage > 80:
                        st.success(f"Similarity Score: {similarity_percentage}% (Very Similar)")
                    elif similarity_percentage > 60:
                        st.info(f"Similarity Score: {similarity_percentage}% (Moderately Similar)")
                    elif similarity_percentage > 40:
                        st.warning(f"Similarity Score: {similarity_percentage}% (Somewhat Similar)")
                    else:
                        st.error(f"Similarity Score: {similarity_percentage}% (Not Very Similar)")
    
    # Example section
    with st.expander("Example texts to try"):
        st.markdown("""
        ### Example Corpus for Semantic Search
        
        ```
        Artificial intelligence is revolutionizing healthcare through early disease detection.
        Machine learning algorithms can predict patient outcomes based on historical data.
        The automotive industry is investing heavily in self-driving car technology.
        Climate change is causing more frequent and severe weather events globally.
        Regular exercise and a balanced diet are essential for maintaining good health.
        The global economy faces significant challenges due to supply chain disruptions.
        Renewable energy sources are becoming increasingly cost-competitive with fossil fuels.
        ```
        
        **Example Queries:**
        - How is AI helping doctors?
        - What's happening with autonomous vehicles?
        - How can I stay healthy?
        - What are the economic trends currently?
        
        ### Example Text Pairs for Similarity Comparison
        
        **Similar Pairs:**
        
        Text 1: `The film received positive reviews from critics and performed well at the box office.`
        Text 2: `The movie was praised by reviewers and was commercially successful.`
        
        Text 1: `The company announced a significant increase in quarterly earnings.`
        Text 2: `The firm reported a substantial growth in profits for the last quarter.`
        
        **Dissimilar Pairs:**
        
        Text 1: `The recipe calls for two tablespoons of olive oil and fresh herbs.`
        Text 2: `The basketball game went into overtime after a tied score at the final buzzer.`
        
        Text 1: `Quantum computing leverages principles of quantum mechanics to process information.`
        Text 2: `The annual flower festival attracts thousands of visitors to the botanical gardens.`
        """)
    
    # Information about the model
    with st.expander("About this model"):
        st.markdown("""
        **Model**: `sentence-transformers/all-MiniLM-L6-v2`
        
        This is a sentence transformer model that generates fixed-size embeddings for text.
        
        - **Size**: 80M parameters (smaller than BERT Base)
        - **Embedding Dimension**: 384
        - **Performance**: 58.80% accuracy on Semantic Textual Similarity (STS) benchmark
        
        The model maps sentences or paragraphs to a dense vector space where semantically similar texts are close to each other, enabling semantic search and similarity comparisons.
        """)