File size: 3,548 Bytes
4fa700e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d198c7d
4fa700e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
from sentence_transformers import SentenceTransformer
import torch

# Name of the embedding model handed to SentenceTransformer.
EMBEDDING_MODEL_ID = "google/embeddinggemma-300m"

# Built once at module import; find_similar_documents reuses this instance
# for every search instead of constructing a model per request.
model = SentenceTransformer(EMBEDDING_MODEL_ID)

def find_similar_documents(query, documents):
    """Rank newline-separated documents by similarity to a query.

    Args:
        query: Search text. ``None`` or a blank string counts as missing.
        documents: Candidate documents, one per line. Each line is
            stripped of surrounding whitespace and blank lines are
            dropped. ``None`` or all-blank input counts as missing.

    Returns:
        A display string. If an input is missing, a short prompt asking
        for it. Otherwise one numbered entry per document, highest score
        first, each showing the score to four decimals and the document
        text; entries are separated by a blank line.
    """
    # Treat None like an empty string so a missing field produces the
    # prompt below rather than an AttributeError on .strip().
    if not (query or "").strip():
        return "Please enter a query."

    # One document per line; a single emptiness check covers None, "",
    # and whitespace-only input alike.
    stripped_lines = (line.strip() for line in (documents or "").split("\n"))
    doc_list = [line for line in stripped_lines if line]
    if not doc_list:
        return "Please enter documents (one per line)."

    # Embed the query and the documents with their dedicated encoders.
    query_embedding = model.encode_query(query)
    document_embeddings = model.encode_document(doc_list)

    # Row 0 holds the scores for the single query against every document
    # (presumably shape (1, n_docs) before indexing - confirm if the model
    # API changes).
    scores = model.similarity(query_embedding, document_embeddings)[0]

    # Convert the ranking to plain ints so list indexing below does not
    # depend on tensor-to-index coercion.
    ranking = torch.argsort(scores, descending=True).tolist()

    results = [
        f"{rank}. Score: {scores[idx].item():.4f}\n   Document: {doc_list[idx]}"
        for rank, idx in enumerate(ranking, start=1)
    ]
    return "\n\n".join(results)

# Theme: default Gradio theme recoloured (blue / indigo / zinc) with Roboto
# as the preferred font and Arial / sans-serif as fallbacks.
app_theme = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="zinc",
    font=[gr.themes.GoogleFont("Roboto"), "Arial", "sans-serif"],
)

# Sample [query, documents] rows shown under the form. Documents are
# newline-separated, which is the format find_similar_documents splits on.
example_rows = [
    [
        "Which planet is known as the Red Planet?",
        "Venus is often called Earth's twin because of its similar size and proximity.\nMars, known for its reddish appearance, is often referred to as the Red Planet.\nJupiter, the largest planet in our solar system, has a prominent red spot.\nSaturn, famous for its rings, is sometimes mistaken for the Red Planet.",
    ],
    [
        "What causes seasons on Earth?",
        "The tilt of Earth's axis causes different parts of the planet to receive varying amounts of sunlight throughout the year.\nThe moon's gravitational pull affects ocean tides but not seasons.\nEarth's orbit around the sun is slightly elliptical, but this has minimal effect on seasons.\nThe rotation of Earth on its axis causes day and night cycles.",
    ],
]

with gr.Blocks(title="Document Similarity Search", theme=app_theme) as demo:
    # Page header.
    gr.Markdown("# Document Similarity Search")
    gr.Markdown("Find the most relevant documents for your query using Google's Embedding Gemma model.")

    with gr.Row():
        # Left half: query, candidate documents, and the trigger button.
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="Query",
                placeholder="Enter your search query...",
                lines=2,
            )

            documents_input = gr.Textbox(
                label="Documents",
                placeholder="Enter documents (one per line)...",
                lines=10,
            )

            search_btn = gr.Button("Search", variant="primary")

        # Right half: the ranked results as plain text.
        with gr.Column(scale=1):
            output = gr.Textbox(
                label="Results",
                lines=15,
                show_copy_button=True,
            )

    # Clicking an example fills both input boxes.
    gr.Examples(
        examples=example_rows,
        inputs=[query_input, documents_input],
    )

    # The button click and the query box's submit event run the same search
    # with the same inputs and output, so they share one set of arguments.
    # NOTE(review): the query box is multi-line (lines=2), so submit may need
    # Shift+Enter rather than plain Enter - confirm against the Gradio version
    # in use.
    search_wiring = dict(
        fn=find_similar_documents,
        inputs=[query_input, documents_input],
        outputs=output,
    )
    search_btn.click(**search_wiring)
    query_input.submit(**search_wiring)

# Start the web server for the app.
demo.launch()