File size: 5,202 Bytes
3ab6535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import streamlit as st
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import json
from typing import Dict, List, Any, Union
import torch

def display_json_as_table(data: Union[Dict, List]):
    """Display JSON data as a formatted table.

    A list is passed to ``pd.DataFrame`` as-is (typically a list of dicts,
    one row per item). Anything else is treated as a mapping and shown as a
    two-column ``Key`` / ``Value`` table. If building or rendering the table
    fails, the raw data is shown with ``st.json`` instead.

    Args:
        data: Parsed JSON content, either a list or a dict.
    """
    try:
        if isinstance(data, list):
            df = pd.DataFrame(data)
        else:
            df = pd.DataFrame(list(data.items()), columns=['Key', 'Value'])
        st.dataframe(df)
    except Exception:
        # Deliberate best-effort fallback. Catch ``Exception`` rather than
        # using a bare ``except`` so that KeyboardInterrupt / SystemExit and
        # other BaseException-derived control signals still propagate.
        st.json(data)

def format_sentiment_result(result: List[Dict]):
    """Format a sentiment analysis result for display.

    Args:
        result: List of prediction dicts, each carrying a ``'label'`` string
            and a numeric ``'score'``. Only the first entry is used.

    Returns:
        An empty dict when ``result`` is empty. Otherwise a dict with:
        ``'dataframe'`` (one-row DataFrame with ``Label`` and ``Score``
        columns), ``'color'`` (``'green'`` or ``'red'``), ``'emoji'`` and
        ``'score'`` (the score as a percentage rounded to two decimals).
    """
    if not result:
        return {}

    # Only the first prediction is shown.
    item = result[0]
    label = item['label']
    score_pct = round(item['score'] * 100, 2)

    # Any label other than "positive" (case-insensitive) is styled as negative.
    is_positive = label.lower() == 'positive'

    return {
        'dataframe': pd.DataFrame({'Label': [label], 'Score': [score_pct]}),
        'color': 'green' if is_positive else 'red',
        # Written as escapes so the emoji survive any re-encoding of this file:
        # U+1F603 = grinning face with big eyes, U+1F61E = disappointed face.
        'emoji': '\U0001F603' if is_positive else '\U0001F61E',
        'score': score_pct,
    }

def plot_sentiment_gauge(score: float, color: str):
    """Build a donut-shaped gauge figure for a sentiment score.

    The figure is a two-slice pie with a hole: one slice for ``score`` and
    one for ``100 - score``, with the score printed as a percentage in the
    centre.

    Args:
        score: Value of the highlighted slice; the filler slice is computed
            as ``100 - score``.
        color: Colour used for the score slice.

    Returns:
        The Plotly figure.
    """
    remainder = 100 - score
    # Text shown in the middle of the donut hole.
    center_label = dict(text=f"{score}%", x=0.5, y=0.5, font_size=20, showarrow=False)

    gauge = px.pie(
        values=[score, remainder],
        names=['Score', ''],
        color_discrete_sequence=[color, '#F0F2F6'],
        hole=0.7,
    )
    gauge.update_layout(
        annotations=[center_label],
        showlegend=False,
        margin={'l': 20, 'r': 20, 't': 20, 'b': 20},
        height=200,
    )
    return gauge

def format_ner_results(ner_results: List[Dict]):
    """Turn NER results into a DataFrame for display.

    Args:
        ner_results: Entity dicts, each with ``'word'``, ``'entity_group'``,
            ``'score'``, ``'start'`` and ``'end'`` keys.

    Returns:
        A DataFrame with ``Entity``, ``Type``, ``Score`` (percentage rounded
        to two decimals), ``Start`` and ``End`` columns, one row per entity;
        an empty DataFrame when there are no results.
    """
    if not ner_results:
        return pd.DataFrame()

    rows = [
        {
            'Entity': ent['word'],
            'Type': ent['entity_group'],
            'Score': round(ent['score'] * 100, 2),
            'Start': ent['start'],
            'End': ent['end'],
        }
        for ent in ner_results
    ]
    return pd.DataFrame(rows)

def highlight_entities_in_text(text: str, entities: List[Dict]):
    """Return ``text`` as an HTML string with each entity wrapped in a coloured span.

    The result is meant to be rendered as HTML, so every piece of ``text``
    (inside and outside the highlights) and the entity type shown in the
    tooltip are HTML-escaped. This keeps markup-like input from being
    interpreted by the browser.

    Args:
        text: The original plain text the entity offsets refer to.
        entities: Entity dicts with ``'start'`` / ``'end'`` character offsets
            into ``text``, an ``'entity_group'`` type and a ``'score'``.

    Returns:
        The HTML string. Entities that overlap an earlier-starting entity are
        skipped, because nesting spans by offset would corrupt the markup.
    """
    # Function-scope import so this block does not depend on the file header.
    from html import escape

    if not entities:
        return escape(text, quote=False)

    # Background colour per entity type; unknown types fall back to light gray.
    color_map = {
        'PER': '#FFD700',  # Person - Gold
        'ORG': '#98FB98',  # Organization - Pale Green
        'LOC': '#ADD8E6',  # Location - Light Blue
        'MISC': '#FFA07A'   # Miscellaneous - Light Salmon
    }

    # Walk the entities left to right and emit escaped plain text and
    # highlight spans alternately. All offsets are applied to the untouched
    # original ``text``, so earlier insertions never shift later ones.
    pieces = []
    cursor = 0
    for entity in sorted(entities, key=lambda e: e['start']):
        start = entity['start']
        end = entity['end']
        if start < cursor:
            # Overlaps a span that was already emitted.
            continue

        entity_type = entity['entity_group']
        color = color_map.get(entity_type, '#D3D3D3')  # Default to light gray
        percent = round(entity['score'] * 100)

        pieces.append(escape(text[cursor:start], quote=False))
        pieces.append(
            f'<span style="background-color: {color}; padding: 0px 2px; border-radius: 3px;" '
            f'title="{escape(str(entity_type))} ({percent}%)">'
            f'{escape(text[start:end], quote=False)}</span>'
        )
        cursor = end

    pieces.append(escape(text[cursor:], quote=False))
    return ''.join(pieces)

def plot_similarity_heatmap(query: str, texts: List[str], similarities: List[float]):
    """Build a horizontal bar chart of similarity scores.

    Despite the name, the figure is a bar chart: one bar per text, with the
    rows ordered by descending similarity and the x-axis fixed to ``[0, 1]``.

    Args:
        query: Query string, shown in the chart title.
        texts: Texts that were compared against the query.
        similarities: One similarity score per entry in ``texts``.

    Returns:
        The Plotly figure.
    """
    ranked = pd.DataFrame({'Text': texts, 'Similarity': similarities}).sort_values(
        'Similarity', ascending=False
    )
    axis_labels = {'Similarity': 'Cosine Similarity', 'Text': 'Corpus Text'}

    chart = px.bar(
        ranked,
        x='Similarity',
        y='Text',
        orientation='h',
        title=f'Similarity to: "{query}"',
        labels=axis_labels,
        range_x=[0, 1],
    )
    chart.update_layout(height=300, margin={'l': 20, 'r': 20, 't': 40, 'b': 20})
    return chart

def display_text_with_answer(context: str, answer: Dict):
    """Return ``context`` as an HTML string with the answer span highlighted.

    The result is meant to be rendered as HTML, so the context and the answer
    text are HTML-escaped. This keeps markup-like input from being
    interpreted by the browser.

    Args:
        context: The passage the answer was extracted from.
        answer: Dict with ``'start'`` / ``'end'`` character offsets into
            ``context`` and, optionally, the ``'answer'`` text. When
            ``'answer'`` is missing, ``context[start:end]`` is highlighted.

    Returns:
        The escaped context with the answer wrapped in a highlight span, or
        just the escaped context when ``answer`` is empty or lacks offsets.
    """
    # Function-scope import so this block does not depend on the file header.
    from html import escape

    if not answer or 'start' not in answer or 'end' not in answer:
        return escape(context, quote=False)

    start = answer['start']
    end = answer['end']
    # Fall back to the context slice instead of raising KeyError when the
    # answer text itself is missing.
    answer_text = answer.get('answer', context[start:end])

    # Replace the answer with a highlighted version
    return (
        escape(context[:start], quote=False)
        + '<span style="background-color: #FFFF00; font-weight: bold;">'
        + escape(str(answer_text), quote=False)
        + '</span>'
        + escape(context[end:], quote=False)
    )