File size: 5,064 Bytes
f448bd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import streamlit as st
import time
import pandas as pd
import io
import zipfile
from gliner import GLiNER
from streamlit_extras.stylable_container import stylable_container
import plotly.express as px
import os
from comet_ml import Experiment

# Comet ML credentials are read from the environment; experiment logging is
# enabled only when all three values are present and non-empty.
COMET_API_KEY = os.getenv("COMET_API_KEY")
COMET_WORKSPACE = os.getenv("COMET_WORKSPACE")
COMET_PROJECT_NAME = os.getenv("COMET_PROJECT_NAME")

comet_initialized = all((COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME))
if not comet_initialized:
    st.warning("Comet ML not initialized. Check environment variables.")

# Free-text input analysed when the "Results" button is pressed.
text = st.text_area(
    label="Type or paste your text below, and then press Ctrl + Enter",
    key='my_text_area',
)

# Entity types the GLiNER model is asked to extract from the input text.
ENTITY_LABELS = ["person", "country", "city", "organization", "date", "money", "percent value", "position"]


@st.cache_resource
def _load_model():
    """Load the GLiNER model once and reuse it across Streamlit reruns.

    Previously the model was re-created on every click of the "Results"
    button; caching the resource avoids repeating that work.
    """
    return GLiNER.from_pretrained("xomad/gliner-model-merge-large-v1.0")


def _render_results(df, experiment):
    """Render the entity table, glossary, charts and the zip download button.

    Args:
        df: Non-empty DataFrame of predicted entities. The charts read its
            ``text``, ``label`` and ``score`` columns.
        experiment: Active Comet ``Experiment`` used to log figures and the
            zip archive, or ``None`` when Comet logging is disabled.
    """
    properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
    st.dataframe(df.style.set_properties(**properties))

    with st.expander("See Glossary of tags"):
        st.write('''
            '**text**': ['entity extracted from your text data']
            '**score**': ['accuracy score; how accurately a tag has been assigned to a given entity']
            '**label**': ['label (tag) assigned to a given extracted entity']
            '**start**': ['index of the start of the corresponding entity']
            '**end**': ['index of the end of the corresponding entity']
            ''')

    # Tree map of entities grouped by text and label, sized by score.
    fig = px.treemap(df, path=[px.Constant("all"), 'text', 'label'],
                     values='score', color='label')
    fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    st.subheader("Tree map", divider="red")
    st.plotly_chart(fig)
    if experiment is not None:
        experiment.log_figure(figure=fig, figure_name="entity_treemap")

    # Naming the index and the counts explicitly yields 'label'/'count'
    # columns regardless of how the installed pandas names value_counts().
    label_counts = df['label'].value_counts().rename_axis('label').reset_index(name='count')
    col1, col2 = st.columns(2)
    with col1:
        fig1 = px.pie(label_counts, values='count', names='label', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted labels')
        fig1.update_traces(textposition='inside', textinfo='percent+label')
        st.subheader("Pie Chart", divider="red")
        st.plotly_chart(fig1)
        if experiment is not None:
            experiment.log_figure(figure=fig1, figure_name="label_pie_chart")
    with col2:
        fig2 = px.bar(label_counts, x="count", y="label", color="label", text_auto=True, title='Occurrences of predicted labels')
        st.subheader("Bar Chart", divider="red")
        st.plotly_chart(fig2)
        if experiment is not None:
            experiment.log_figure(figure=fig2, figure_name="label_bar_chart")

    # One-row table describing each column of the results CSV.
    glossary_df = pd.DataFrame(
        data={
            'text': ['entity extracted from your text data'],
            'score': ['accuracy score; how accurately a tag has been assigned to a given entity'],
            'label': ['label (tag) assigned to a given extracted entity'],
            'start': ['index of the start of the corresponding entity'],
            'end': ['index of the end of the corresponding entity'],
        })
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as myzip:
        myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
        myzip.writestr("Glossary of tags.csv", glossary_df.to_csv(index=False))
        # NOTE(review): the combined CSV is only added when Comet logging is
        # active, as in the original script; confirm this is intended.
        if experiment is not None:
            myzip.writestr("Summary of the results_glossary_combined.csv", pd.concat([df, glossary_df]).to_csv(index=False))
    zip_bytes = buf.getvalue()

    with stylable_container(
        key="download_button",
        css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
    ):
        st.download_button(
            label="Download zip file",
            data=zip_bytes,
            file_name="zip file.zip",
            mime="application/zip",
        )
        if experiment is not None:
            # Hand Comet a file-like object rather than raw bytes, which it
            # would otherwise interpret as a file path.
            experiment.log_asset(io.BytesIO(zip_bytes), file_name="downloadable_results.zip")


def _run_analysis(input_text):
    """Extract entities from ``input_text`` and display the results.

    Shows a warning and returns early when the text is blank or when the
    model finds no entities, instead of failing later on missing DataFrame
    columns. When Comet is configured, an experiment is opened for the run
    and always ended, even if rendering raises.

    Args:
        input_text: Raw text entered by the user.
    """
    if not input_text or not input_text.strip():
        st.warning("Please enter some text before requesting results.")
        return

    entities = _load_model().predict_entities(input_text, ENTITY_LABELS)
    if not entities:
        st.warning("No entities were found in the provided text.")
        return
    df = pd.DataFrame(entities)

    experiment = None
    if comet_initialized:
        experiment = Experiment(
            api_key=COMET_API_KEY,
            workspace=COMET_WORKSPACE,
            project_name=COMET_PROJECT_NAME,
        )
    try:
        if experiment is not None:
            experiment.log_parameter("input_text", input_text)
            # Comet picks the table format from the filename extension.
            experiment.log_table("predicted_entities.csv", df)
        _render_results(df, experiment)
        st.divider()
    finally:
        # Close the experiment even when rendering or logging fails.
        if experiment is not None:
            experiment.end()


if st.button("Results"):
    with st.spinner("Wait for it...", show_time=True):
        _run_analysis(text)