| | import gradio as gr |
| |
|
| | from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, OpenAIEmbeddings |
| | from langchain.vectorstores import Pinecone |
| | import pinecone |
| | import os |
# Exported before any embedding model is loaded so libraries that read this
# variable see it.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Pinecone connection settings. Each value can be overridden through the
# environment and is used to pre-fill the "Configuration" tab.
PINECONE_KEY = os.environ.get("PINECONE_KEY", "")
PINECONE_ENV = os.environ.get("PINECONE_ENV", "us-east-1")
PINECONE_INDEX = os.environ.get("PINECONE_INDEX", "3gpp-r16-hg")

# Embedding model name, the loader class name used to instantiate it, and the
# loader names offered in the UI dropdown.
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "hkunlp/instructor-large")
EMBEDDING_LOADER = os.environ.get("EMBEDDING_LOADER", "HuggingFaceInstructEmbeddings")
EMBEDDING_LIST = ["HuggingFaceInstructEmbeddings", "HuggingFaceEmbeddings"]

# Defaults and upper bound for the similarity-search sliders.
TOP_K_DEFAULT = 15
TOP_K_MAX = 30
SCORE_DEFAULT = 0.33

# Vector store handle shared by the UI callbacks; stays None until init_db()
# assigns it. (A `global` statement at module level is a no-op, so none is
# needed here.)
g_db = None
| |
|
def init_db(emb_name, emb_loader, db_api_key, db_env, db_index):
    """Connect to an existing Pinecone index and store it in ``g_db``.

    Args:
        emb_name: Model name handed to the embedding class as ``model_name``.
        emb_loader: Name of the embedding class to instantiate. Must be one
            of the langchain embedding classes imported by this module.
        db_api_key: Pinecone API key.
        db_env: Pinecone environment.
        db_index: Name of the existing Pinecone index to open.

    Returns:
        ``str(g_db)`` for the newly created vector store (displayed in the
        status textbox of the Configuration tab).

    Raises:
        ValueError: If ``emb_loader`` is not a supported loader name.
    """
    global g_db

    # SECURITY: emb_loader arrives from the UI. It used to be passed to
    # eval(), which executes arbitrary expressions; only known class names
    # are accepted now and the class is looked up explicitly.
    supported_loaders = (
        "HuggingFaceInstructEmbeddings",
        "HuggingFaceEmbeddings",
        "OpenAIEmbeddings",
    )
    if emb_loader not in supported_loaders:
        raise ValueError(
            f"Unsupported embedding loader {emb_loader!r}; "
            f"expected one of {', '.join(supported_loaders)}"
        )

    # Built after validation so the classes are only touched for valid input.
    loader_classes = {
        "HuggingFaceInstructEmbeddings": HuggingFaceInstructEmbeddings,
        "HuggingFaceEmbeddings": HuggingFaceEmbeddings,
        "OpenAIEmbeddings": OpenAIEmbeddings,
    }
    embeddings = loader_classes[emb_loader](model_name=emb_name)

    pinecone.init(api_key=db_api_key, environment=db_env)

    g_db = Pinecone.from_existing_index(index_name=db_index, embedding=embeddings)
    return str(g_db)
| |
|
| |
|
def get_db():
    """Return the module-level vector store handle ``g_db`` unchanged.

    The value is ``None`` until ``init_db`` has assigned a store.
    """
    current_db = g_db
    return current_db
| |
|
| |
|
def remove_duplicates(documents, score_min):
    """Drop low-scoring and repeated documents, keeping first occurrences.

    Args:
        documents: Iterable of ``(doc, score)`` pairs; each ``doc`` exposes a
            hashable ``page_content`` attribute.
        score_min: Minimum score (inclusive) a pair needs to be kept.

    Returns:
        A list of the kept ``doc`` objects, in input order, with at most one
        document per distinct ``page_content``.
    """
    # Dicts preserve insertion order, so this doubles as the "seen" set and
    # the ordered result.
    kept_by_content = {}
    for candidate, similarity in documents:
        content = candidate.page_content
        if content in kept_by_content:
            continue
        if similarity >= score_min:
            kept_by_content[content] = candidate
    return list(kept_by_content.values())
| |
|
| |
|
def get_data(query, top_k, score):
    """Run a similarity search against ``g_db`` and return the filtered hits.

    Args:
        query: Free-text search string.
        top_k: Number of nearest neighbours to request from the vector store.
        score: Minimum similarity score a hit needs to be kept.

    Returns:
        A list of de-duplicated documents whose score is at least ``score``,
        or a short message string when the database has not been initialised
        or the query is empty.
    """
    # The "init db" message belongs to a missing database, not a missing
    # query; without this check the search below fails on ``None``.
    if g_db is None:
        return "Please init db in configuration"
    if not query:
        return "Please enter a query"

    print("Use db: " + str(g_db))

    # Coerce to int in case the slider value arrives as a float.
    docs = g_db.similarity_search_with_score(query=query, k=int(top_k))

    return remove_duplicates(docs, score)
| |
|
# Gradio UI: a "Matching" tab for running searches and a "Configuration" tab
# for choosing the embedding model and the Pinecone index.
# NOTE(review): the nesting below is reconstructed from the order of the
# widgets — confirm the layout matches the intended one.
with gr.Blocks(
    title="3GPP Database",
    theme="Base",
    css=""".bigbox {
    min-height:250px;
}
""",
) as demo:
    # ---- Search tab ------------------------------------------------------
    with gr.Tab("Matching"):
        with gr.Accordion("Vector similarity"):
            with gr.Row():
                with gr.Column():
                    top_k = gr.Slider(
                        minimum=1,
                        maximum=TOP_K_MAX,
                        value=TOP_K_DEFAULT,
                        step=1,
                        label="Vector similarity top_k",
                        interactive=True,
                    )
                with gr.Column():
                    score = gr.Slider(
                        minimum=0.01,
                        maximum=0.99,
                        value=SCORE_DEFAULT,
                        step=0.01,
                        label="Vector similarity score",
                        interactive=True,
                    )

        with gr.Row():
            inp = gr.Textbox(
                label="Input",
                placeholder="What are you looking for?",
            )
            out = gr.Textbox(label="Output")

        btn_run = gr.Button("Run", variant="primary")

    # ---- Configuration tab -----------------------------------------------
    with gr.Tab("Configuration"):
        with gr.Row():
            # Passing the function (not its result) as the value makes the
            # textbox take its initial content from get_db().
            loading = gr.Textbox(value=get_db, max_lines=1, show_label=False)
            btn_init = gr.Button("Init")

        with gr.Accordion("Embedding"):
            with gr.Row():
                with gr.Column():
                    # NOTE(review): type='email' on a non-email field looks
                    # unintentional — confirm before changing it to 'text'.
                    emb_textbox = gr.Textbox(
                        label="Embedding Model",
                        value=EMBEDDING_MODEL,
                        placeholder="Paste Your Embedding Model Repo on HuggingFace",
                        lines=1,
                        interactive=True,
                        type='email',
                    )

                with gr.Column():
                    emb_dropdown = gr.Dropdown(
                        choices=EMBEDDING_LIST,
                        value=EMBEDDING_LOADER,
                        multiselect=False,
                        interactive=True,
                        label="Embedding Loader",
                    )

        with gr.Accordion("Pinecone Database"):
            with gr.Row():
                db_api_textbox = gr.Textbox(
                    label="Pinecone API Key",
                    value=PINECONE_KEY,
                    placeholder="Paste Your Pinecone API Key (xx-xx-xx-xx-xx) and Hit ENTER",
                    lines=1,
                    interactive=True,
                    type='password',
                )
            with gr.Row():
                db_env_textbox = gr.Textbox(
                    label="Pinecone Environment",
                    value=PINECONE_ENV,
                    placeholder="Paste Your Pinecone Environment (xx-xx-xx) and Hit ENTER",
                    lines=1,
                    interactive=True,
                    type='email',
                )
                db_index_textbox = gr.Textbox(
                    label="Pinecone Index",
                    value=PINECONE_INDEX,
                    placeholder="Paste Your Pinecone Index (xxxx) and Hit ENTER",
                    lines=1,
                    interactive=True,
                    type='email',
                )

    # ---- Event wiring ----------------------------------------------------
    init_inputs = [
        emb_textbox,
        emb_dropdown,
        db_api_textbox,
        db_env_textbox,
        db_index_textbox,
    ]
    btn_init.click(fn=init_db, inputs=init_inputs, outputs=loading)

    search_inputs = [inp, top_k, score]
    btn_run.click(fn=get_data, inputs=search_inputs, outputs=out)
| |
|
# Script entry point: enable the request queue, then start the app and open
# it in a browser window.
if __name__ == "__main__":
    demo.queue().launch(inbrowser=True)
| |
|