"""Gradio demo: product similarity search backed by OpenAI embeddings and Pinecone.

A user query is translated to English (if needed), spell-corrected and
expanded by an OpenAI completion model, embedded, and then used to query the
'zepto' Pinecone index for the five nearest products.
"""
import os

import gradio as gr
import numpy as np
import openai
import pandas as pd
import torch
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from pinecone import Pinecone
from textblob import TextBlob

# NOTE: pandas, numpy and torch are not referenced below; the imports are kept
# unchanged in case other tooling relies on them being loaded.

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

openai.api_key = OPENAI_API_KEY
os.environ['LANGCHAIN_VERBOSE'] = 'true'

pc = Pinecone(api_key=PINECONE_API_KEY, environment="us-east-1")
index = pc.Index('zepto')

# Completion settings shared by query completion and translation.
_COMPLETION_MODEL = "gpt-3.5-turbo-instruct"
_COMPLETION_MAX_TOKENS = 100
_COMPLETION_TEMPERATURE = 0.5


def get_embedding(text, model="text-embedding-ada-002"):
    """Return the OpenAI embedding vector for *text* using *model*."""
    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding


def _complete(prompt):
    """Send *prompt* to the completion model and return the stripped text."""
    response = openai.completions.create(
        model=_COMPLETION_MODEL,
        prompt=prompt,
        max_tokens=_COMPLETION_MAX_TOKENS,
        temperature=_COMPLETION_TEMPERATURE,
    )
    return response.choices[0].text.strip()


def check_and_correct_spelling(query):
    """Return *query* with TextBlob's spelling correction applied."""
    return str(TextBlob(query).correct())


def correct_and_complete_query(text):
    """Spell-correct *text*, then ask OpenAI to complete it as a product search.

    Returns the completion text produced by the model (stripped).
    """
    corrected_text = check_and_correct_spelling(text)
    completion_prompt = (
        "Complete the following query in a way that is related to product "
        f"search: '{corrected_text}'"
    )
    return _complete(completion_prompt)


def translate_to_english(text):
    """Return *text* translated to English, or unchanged if already English.

    Language detection fails on input with no detectable features (for
    example empty or digits-only text); in that case the text is returned
    as-is instead of propagating the detection error.
    """
    try:
        language = detect(text)
    except LangDetectException:
        return text
    if language == 'en':
        return text
    translation_prompt = f"Translate the following text to English:\n\n'{text}'"
    return _complete(translation_prompt)


def is_query_relevant(query, relevant_keywords):
    """Return True if any keyword occurs in *query*, ignoring case."""
    lowered_query = query.lower()
    return any(keyword.lower() in lowered_query for keyword in relevant_keywords)


def process_query(query):
    """Normalize a raw user query before it is embedded.

    Steps: translate to English, correct spelling, then correct-and-complete
    via OpenAI. Translation runs first because the spelling corrector is
    English-oriented and would otherwise mangle non-English input before it
    could be translated.
    """
    query = translate_to_english(query)
    query = check_and_correct_spelling(query)
    query = correct_and_complete_query(query)
    # NOTE: relevance filtering via is_query_relevant() is not wired in yet;
    # it needs a list of relevant keywords that this module does not define.
    return query


def _format_match(match):
    """Render one Pinecone match as a 'Product / Link / Score' text block."""
    # Metadata can be None for vectors stored without any; fall back to defaults.
    metadata = match['metadata'] or {}
    product_name = metadata.get('product_name', 'No name available')
    product_link = metadata.get('product_url', 'No link available')
    score = match['score']
    return f"Product: {product_name}\nLink: {product_link}\nScore: {score}\n"


def search_in_pinecone2(query):
    """Search the Pinecone index for products similar to *query*.

    The query is preprocessed with process_query(), embedded, and the top 5
    matches are returned as a newline-joined string of formatted entries.
    A blank query short-circuits with a prompt message rather than being sent
    to the embedding API.
    """
    if not query or not query.strip():
        return "Please enter a query."

    processed_query = process_query(query)
    # Embed the processed query; the raw input was previously embedded by
    # mistake, which discarded all of the preprocessing work.
    embedding = get_embedding(processed_query)
    search_results = index.query(vector=embedding, top_k=5, include_metadata=True)
    return "\n".join(_format_match(match) for match in search_results['matches'])


interface = gr.Interface(
    fn=search_in_pinecone2,
    inputs=gr.Textbox(label="Enter your query"),
    outputs=gr.Textbox(label="Top 5 Similar Products"),
    title="Product Similarity Search",
    description="Enter a query to find the top 5 similar products based on your search.",
)

# Launch the interface
interface.launch(debug=True, share=True)