import os
import json

import gradio as gr
import numpy as np
import redis
from openai import AzureOpenAI
from sentence_transformers import SentenceTransformer

# Redis Cloud connection
redis_client = redis.Redis(
    host="redis-12628.c14.us-east-1-2.ec2.redns.redis-cloud.com",
    port=12628,
    decode_responses=True,
    username="default",
    password=os.getenv("REDIS_PASSWORD"),
)

# 🧹 Clear the Redis DB on startup so every run begins with an empty cache
redis_client.flushdb()

# Azure OpenAI client (used only for chat; embeddings now come from Hugging Face)
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY").strip(),
    api_version="2025-01-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip(),
)

CHAT_DEPLOYMENT = "gpt-4.1"  # your Azure chat deployment name

# 🚀 Stronger local embedding model from Hugging Face
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")


def get_embedding(text):
    """Embed `text` locally and return a float32 NumPy vector."""
    return embedder.encode(text, convert_to_numpy=True).astype(np.float32)


def cosine_similarity(vec1, vec2):
    """Cosine similarity between two 1-D vectors."""
    return float(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)))


def search_cache(user_input, threshold=0.8):
    """Return the cached answer for the most similar stored prompt,
    or None if nothing clears the similarity threshold.

    Note: this is a linear scan over every cache entry, so lookups
    are O(n) in the number of cached prompts.
    """
    query_vec = get_embedding(user_input)
    best_key, best_score, best_val = None, -1.0, None
    for key, val in redis_client.hgetall("cache").items():
        entry = json.loads(val)
        vec = np.array(entry["embedding"], dtype=np.float32)
        score = cosine_similarity(query_vec, vec)
        if score > best_score:
            best_score, best_key, best_val = score, key, entry["output"]
    if best_score >= threshold:
        return best_val
    return None


def store_cache(user_input, output):
    """Store the prompt's embedding and the model's answer under the 'cache' hash."""
    vec = get_embedding(user_input).tolist()
    redis_client.hset(
        "cache",
        user_input,
        json.dumps({"embedding": vec, "output": output}),
    )


def chat_with_ai(user_input):
    if not user_input:
        return "Please type something."

    # 🔍 Check the Redis semantic cache first
    cached = search_cache(user_input)
    if cached:
        return f"[From Redis] {cached}"

    # Cache miss: query Azure OpenAI
    response = client.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[{"role": "user", "content": user_input}],
        temperature=0.8,
        max_tokens=700,
    )
    output = response.choices[0].message.content.strip()

    # 💾 Save the prompt embedding and answer in Redis for future hits
    store_cache(user_input, output)
    return f"[From OpenAI] {output}"


# Gradio UI
with gr.Blocks(title="Azure OpenAI + Redis Cloud Chat") as demo:
    gr.Markdown("# 💬 Azure OpenAI + Redis Cloud (Semantic Cache) Demo")
    with gr.Row():
        chatbot = gr.Chatbot(type="messages")
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...")
        send = gr.Button("Send")

    def respond(message, history):
        bot_reply = chat_with_ai(message)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_reply})
        return history, ""  # clear the textbox after sending

    send.click(respond, [msg, chatbot], [chatbot, msg])
    msg.submit(respond, [msg, chatbot], [chatbot, msg])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, pwa=True)
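
# ---------------------------------------------------------------------------
# Optional alternative (sketch, not wired into the app above): search_cache()
# does a Python-side O(n) scan over every cached entry. If the Redis Cloud
# database has the RediSearch module (Redis Stack), Redis can instead perform
# an indexed KNN vector search. Everything below is a minimal sketch under
# that assumption: the index name "cache_idx", the key prefix "cache:", and
# the HNSW parameters are illustrative, and DIM=768 matches
# all-mpnet-base-v2. Note that RediSearch's COSINE metric returns a
# *distance* (1 - similarity), so the threshold check is inverted relative
# to cosine_similarity() above. Only "output" and "score" are returned from
# the search, so the binary vector never hits the decode_responses=True path.
# ---------------------------------------------------------------------------
from redis.commands.search.field import TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query


def create_vector_index():
    """Create an HNSW vector index over hashes with the 'cache:' prefix (sketch)."""
    try:
        redis_client.ft("cache_idx").create_index(
            fields=[
                TextField("output"),
                VectorField(
                    "embedding",
                    "HNSW",
                    {"TYPE": "FLOAT32", "DIM": 768, "DISTANCE_METRIC": "COSINE"},
                ),
            ],
            definition=IndexDefinition(prefix=["cache:"], index_type=IndexType.HASH),
        )
    except redis.exceptions.ResponseError:
        pass  # index already exists


def store_cache_indexed(user_input, output):
    """Store one hash per prompt; the embedding goes in as raw float32 bytes."""
    vec = get_embedding(user_input)
    redis_client.hset(
        f"cache:{user_input}",
        mapping={"output": output, "embedding": vec.tobytes()},
    )


def search_cache_indexed(user_input, threshold=0.8):
    """KNN-1 lookup via RediSearch; returns the cached answer or None."""
    query_vec = get_embedding(user_input)
    q = (
        Query("*=>[KNN 1 @embedding $vec AS score]")
        .sort_by("score")
        .return_fields("output", "score")  # never return the binary vector field
        .dialect(2)
    )
    res = redis_client.ft("cache_idx").search(
        q, query_params={"vec": query_vec.tobytes()}
    )
    # score is a cosine distance here, so "similar enough" means score <= 1 - threshold
    if res.docs and float(res.docs[0].score) <= 1 - threshold:
        return res.docs[0].output
    return None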