"""Gradio search assistant: Serper web search + optional crawl4ai crawling,
answered by an Azure OpenAI chat deployment.

Two modes are exposed through ``search_fn``:

* quick -- answer from the search-result snippets only.
* deep  -- crawl each result page and answer from the page content.
"""

import asyncio
import json
import logging
import os

import gradio as gr
import requests
import urllib3
from dotenv import load_dotenv
from openai import AzureOpenAI

# crawl4ai / Playwright
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator

logger = logging.getLogger(__name__)

# --- Disable SSL warnings (keep if your SERPER endpoint dislikes verification) ---
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# --- Load .env (also set these as HF Space Secrets) ---
load_dotenv()


def _require_env(name: str) -> str:
    """Return the stripped value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or blank. This replaces the
            opaque ``AttributeError`` that ``os.getenv(name).strip()`` raises
            when the variable is missing.
    """
    value = (os.getenv(name) or "").strip()
    if not value:
        raise RuntimeError(f"{name} is not set")
    return value


# --- Azure OpenAI client ---
client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview"),
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)

# Tolerate a missing key at import time; search_serper() reports it when called.
SERPER_API_KEY = (os.getenv("SERPER_API_KEY") or "").strip()
# Your Azure model deployment name
DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt-4.1").strip()

# Upper bound (seconds) on the Serper HTTP call so a stalled connection
# cannot hang a request forever.
SERPER_TIMEOUT_SECONDS = 15

# Per-source character cap in deep mode, to keep the prompt small.
MAX_CHARS_PER_SOURCE = 2000


# -------------------------
# Search (Serper) utilities
# -------------------------
def search_serper(query: str, max_results: int = 3) -> list[dict[str, str]]:
    """Search the web through Serper.

    Args:
        query: The search query.
        max_results: Maximum number of organic results to return.

    Returns:
        A list of ``{"title", "snippet", "url"}`` dicts; missing fields are
        empty strings.

    Raises:
        RuntimeError: if ``SERPER_API_KEY`` is not configured.
        requests.RequestException: on timeout, connection failure, or a
            non-2xx response.
    """
    if not SERPER_API_KEY:
        raise RuntimeError("SERPER_API_KEY is not set")

    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    payload = {"q": query}
    # NOTE(review): verify=False turns off TLS certificate checks on a request
    # that carries the API key. Kept because the original code disabled SSL
    # warnings on purpose; remove it if the endpoint verifies cleanly.
    resp = requests.post(
        "https://google.serper.dev/search",
        headers=headers,
        json=payload,
        verify=False,
        timeout=SERPER_TIMEOUT_SECONDS,
    )
    resp.raise_for_status()
    results = resp.json()

    return [
        {
            "title": result.get("title", ""),
            "snippet": result.get("snippet", ""),
            "url": result.get("link", ""),
        }
        for result in results.get("organic", [])[:max_results]
    ]


# -------------------------
# Crawl utilities
# -------------------------
async def _crawl_markdown(url: str) -> str:
    """Crawl *url* and return its markdown, letting any exception propagate.

    Prefers the filtered ("fit") markdown and falls back to the raw markdown,
    then to an empty string.
    """
    browser_conf = BrowserConfig(headless=True, verbose=False)
    filter_strategy = PruningContentFilter()
    md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
    run_conf = CrawlerRunConfig(markdown_generator=md_gen)

    async with AsyncWebCrawler(config=browser_conf) as crawler:
        result = await crawler.arun(url=url, config=run_conf)
        return result.markdown.fit_markdown or result.markdown.raw_markdown or ""


async def crawl_to_markdown(url: str) -> str:
    """
    Crawl a URL and return markdown (fallback to raw if needed).
    Assumes Playwright + Chromium is available in the Docker image.

    Never raises: on failure the returned string is
    ``"Crawl error for <url>: <exception>"``.
    """
    try:
        return await _crawl_markdown(url)
    except Exception as e:
        return f"Crawl error for {url}: {e}"


# -------------------------
# LLM orchestration
# -------------------------
def _complete(messages: list[dict[str, str]]) -> str:
    """Send *messages* to the Azure deployment and return the reply text.

    Blocking call. Returns an empty string when the response carries no
    message content.
    """
    resp = client.chat.completions.create(
        model=DEPLOYMENT_NAME,
        messages=messages,
        temperature=0.8,
        max_tokens=800,
    )
    return resp.choices[0].message.content or ""


async def _page_text_or_snippet(result: dict[str, str]) -> str:
    """Return crawled markdown for a search result, or its snippet.

    The snippet is used when the result has no URL, the crawl raises, or the
    crawl yields no text, so a crawl error message is never passed to the
    model as if it were page content.
    """
    url = result["url"]
    if url:
        try:
            page_md = await _crawl_markdown(url)
        except Exception as e:  # best-effort: one bad page must not fail the answer
            logger.warning("Crawl failed for %s: %s", url, e)
        else:
            if page_md:
                return page_md
    return result["snippet"]


async def generate_answer_with_crawling(question: str):
    """
    Deep mode: search + crawl + synthesize with Azure OpenAI.
    Returns (answer, sources_list)

    Never raises: on failure returns ``("Error (deep): <exception>", [])``.
    """
    try:
        # search_serper and the OpenAI client are synchronous; run them in a
        # worker thread so the event loop is not blocked.
        search_results = await asyncio.to_thread(search_serper, question)

        crawled_pieces = []
        # Pages are crawled one at a time; each crawl opens its own crawler.
        for r in search_results:
            url = r["url"]
            title = r["title"] or url
            # Keep it small to avoid tokens blow-up
            snippet = (await _page_text_or_snippet(r))[:MAX_CHARS_PER_SOURCE]
            crawled_pieces.append(f"## {title}\nSource: {url}\n\n{snippet}\n\n")

        context = "\n".join(crawled_pieces) or "No crawl content available."

        messages = [
            {"role": "system", "content": "You are a helpful assistant that answers questions using detailed web content. Provide citations with URLs when possible."},
            {"role": "user", "content": f"Based on the following web content, answer the question. Include relevant citations.\n\nContent:\n{context}\n\nQuestion: {question}"},
        ]

        answer = await asyncio.to_thread(_complete, messages)
        return answer, search_results
    except Exception as e:
        return f"Error (deep): {e}", []


def generate_answer_quick(question: str):
    """
    Quick mode: search snippets only + Azure OpenAI.
    Returns (answer, sources_list)

    Synchronous; exceptions from the search or the model call propagate to
    the caller.
    """
    search_results = search_serper(question)
    snippets = [
        f"{r['title']}: {r['snippet']} ({r['url']})" for r in search_results
    ]
    context = "\n".join(snippets) or "No search snippets available."

    messages = [
        {"role": "system", "content": "You are a helpful assistant that answers using real-time search context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"},
    ]

    return _complete(messages), search_results


# -------------------------
# Gradio function
# -------------------------
async def search_fn(question: str, mode: str):
    """
    Gradio-servable function.
    Returns:
      - Markdown answer
      - JSON of sources

    A missing or blank question yields a prompt to enter one. Any mode other
    than "deep" (case-insensitive) is treated as quick mode. Failures are
    returned as an "Error (...)" answer with an empty source list.
    """
    mode = (mode or "quick").lower()
    question = (question or "").strip()
    if not question:
        return "⚠️ Please enter a question.", "[]"

    if mode == "deep":
        answer, sources = await generate_answer_with_crawling(question)
    else:
        # run sync function in a thread so the Gradio loop is not blocked
        try:
            answer, sources = await asyncio.to_thread(generate_answer_quick, question)
        except Exception as e:
            # Mirror deep mode: report the failure in the answer pane.
            answer, sources = f"Error (quick): {e}", []

    return answer, json.dumps(sources, indent=2)


# -------------------------
# Gradio UI
# -------------------------
with gr.Blocks(title="Search Assistant") as demo:
    gr.Markdown("# 🔎 Search Assistant\nAsk a question. Pick **Quick** or **Deep** (crawls the top results).")

    with gr.Row():
        txt = gr.Textbox(label="Your question", placeholder="e.g., What's new in Python 3.12?", lines=3)
    with gr.Row():
        mode = gr.Radio(choices=["quick", "deep"], value="quick", label="Mode")
        run_btn = gr.Button("Search")

    with gr.Row():
        answer_out = gr.Markdown(label="Answer")
    with gr.Row():
        sources_out = gr.JSON(label="Sources (top 3)")

    run_btn.click(
        fn=search_fn,
        inputs=[txt, mode],
        outputs=[answer_out, sources_out],
    )

# Expose API (Gradio does this automatically). In Spaces:
# POST /run/predict with {"data": ["your question", "quick"]}

if __name__ == "__main__":
    # In HF Spaces Docker, Gradio is launched by this script.
    demo.launch(server_name="0.0.0.0", server_port=7860, pwa=True)