# Spaces: Sleeping (page-status banner captured along with the source; not part of the program)
import asyncio
import logging
import os

import gradio as gr
import requests
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from dotenv import load_dotenv
from openai import AzureOpenAI
def _require_env(name):
    """Return the stripped value of the environment variable ``name``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value with surrounding whitespace removed.

    Raises:
        RuntimeError: If the variable is unset or blank. Raised instead of
            letting ``None.strip()`` fail with an AttributeError that does
            not say which setting is missing.
    """
    value = (os.getenv(name) or "").strip()
    if not value:
        raise RuntimeError(
            f"Required environment variable {name!r} is not set or is empty"
        )
    return value


# Load settings from a .env file (when present) before the lookups below.
load_dotenv()

# Shared Azure OpenAI client used for the summarization request.
client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_KEY"),
    api_version="2025-01-01-preview",
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)
# Passed as ``model=`` when creating chat completions.
DEPLOYMENT_NAME = _require_env("AZURE_OPENAI_DEPLOYMENT")
# Sent as the X-API-KEY header on Serper search requests.
SERPER_API_KEY = _require_env("SERPER_API_KEY")
def search_company_interviews(company):
    """Search Serper for interview-experience pages about ``company``.

    The query is restricted to glassdoor.com, geeksforgeeks.org and
    prepinsta.com.

    Args:
        company: Company name typed by the user.

    Returns:
        Up to three result URLs taken from the "organic" section of the
        response. Empty when ``company`` is blank or nothing matched.

    Raises:
        requests.HTTPError: If Serper answers with an error status.
        requests.Timeout: If Serper does not answer in time.
    """
    # A blank name would only produce a meaningless (and billed) search.
    if not company or not company.strip():
        return []

    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    query = f"{company} interview experience site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
    response = requests.post(
        "https://google.serper.dev/search",
        headers=headers,
        json={"q": query},
        timeout=15,  # requests waits indefinitely unless a timeout is given
    )
    # Surface auth/quota failures instead of reporting them as "no results".
    response.raise_for_status()

    organic = response.json().get("organic", [])
    # Tolerate result entries that carry no "link" field.
    links = [hit["link"] for hit in organic if hit.get("link")]
    return links[:3]
async def crawl_url(url):
    """Fetch ``url`` with a headless crawl4ai browser and return markdown.

    The crawl uses a markdown generator configured with a
    ``PruningContentFilter``. The filtered ``fit_markdown`` is preferred,
    with ``raw_markdown`` as the fallback.

    Args:
        url: Address of the page to crawl.

    Returns:
        The page content as a markdown string, or ``""`` when the crawl
        produced no markdown, so callers can always treat the result as text.
    """
    markdown_generator = DefaultMarkdownGenerator(
        content_filter=PruningContentFilter()
    )
    run_conf = CrawlerRunConfig(markdown_generator=markdown_generator)

    async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
        result = await crawler.arun(url=url, config=run_conf)

    # NOTE(review): a result without a markdown object presumably means the
    # fetch failed -- confirm against crawl4ai's CrawlResult documentation.
    markdown = getattr(result, "markdown", None)
    if markdown is None:
        return ""
    return markdown.fit_markdown or markdown.raw_markdown or ""
async def fetch_and_summarize(company):
    """Collect interview write-ups for ``company`` and summarize them.

    Steps: look up candidate URLs, crawl each one in turn, join the page
    texts (capped at 4000 characters) and ask the Azure OpenAI deployment
    for a summary.

    Args:
        company: Company name to research.

    Returns:
        The model's summary text, or a short notice when no page content
        could be retrieved.
    """
    log = logging.getLogger(__name__)

    contents = []
    for url in search_company_interviews(company):
        try:
            page_text = await crawl_url(url)
        except Exception:
            # Best effort per source: one unreachable or blocked page must
            # not discard the pages that did load.
            log.warning("Skipping %s: crawl failed", url, exc_info=True)
            continue
        # Drop empty/None results so the join below only sees real text.
        if page_text:
            contents.append(page_text)

    context = "\n".join(contents)[:4000]
    if not context.strip():
        # Without source text the model could only invent a summary.
        return f"No interview experiences could be retrieved for {company}."

    messages = [
        {"role": "system", "content": "You summarize interview experiences for job seekers."},
        {"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"},
    ]
    response = client.chat.completions.create(
        model=DEPLOYMENT_NAME, messages=messages, max_tokens=800
    )
    # Keep the return type a string even if the reply carries no content.
    return response.choices[0].message.content or ""
def get_interview_experience(company):
    """Synchronous entry point used by the UI button.

    Runs ``fetch_and_summarize`` to completion on a fresh event loop.

    Args:
        company: Text from the "Company Name" box.

    Returns:
        The interview summary, or a prompt asking for a company name when
        the input is blank.
    """
    name = (company or "").strip()
    if not name:
        # Avoid a search + crawl + model round trip for an empty query.
        return "Please enter a company name."
    return asyncio.run(fetch_and_summarize(name))
# Single-page UI: a company-name box, a "Fetch" button and a read-only
# text area that receives the value returned by get_interview_experience.
with gr.Blocks() as demo:
    # Heading spelling corrected ("Expericence" -> "Experience").
    gr.Markdown("## 💼 Interview Process and Experience Finder")
    company = gr.Textbox(label="Company Name")
    output = gr.Textbox(label="Interview Insights", lines=15)
    btn = gr.Button("Fetch")
    btn.click(get_interview_experience, inputs=[company], outputs=[output])

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 without creating a public share link.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)