Spaces:
Sleeping
Sleeping
File size: 2,571 Bytes
cf5efab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import gradio as gr
import os, requests, asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from openai import AzureOpenAI
from dotenv import load_dotenv
load_dotenv()
# Azure OpenAI chat client, configured entirely from environment variables
# (.env is loaded above via load_dotenv()).
# NOTE(review): os.getenv() returns None when a variable is unset, so the
# .strip() calls below raise AttributeError at import time on missing
# config — consider validating the env vars with a clear error message.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_KEY").strip(),
    api_version="2025-01-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
)
# Name of the Azure OpenAI chat-completions deployment to call.
DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT").strip()
# API key for the Serper Google-search API used by search_company_interviews.
SERPER_API_KEY = os.getenv("SERPER_API_KEY").strip()
def search_company_interviews(company):
    """Return up to three interview-experience URLs for *company*.

    Queries the Serper Google-search API, restricting results to
    Glassdoor, GeeksforGeeks and PrepInsta.

    Args:
        company: Company name to search for.

    Returns:
        list[str]: At most three result links (may be empty).

    Raises:
        requests.HTTPError: if Serper responds with a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    query = f"{company} interview experience site:glassdoor.com OR site:geeksforgeeks.org OR site:prepinsta.com"
    # timeout= keeps a stalled Serper call from hanging the Gradio worker
    # forever; raise_for_status() surfaces auth/quota errors explicitly
    # instead of silently returning an empty "organic" list.
    r = requests.post(
        "https://google.serper.dev/search",
        headers=headers,
        json={"q": query},
        timeout=15,
    )
    r.raise_for_status()
    return [res["link"] for res in r.json().get("organic", [])[:3]]
async def crawl_url(url):
    """Crawl *url* and return its content rendered as markdown.

    Uses a headless browser with a pruning content filter; prefers the
    filtered ("fit") markdown and falls back to the raw markdown when
    the filtered version is empty.
    """
    markdown_generator = DefaultMarkdownGenerator(
        content_filter=PruningContentFilter()
    )
    crawl_settings = CrawlerRunConfig(markdown_generator=markdown_generator)
    async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
        page = await crawler.arun(url=url, config=crawl_settings)
        return page.markdown.fit_markdown or page.markdown.raw_markdown
async def fetch_and_summarize(company):
    """Search, crawl and summarize interview experiences for *company*.

    Pipeline: Serper search for up to three relevant pages, concurrent
    crawl of each page to markdown, then a single Azure OpenAI chat call
    over the first 4000 characters of the combined text.

    Args:
        company: Company name to research.

    Returns:
        str: The model's summary of the interview process.
    """
    urls = search_company_interviews(company)
    # Crawl all pages concurrently instead of one at a time; gather()
    # preserves input order, so the joined context matches the original
    # sequential behavior.
    contents = await asyncio.gather(*(crawl_url(url) for url in urls))
    # Cap the prompt context to keep the request within token limits.
    context = "\n".join(contents)[:4000]
    messages = [
        {"role": "system", "content": "You summarize interview experiences for job seekers."},
        {"role": "user", "content": f"Summarize interview process for {company} based on:\n{context}"}
    ]
    response = client.chat.completions.create(model=DEPLOYMENT_NAME, messages=messages, max_tokens=800)
    return response.choices[0].message.content
def get_interview_experience(company):
    """Synchronous Gradio callback: run the async summarize pipeline.

    asyncio.run creates a fresh event loop for each invocation and
    blocks until fetch_and_summarize completes, returning its summary.
    """
    return asyncio.run(fetch_and_summarize(company))
# Gradio UI: one text input, one fetch button, one multi-line output box.
with gr.Blocks() as demo:
    # Fixed user-facing typo: "Expericence" -> "Experience".
    gr.Markdown("## 💼 Interview Process and Experience Finder")
    company = gr.Textbox(label="Company Name")
    output = gr.Textbox(label="Interview Insights", lines=15)
    btn = gr.Button("Fetch")
    btn.click(get_interview_experience, inputs=[company], outputs=[output])
if __name__ == "__main__":
    # server_name="0.0.0.0" exposes the app on all interfaces (needed when
    # running inside a container); share=False disables the public Gradio
    # tunnel; pwa=True serves the app as an installable progressive web app.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860, pwa=True)
|