Spaces:
Runtime error
Runtime error
from googlesearch import search | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
from functools import lru_cache | |
# Matches one HTML/XML tag such as "<p>" or "</a>": a "<", then one or more
# characters that are not ">", then ">". Substitute with "" to strip markup.
TAG_CLEANER = re.compile(r"<[^>]+>")
def extract_metadata(url):
    """Fetch *url* and return its page title and meta description.

    This is a best-effort helper: it does not raise on ordinary failures.
    A network error, a non-2xx status, or a parsing problem is reported
    through the ``"title"`` field instead.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with three keys:

        * ``"url"`` -- the *url* argument, unchanged.
        * ``"title"`` -- the text of the page's ``<title>``, at most 200
          characters; *url* (also truncated to 200) when the page has no
          usable title. On failure, ``"Error: "`` followed by the first
          30 characters of the exception message.
        * ``"description"`` -- the ``content`` of
          ``<meta name="description">``, at most 300 characters; ``""``
          when absent or on failure.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    try:
        response = requests.get(url, timeout=5, headers=headers)
        response.raise_for_status()
        # NOTE: the 'lxml' tree builder must be installed; if it is not,
        # BeautifulSoup raises and the error branch below is taken.
        soup = BeautifulSoup(response.text, 'lxml')

        # ``soup.title.string`` is None when <title> is empty or contains
        # nested markup, so calling ``.strip()`` on it would raise.
        # ``get_text`` copes with both cases.
        title_tag = soup.title
        title = title_tag.get_text(strip=True) if title_tag is not None else ""
        title = (title or url)[:200]

        description = ""
        meta_desc = soup.find("meta", attrs={"name": "description"})
        if meta_desc is not None:
            # ``or ""`` guards against a missing or valueless attribute.
            description = (meta_desc.get("content") or "").strip()[:300]

        return {
            "url": url,
            "title": title,
            "description": description,
        }
    except Exception as e:
        # Deliberately broad: one bad page must not abort a whole search.
        return {"url": url, "title": f"Error: {str(e)[:30]}", "description": ""}
def search_google(query, num_results=5):
    """Run a Google search and return page metadata for each result.

    Args:
        query: Search terms passed to ``googlesearch.search``.
        num_results: Maximum number of result URLs to request and fetch.

    Returns:
        A list of dicts as produced by ``extract_metadata`` (keys
        ``"url"``, ``"title"``, ``"description"``), in search order.
        Empty and duplicate URLs are skipped so no page is fetched twice.
        Returns ``[]`` when the search itself fails; the error is printed.
    """
    try:
        # ``search`` yields lazily, so it must be consumed inside the
        # ``try`` for request errors to be caught here.
        urls = list(search(query, num_results=num_results, advanced=False))
    except Exception as e:
        print(f"Search error: {e}")
        return []

    # dict.fromkeys removes duplicates while preserving first-seen order.
    unique_urls = list(dict.fromkeys(url for url in urls if url))

    # extract_metadata always returns a dict (errors are encoded in its
    # "title"), so every URL contributes exactly one entry.
    return [extract_metadata(url) for url in unique_urls[:num_results]]