|
from typing import List, Callable |
|
from duckduckgo_search import DDGS |
|
import re |
|
import time |
|
|
|
|
|
def tighten(q: str) -> str:
    """Condense a query to its quoted phrases and capitalised/numeric tokens.

    Two kinds of fragments are collected from *q*:

    * the text inside each pair of double quotes (quotes themselves dropped);
    * every token that starts at a word boundary with an ASCII uppercase
      letter or a digit and continues with at least two more word
      characters or hyphens (so at least three characters in total).

    The fragments are joined with single spaces, quoted phrases first.
    Tokens that also occur inside a quoted phrase are therefore repeated.
    If nothing is collected, the original query is returned unchanged.
    """
    phrase_pattern = r'"([^"]+)"'
    token_pattern = r'\b([A-Z0-9][\w-]{2,})'

    fragments = [*re.findall(phrase_pattern, q), *re.findall(token_pattern, q)]
    if not fragments:
        # Nothing worth keeping was found: fall back to the full query.
        return q
    return " ".join(fragments)
|
|
|
def _raw_search(query: str, max_results: int = 5) -> List[str]:
    """Run one DuckDuckGo text search and format each hit as a string.

    Parameters
    ----------
    query : str
        Search query passed to ``DDGS().text``.
    max_results : int, default 5
        Forwarded to ``DDGS().text`` as ``max_results``.

    Returns
    -------
    List[str]
        One ``"<title> – <link>\\n<snippet>"`` string per usable hit. The link
        is taken from ``"href"`` (falling back to ``"link"``) and the snippet
        from ``"body"`` (falling back to ``"snippet"``); missing or ``None``
        fields become empty strings.

    Notes
    -----
    Errors raised by the search call itself are not caught here; they
    propagate to the caller.
    """
    # Materialise the results while the session is still open, so the
    # formatting below never touches a closed client.
    with DDGS() as ddgs:
        raw = list(ddgs.text(query, max_results=max_results))

    out = []
    for r in raw:
        try:
            # ``or ""`` (rather than a .get default) also covers keys that
            # are present but None, which would otherwise print as "None".
            title = r.get("title") or ""
            link = r.get("href") or r.get("link") or ""
            snippet = r.get("body") or r.get("snippet") or ""
        except (AttributeError, TypeError):
            # Best effort: an entry that is not dict-like is skipped, but
            # unrelated errors are no longer silently discarded.
            continue
        out.append(f"{title} – {link}\n{snippet}")
    return out
|
|
|
def retry_ddg(
    query: str,
    max_results: int = 5,
    attempts: int = 4,
    delay_sec: int = 10,
    search_fn: Callable[[str, int], List[str]] = _raw_search,
) -> List[str]:
    """
    Retry a search up to *attempts* times, waiting *delay_sec* seconds
    between attempts if no results were returned or an exception was raised.

    Progress and failures are reported with ``print``. No sleep happens
    after the final attempt, and the final attempt's message does not
    announce a retry.

    Parameters
    ----------
    query : str
        Search query.
    max_results : int, default 5
        Number of results to request; passed through to *search_fn*.
    attempts : int, default 4
        Maximum number of attempts before giving up.
    delay_sec : int, default 10
        Seconds to sleep between attempts.
    search_fn : Callable
        A function with signature (query: str, max_results: int) -> List[str].
        Defaults to _raw_search.

    Returns
    -------
    List[str]
        The first non-empty result list; empty if every attempt failed or
        returned nothing. Exceptions derived from ``Exception`` raised by
        *search_fn* are caught and reported, never propagated.
    """
    last_err = None

    for i in range(1, attempts + 1):
        final_attempt = i >= attempts

        try:
            results = search_fn(query, max_results)
        except Exception as e:
            last_err = e
            if final_attempt:
                # Nothing follows this attempt, so do not promise a retry.
                print(f"Attempt {i}/{attempts} failed: {e}.")
            else:
                print(f"Attempt {i}/{attempts} failed: {e}. Retrying in {delay_sec}s…")
        else:
            if results:
                return results
            if final_attempt:
                print(f"Attempt {i}/{attempts}: no results.")
            else:
                print(f"Attempt {i}/{attempts}: no results, retrying in {delay_sec}s…")

        if not final_attempt:
            time.sleep(delay_sec)

    # Compare against None explicitly: an exception object's truthiness is
    # not guaranteed (a subclass may define __bool__ or __len__).
    if last_err is not None:
        print(f"All {attempts} attempts failed. Last exception: {last_err}")
    else:
        print(f"All {attempts} attempts returned empty results.")
    return []
|
|
|
|
|
def simple_search(query: str, max_results: int = 5) -> List[str]:
    """
    Perform a web search using DuckDuckGo and return formatted results.

    The query is validated once: an empty, whitespace-only, ``None`` or
    single-character query returns ``[]`` immediately without contacting the
    search service. Otherwise the search is tried up to four times:

    * a non-empty result list is returned at once;
    * an empty result list is retried immediately;
    * an exception whose message contains ``"Ratelimit"`` or ``"202"`` is
      treated as rate limiting and followed by a fixed 20-second wait;
    * any other exception is followed by an exponential backoff of
      ``30 * 2 ** attempt`` seconds (30, 60, 120);
    * no wait happens after the final attempt.

    Progress and errors are reported with ``print``.

    Returns a list of ``"<title> – <link>\\n<description>"`` strings, or an
    empty list if the query is unusable or every attempt failed.
    """
    max_attempts = 4
    rate_limit_delay = 20

    # Validate up front: an unusable query can never succeed, so sending it
    # through the retry loop would only repeat the same warning.
    query = (query or "").strip()
    if not query:
        print("Error: Empty search query provided")
        return []
    if len(query) < 2:
        print("Warning: Query too short")
        return []

    def _fetch(q: str, limit: int) -> List[str]:
        # Exceptions are deliberately NOT caught here. Swallowing them would
        # make failures indistinguishable from "no results" and leave the
        # rate-limit/backoff handling in the loop below unreachable.
        with DDGS() as ddgs:
            formatted = []
            for r in ddgs.text(q, max_results=limit):
                title = r.get('title', 'No title')
                link = r.get('link', r.get('href', 'No link'))
                body = r.get('body', r.get('snippet', 'No description'))
                formatted.append(f"{title} – {link}\n{body}")
            return formatted

    for attempt in range(max_attempts):
        final_attempt = attempt >= max_attempts - 1

        try:
            results = _fetch(query, max_results)
        except Exception as e:
            error_msg = str(e)
            print(f"Attempt {attempt + 1}/{max_attempts} failed: {error_msg}")

            if final_attempt:
                # Out of attempts: report and stop without a pointless sleep.
                print(f"All {max_attempts} attempts failed. Last exception: {error_msg}")
                return []

            if "Ratelimit" in error_msg or "202" in error_msg:
                print(f"Rate limit detected. Waiting {rate_limit_delay} seconds...")
                time.sleep(rate_limit_delay)
            else:
                # Exponential backoff for all other failures.
                delay = 30 * (2 ** attempt)
                print(f"Retrying in {delay}s...")
                time.sleep(delay)
        else:
            if results:
                return results
            print(f"Attempt {attempt + 1}/{max_attempts}: No results found")

    return []
|
|