# grants-rag / app/sources/grantsgov_api.py
# Commit 99589b3 — "Add local extra JSON source and update config" (Gen. Overseer Lupo)
# app/sources/grantsgov_api.py
from __future__ import annotations
from typing import Dict, List, Any, Optional
from datetime import datetime
import requests
# Official Grants.gov Search2 endpoint (JSON POST)
API_URL = "https://api.grants.gov/v1/api/search2"
def _coerce_pipe(v: Any) -> str:
"""Accept list/tuple/set/str/None and return pipe-delimited string."""
if v is None:
return ""
if isinstance(v, (list, tuple, set)):
return "|".join([str(x) for x in v if x])
return str(v)
def _first(x: Any) -> Optional[str]:
if isinstance(x, (list, tuple)) and x:
return str(x[0])
return str(x) if x is not None else None
def _parse_date(d: Any) -> Optional[str]:
"""Return YYYY-MM-DD or None (be tolerant to formats)."""
if not d:
return None
s = str(d)
# common formats seen in the API
for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
try:
return datetime.strptime(s, fmt).date().isoformat()
except Exception:
pass
try:
return datetime.fromisoformat(s).date().isoformat()
except Exception:
return None
# Map common config keys → API keys so older configs still work
_KEY_MAP = {
"opportunityStatuses": "oppStatuses",
"agencyCodes": "agencies",
"agencies": "agencies",
"alns": "aln",
}
def _remap_payload_keys(payload: Dict[str, Any]) -> Dict[str, Any]:
out = dict(payload or {})
for k, v in list(out.items()):
if k in _KEY_MAP:
out[_KEY_MAP[k]] = v
return out
def search_grants(
    _unused_url: str,
    payload: Dict[str, Any],
    page_size: int = 100,
    max_pages: int = 10,
    timeout: int = 30,
) -> Dict[str, Any]:
    """Call the Grants.gov Search2 API with pagination and normalize results.

    Args:
        _unused_url: Ignored; kept for backward compatibility with callers
            that still pass an endpoint URL (API_URL is always used).
        payload: Search criteria; legacy key names are bridged via
            _remap_payload_keys.
        page_size: Records requested per page ("rows" in the API).
        max_pages: Safety cap on the number of page requests.
        timeout: Per-request timeout in seconds.

    Returns:
        {"hits": [<unified-schema record>, ...], "hitCount": int}

    Raises:
        requests.HTTPError: On a non-2xx API response.
    """
    payload = _remap_payload_keys(payload or {})

    # Coerce config values to the string forms the API expects.
    keyword = payload.get("keyword", "") or payload.get("keywords", "")
    opp_num = payload.get("oppNum", "")
    eligibilities = _coerce_pipe(payload.get("eligibilities", ""))
    agencies = _coerce_pipe(payload.get("agencies", ""))
    opp_statuses = _coerce_pipe(payload.get("oppStatuses", "")) or "forecasted|posted"
    aln = _coerce_pipe(payload.get("aln", ""))
    funding_categories = _coerce_pipe(payload.get("fundingCategories", ""))

    all_hits: List[Dict[str, Any]] = []
    start = 0
    pages = 0
    hit_count: Optional[int] = None
    headers = {"Content-Type": "application/json"}

    # Context manager guarantees the session is closed even on errors
    # (the previous version leaked it).
    with requests.Session() as session:
        while pages < max_pages:
            req_body = {
                "rows": page_size,
                "startRecordNum": start,  # pagination offset
                "keyword": keyword,
                "oppNum": opp_num,
                "eligibilities": eligibilities,
                "agencies": agencies,
                "oppStatuses": opp_statuses,
                "aln": aln,
                "fundingCategories": funding_categories,
            }
            resp = session.post(API_URL, json=req_body, headers=headers, timeout=timeout)
            resp.raise_for_status()
            data = (resp.json() or {}).get("data") or {}

            if hit_count is None:
                try:
                    hit_count = int(data.get("hitCount", 0))
                except (TypeError, ValueError):
                    # hitCount missing or malformed — treat as zero.
                    hit_count = 0

            opp_hits = data.get("oppHits") or []
            if not opp_hits:
                break

            all_hits.extend(_normalize_hit(h) for h in opp_hits)

            start += len(opp_hits)
            pages += 1
            if hit_count is not None and start >= hit_count:
                break

    return {"hits": all_hits, "hitCount": hit_count or 0}


def _normalize_hit(h: Dict[str, Any]) -> Dict[str, Any]:
    """Map one raw Search2 oppHit record to the unified result schema."""
    gg_id = h.get("id")
    num = h.get("number")
    aln_list = h.get("alnist", []) or []
    norm = {
        # Stable id avoids duplicates across configs.
        "id": f"gg:{num or gg_id}",
        "source": "grants.gov",
        "title": h.get("title"),
        "agency": h.get("agencyName") or h.get("agencyCode"),
        "program_number": _first(aln_list),  # Assistance Listing (ALN/CFDA)
        "posted_date": _parse_date(h.get("openDate")),
        "deadline": _parse_date(h.get("closeDate")),
        "synopsis": h.get("synopsis") or h.get("summary"),
        "location_scope": ["US"],  # Grants.gov is US-wide by default
        "tags": [],  # extended later by ingest with config categories
        "url": f"https://www.grants.gov/search-results-detail/{gg_id}" if gg_id else None,
        "raw": h,  # keep full source blob for traceability
    }
    # Optional award fields: copy only when the source key is present,
    # so absent keys stay absent (not None) in the normalized record.
    for src_key, dst_key in (
        ("awardFloor", "award_floor"),
        ("awardCeiling", "award_ceiling"),
        ("expectedNumberOfAwards", "expected_awards"),
        ("eligibility", "eligibility"),
    ):
        if src_key in h:
            norm[dst_key] = h[src_key]
    return norm