# app/sources/grantsgov_api.py
from __future__ import annotations
from typing import Dict, List, Any, Optional
from datetime import datetime
import requests

# Official Grants.gov Search2 endpoint (JSON POST)
API_URL = "https://api.grants.gov/v1/api/search2"

def _coerce_pipe(v: Any) -> str:
    """Accept list/tuple/set/str/None and return pipe-delimited string."""
    if v is None:
        return ""
    if isinstance(v, (list, tuple, set)):
        return "|".join([str(x) for x in v if x])
    return str(v)

def _first(x: Any) -> Optional[str]:
    """Return the first element of a list/tuple as a string; None for empty sequences."""
    if isinstance(x, (list, tuple)):
        return str(x[0]) if x else None
    return str(x) if x is not None else None

def _parse_date(d: Any) -> Optional[str]:
    """Return YYYY-MM-DD or None (be tolerant to formats)."""
    if not d:
        return None
    s = str(d)
    # common formats seen in the API
    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
        try:
            return datetime.strptime(s, fmt).date().isoformat()
        except Exception:
            pass
    try:
        return datetime.fromisoformat(s).date().isoformat()
    except Exception:
        return None

# Map common config keys → API keys so older configs still work
_KEY_MAP = {
    "opportunityStatuses": "oppStatuses",
    "agencyCodes": "agencies",
    "agencies": "agencies",
    "alns": "aln",
}

def _remap_payload_keys(payload: Dict[str, Any]) -> Dict[str, Any]:
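    """Copy the payload and add API-named aliases for legacy config keys.

    Original keys are left untouched; e.g. a legacy "opportunityStatuses"
    entry also becomes available under "oppStatuses".
    """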
    out = dict(payload or {})
    for k, v in list(out.items()):
        if k in _KEY_MAP:
            out[_KEY_MAP[k]] = v
    return out

def search_grants(
    _unused_url: str,
    payload: Dict[str, Any],
    page_size: int = 100,
    max_pages: int = 10,
    timeout: int = 30,
) -> Dict[str, Any]:
    """
    Calls Grants.gov Search2 API with pagination and returns normalized results:

    Returns:
        {
          "hits": [ { unified schema per record }, ... ],
          "hitCount": int
        }
    """
    all_hits: List[Dict[str, Any]] = []
    start = 0
    pages = 0
    hit_count: Optional[int] = None

    # Bridge payload keys and coerce to API expectations
    payload = _remap_payload_keys(payload or {})
    keyword = payload.get("keyword", "") or payload.get("keywords", "")
    oppNum = payload.get("oppNum", "")
    eligibilities = _coerce_pipe(payload.get("eligibilities", ""))
    agencies     = _coerce_pipe(payload.get("agencies", ""))
    oppStatuses  = _coerce_pipe(payload.get("oppStatuses", "")) or "forecasted|posted"
    aln          = _coerce_pipe(payload.get("aln", ""))
    fundingCategories = _coerce_pipe(payload.get("fundingCategories", ""))

    session = requests.Session()
    headers = {"Content-Type": "application/json"}

    while pages < max_pages:
        req_body = {
            "rows": page_size,
            "startRecordNum": start,  # pagination
            "keyword": keyword,
            "oppNum": oppNum,
            "eligibilities": eligibilities,
            "agencies": agencies,
            "oppStatuses": oppStatuses,
            "aln": aln,
            "fundingCategories": fundingCategories,
        }

        resp = session.post(API_URL, json=req_body, headers=headers, timeout=timeout)
        resp.raise_for_status()
        j = resp.json() or {}

        data = j.get("data") or {}
        if hit_count is None:
            try:
                hit_count = int(data.get("hitCount", 0))
            except Exception:
                hit_count = 0

        opp_hits = data.get("oppHits") or []
        if not opp_hits:
            break

        # ---- Normalize each record to unified schema ----
        for h in opp_hits:
            gg_id = h.get("id")
            num   = h.get("number")
            aln_list = h.get("alnist", []) or []

            norm = {
                # unified schema (stable id avoids duplicates across configs)
                "id": f"gg:{num or gg_id}",
                "source": "grants.gov",
                "title": h.get("title"),
                "agency": h.get("agencyName") or h.get("agencyCode"),
                "program_number": _first(aln_list),          # Assistance Listing (ALN/CFDA)
                "posted_date": _parse_date(h.get("openDate")),
                "deadline": _parse_date(h.get("closeDate")),
                "synopsis": h.get("synopsis") or h.get("summary"),
                "location_scope": ["US"],                    # Grants.gov is US-wide by default
                "tags": [],                                  # to be extended by ingest with config categories
                "url": f"https://www.grants.gov/search-results-detail/{gg_id}" if gg_id else None,
                "raw": h,                                    # keep full source blob for traceability
            }

            # Optional award fields if present (keep None if absent)
            if "awardFloor" in h:
                norm["award_floor"] = h.get("awardFloor")
            if "awardCeiling" in h:
                norm["award_ceiling"] = h.get("awardCeiling")
            if "expectedNumberOfAwards" in h:
                norm["expected_awards"] = h.get("expectedNumberOfAwards")
            if "eligibility" in h:
                norm["eligibility"] = h.get("eligibility")

            all_hits.append(norm)

        got = len(opp_hits)
        start += got
        pages += 1
        if hit_count is not None and start >= hit_count:
            break

    return {"hits": all_hits, "hitCount": hit_count or 0}