# app/sources/grantsgov_api.py
from __future__ import annotations
from typing import Dict, List, Any, Optional
from datetime import datetime
import requests

# Official Grants.gov Search2 endpoint (JSON POST)
API_URL = "https://api.grants.gov/v1/api/search2"

def _coerce_pipe(v: Any) -> str:
    """Accept list/tuple/set/str/None and return pipe-delimited string."""
    if v is None:
        return ""
    if isinstance(v, (list, tuple, set)):
        return "|".join([str(x) for x in v if x])
    return str(v)

def _first(x: Any) -> Optional[str]:
    """Return the first element of a list/tuple as a string; None for empty sequences."""
    if isinstance(x, (list, tuple)):
        return str(x[0]) if x else None
    return str(x) if x is not None else None

def _parse_date(d: Any) -> Optional[str]:
    """Return YYYY-MM-DD or None (be tolerant to formats)."""
    if not d:
        return None
    s = str(d)
    # common formats seen in the API
    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
        try:
            return datetime.strptime(s, fmt).date().isoformat()
        except Exception:
            pass
    try:
        return datetime.fromisoformat(s).date().isoformat()
    except Exception:
        return None

# Map common config keys → API keys so older configs still work
_KEY_MAP = {
    "opportunityStatuses": "oppStatuses",
    "agencyCodes": "agencies",
    "agencies": "agencies",
    "alns": "aln",
}

def _remap_payload_keys(payload: Dict[str, Any]) -> Dict[str, Any]:
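    """Copy the payload and add API-named aliases for legacy config keys.

    Original keys are left untouched; e.g. a legacy "opportunityStatuses"
    entry also becomes available under "oppStatuses".
    """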
    out = dict(payload or {})
    for k, v in list(out.items()):
        if k in _KEY_MAP:
            out[_KEY_MAP[k]] = v
    return out

def search_grants(
    _unused_url: str,
    payload: Dict[str, Any],
    page_size: int = 100,
    max_pages: int = 10,
    timeout: int = 30,
) -> Dict[str, Any]:
    """
    Calls Grants.gov Search2 API with pagination and returns normalized results:

    Returns:
        {
          "hits": [ { unified schema per record }, ... ],
          "hitCount": int
        }
    """
    all_hits: List[Dict[str, Any]] = []
    start = 0
    pages = 0
    hit_count: Optional[int] = None

    # Bridge payload keys and coerce to API expectations
    payload = _remap_payload_keys(payload or {})
    keyword = payload.get("keyword", "") or payload.get("keywords", "")
    oppNum = payload.get("oppNum", "")
    eligibilities = _coerce_pipe(payload.get("eligibilities", ""))
    agencies     = _coerce_pipe(payload.get("agencies", ""))
    oppStatuses  = _coerce_pipe(payload.get("oppStatuses", "")) or "forecasted|posted"
    aln          = _coerce_pipe(payload.get("aln", ""))
    fundingCategories = _coerce_pipe(payload.get("fundingCategories", ""))

    session = requests.Session()
    headers = {"Content-Type": "application/json"}

    while pages < max_pages:
        req_body = {
            "rows": page_size,
            "startRecordNum": start,  # pagination
            "keyword": keyword,
            "oppNum": oppNum,
            "eligibilities": eligibilities,
            "agencies": agencies,
            "oppStatuses": oppStatuses,
            "aln": aln,
            "fundingCategories": fundingCategories,
        }

        resp = session.post(API_URL, json=req_body, headers=headers, timeout=timeout)
        resp.raise_for_status()
        j = resp.json() or {}

        data = j.get("data") or {}
        if hit_count is None:
            try:
                hit_count = int(data.get("hitCount", 0))
            except Exception:
                hit_count = 0

        opp_hits = data.get("oppHits") or []
        if not opp_hits:
            break

        # ---- Normalize each record to unified schema ----
        for h in opp_hits:
            gg_id = h.get("id")
            num   = h.get("number")
            aln_list = h.get("alnist", []) or []

            norm = {
                # unified schema (stable id avoids duplicates across configs)
                "id": f"gg:{num or gg_id}",
                "source": "grants.gov",
                "title": h.get("title"),
                "agency": h.get("agencyName") or h.get("agencyCode"),
                "program_number": _first(aln_list),          # Assistance Listing (ALN/CFDA)
                "posted_date": _parse_date(h.get("openDate")),
                "deadline": _parse_date(h.get("closeDate")),
                "synopsis": h.get("synopsis") or h.get("summary"),
                "location_scope": ["US"],                    # Grants.gov is US-wide by default
                "tags": [],                                  # to be extended by ingest with config categories
                "url": f"https://www.grants.gov/search-results-detail/{gg_id}" if gg_id else None,
                "raw": h,                                    # keep full source blob for traceability
            }

            # Optional award fields if present (keep None if absent)
            if "awardFloor" in h:
                norm["award_floor"] = h.get("awardFloor")
            if "awardCeiling" in h:
                norm["award_ceiling"] = h.get("awardCeiling")
            if "expectedNumberOfAwards" in h:
                norm["expected_awards"] = h.get("expectedNumberOfAwards")
            if "eligibility" in h:
                norm["eligibility"] = h.get("eligibility")

            all_hits.append(norm)

        got = len(opp_hits)
        start += got
        pages += 1
        if hit_count is not None and start >= hit_count:
            break

    return {"hits": all_hits, "hitCount": hit_count or 0}