Spaces:
Running
Running
File size: 5,460 Bytes
598f5cb 1e417f3 99589b3 598f5cb 99589b3 598f5cb 1e417f3 99589b3 1e417f3 598f5cb 99589b3 598f5cb 99589b3 598f5cb 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 99589b3 1e417f3 598f5cb 1e417f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# app/sources/grantsgov_api.py
from __future__ import annotations
from typing import Dict, List, Any, Optional
from datetime import datetime
import requests
# Official Grants.gov Search2 endpoint (JSON POST).
# search_grants() always posts to this URL; the URL argument it receives is ignored.
API_URL = "https://api.grants.gov/v1/api/search2"
def _coerce_pipe(v: Any) -> str:
"""Accept list/tuple/set/str/None and return pipe-delimited string."""
if v is None:
return ""
if isinstance(v, (list, tuple, set)):
return "|".join([str(x) for x in v if x])
return str(v)
def _first(x: Any) -> Optional[str]:
if isinstance(x, (list, tuple)) and x:
return str(x[0])
return str(x) if x is not None else None
def _parse_date(d: Any) -> Optional[str]:
"""Return YYYY-MM-DD or None (be tolerant to formats)."""
if not d:
return None
s = str(d)
# common formats seen in the API
for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
try:
return datetime.strptime(s, fmt).date().isoformat()
except Exception:
pass
try:
return datetime.fromisoformat(s).date().isoformat()
except Exception:
return None
# Map common config keys → API keys so older configs still work
_KEY_MAP = {
"opportunityStatuses": "oppStatuses",
"agencyCodes": "agencies",
"agencies": "agencies",
"alns": "aln",
}
def _remap_payload_keys(payload: Dict[str, Any]) -> Dict[str, Any]:
out = dict(payload or {})
for k, v in list(out.items()):
if k in _KEY_MAP:
out[_KEY_MAP[k]] = v
return out
def _normalize_hit(h: Dict[str, Any]) -> Dict[str, Any]:
    """Convert one Search2 ``oppHits`` record into the unified record schema."""
    gg_id = h.get("id")
    num = h.get("number")
    # NOTE(review): "alnist" is the key this module has always read.
    # "alnList"/"cfdaList" are tolerated as fallbacks in case the API spells
    # the field differently - confirm against a live response.
    aln_list = h.get("alnist") or h.get("alnList") or h.get("cfdaList") or []
    norm: Dict[str, Any] = {
        # unified schema (stable id avoids duplicates across configs)
        "id": f"gg:{num or gg_id}",
        "source": "grants.gov",
        "title": h.get("title"),
        "agency": h.get("agencyName") or h.get("agencyCode"),
        # Assistance Listing (ALN/CFDA). Guarded so a record without any
        # listing yields None rather than a stringified empty list.
        "program_number": _first(aln_list) if aln_list else None,
        "posted_date": _parse_date(h.get("openDate")),
        "deadline": _parse_date(h.get("closeDate")),
        "synopsis": h.get("synopsis") or h.get("summary"),
        "location_scope": ["US"],  # Grants.gov is US-wide by default
        "tags": [],  # to be extended by ingest with config categories
        "url": f"https://www.grants.gov/search-results-detail/{gg_id}" if gg_id else None,
        "raw": h,  # keep full source blob for traceability
    }
    # Optional fields are copied only when the source record carries the key,
    # so absent fields stay absent instead of becoming explicit None entries.
    optional_fields = (
        ("awardFloor", "award_floor"),
        ("awardCeiling", "award_ceiling"),
        ("expectedNumberOfAwards", "expected_awards"),
        ("eligibility", "eligibility"),
    )
    for src_key, dst_key in optional_fields:
        if src_key in h:
            norm[dst_key] = h.get(src_key)
    return norm


def search_grants(
    _unused_url: str,
    payload: Dict[str, Any],
    page_size: int = 100,
    max_pages: int = 10,
    timeout: int = 30,
) -> Dict[str, Any]:
    """Query the Grants.gov Search2 API with pagination and normalize the hits.

    Args:
        _unused_url: Ignored; requests always go to ``API_URL``. Kept so
            existing callers that pass a URL keep working.
        payload: Search filters. Legacy key names are bridged through
            ``_remap_payload_keys``; list-like filter values are converted to
            pipe-delimited strings. ``keyword`` falls back to ``keywords``,
            and ``oppStatuses`` defaults to ``"forecasted|posted"``.
        page_size: Number of rows requested per page.
        max_pages: Upper bound on the number of pages fetched.
        timeout: Per-request timeout in seconds passed to ``requests``.

    Returns:
        ``{"hits": [<unified record>, ...], "hitCount": int}`` where
        ``hitCount`` is the total reported by the first response (0 when
        missing or not an integer).

    Raises:
        requests.HTTPError: If a response has an error status
            (via ``raise_for_status``). Other ``requests`` exceptions
            (connection errors, timeouts) propagate unchanged.
    """
    all_hits: List[Dict[str, Any]] = []
    start = 0
    pages = 0
    hit_count: Optional[int] = None

    # Bridge payload keys and coerce to API expectations
    payload = _remap_payload_keys(payload or {})

    # Everything except the paging fields is identical for every request,
    # so build it once outside the loop.
    base_body = {
        "keyword": payload.get("keyword", "") or payload.get("keywords", ""),
        "oppNum": payload.get("oppNum", ""),
        "eligibilities": _coerce_pipe(payload.get("eligibilities", "")),
        "agencies": _coerce_pipe(payload.get("agencies", "")),
        "oppStatuses": _coerce_pipe(payload.get("oppStatuses", "")) or "forecasted|posted",
        "aln": _coerce_pipe(payload.get("aln", "")),
        "fundingCategories": _coerce_pipe(payload.get("fundingCategories", "")),
    }
    headers = {"Content-Type": "application/json"}

    # The context manager closes the session's pooled connections even when a
    # request raises.
    with requests.Session() as session:
        while pages < max_pages:
            req_body = dict(base_body, rows=page_size, startRecordNum=start)
            resp = session.post(API_URL, json=req_body, headers=headers, timeout=timeout)
            resp.raise_for_status()
            data = (resp.json() or {}).get("data") or {}

            if hit_count is None:
                # Only the first page's total is recorded.
                try:
                    hit_count = int(data.get("hitCount", 0))
                except (TypeError, ValueError):
                    hit_count = 0

            opp_hits = data.get("oppHits") or []
            if not opp_hits:
                break

            all_hits.extend(_normalize_hit(h) for h in opp_hits)

            # Advance by what was actually returned, not by page_size, so a
            # short page cannot cause records to be skipped.
            start += len(opp_hits)
            pages += 1
            if hit_count is not None and start >= hit_count:
                break

    return {"hits": all_hits, "hitCount": hit_count or 0}
|