File size: 3,516 Bytes
99589b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from typing import Any, Dict, Callable, Optional
from datetime import datetime

def _iso(d: Any) -> Optional[str]:
    if not d:
        return None
    s = str(d)
    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
        try:
            return datetime.strptime(s, fmt).date().isoformat()
        except Exception:
            pass
    try:
        return datetime.fromisoformat(s).date().isoformat()
    except Exception:
        return None

def _first(x: Any) -> Any:
    return (x[0] if isinstance(x, (list, tuple)) and x else x)

def _list(x: Any) -> list:
    if x is None:
        return []
    if isinstance(x, list):
        return x
    if isinstance(x, (set, tuple)):
        return list(x)
    return [x]

# Registry of source mappers, keyed by source name. Each mapper takes one raw
# record dict and returns that record in the unified schema. Entries are added
# by the @mapper decorator and looked up by normalize().
MAPPERS: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = {}

def mapper(name: str):
    """Create a decorator that registers a function in ``MAPPERS`` under *name*.

    The decorated function is returned unchanged, so it stays directly
    callable. Registering a second function under the same name replaces the
    earlier entry.
    """
    def _register(func: Callable[[Dict[str, Any]], Dict[str, Any]]):
        MAPPERS[name] = func
        return func

    return _register

@mapper("grants_gov")
def _map_grants_gov(h: Dict[str, Any]) -> Dict[str, Any]:
    """Translate one raw "grants_gov" record into the unified schema.

    Most fields accept either the raw key or the already-unified key, so a
    record that has been partially normalized passes through unchanged. The
    input dict is kept by reference under ``"raw"``.

    NOTE(review): "alnist" is the key this mapper reads; presumably it mirrors
    the upstream field spelling - confirm before "correcting" it.
    """
    opp_id = h.get("id")
    opp_number = h.get("number")
    aln_values = h.get("alnist") or h.get("aln") or []

    # Use the record's own URL if it has one; otherwise build a detail link
    # from the id when an id is available.
    link = h.get("url")
    if not link:
        link = f"https://www.grants.gov/search-results-detail/{opp_id}" if opp_id else None

    record: Dict[str, Any] = {
        "id": f"gg:{opp_number or opp_id}",
        "source": "grants.gov",
        "title": h.get("title"),
        "agency": h.get("agencyName") or h.get("agencyCode") or h.get("agency"),
        "program_number": _first(aln_values) or h.get("program_number"),
        "posted_date": _iso(h.get("openDate") or h.get("posted_date")),
        "deadline": _iso(h.get("closeDate") or h.get("deadline")),
        "synopsis": h.get("synopsis") or h.get("summary"),
        "location_scope": h.get("location_scope") or ["US"],
        "tags": h.get("tags") or [],
        "url": link,
        "raw": h,
    }

    # Optional fields are copied only when the raw record carries them; a
    # value under the unified key wins over the raw key.
    optional_fields = (
        ("awardFloor", "award_floor"),
        ("awardCeiling", "award_ceiling"),
        ("expectedNumberOfAwards", "expected_awards"),
        ("eligibility", "eligibility"),
    )
    for raw_key, unified_key in optional_fields:
        value = h.get(unified_key)
        if value is None:
            value = h.get(raw_key)
        if value is not None:
            record[unified_key] = value
    return record

@mapper("local_sample")
def _map_local_sample(op: Dict[str, Any]) -> Dict[str, Any]:
    """Translate one raw "local_sample" record into the unified schema.

    Program number and URL are always ``None``, the location scope is always
    ``["US"]`` and tags start empty. The input dict is kept by reference
    under ``"raw"``.
    """
    number = op.get("opportunityNumber")
    posted = _iso(op.get("postedDate"))
    closes = _iso(op.get("closeDate"))

    record: Dict[str, Any] = {
        "id": f"sample:{number}",
        "source": "sample_local",
        "title": op.get("opportunityTitle"),
        "agency": op.get("agency"),
        "program_number": None,
        "posted_date": posted,
        "deadline": closes,
        "synopsis": op.get("synopsis"),
        "location_scope": ["US"],
        "tags": [],
        "url": None,
        "raw": op,
    }
    return record

def normalize(source_key: str, raw: Dict[str, Any], static: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Map a raw record to the unified schema and attach static metadata.

    Args:
        source_key: Name the mapper was registered under in ``MAPPERS``.
        raw: The raw record to hand to that mapper.
        static: Optional per-source metadata. A truthy ``"geo"`` value is
            copied (by reference) onto the record, and ``"categories"``
            (a single value or a collection) is merged into both the
            record's ``"categories"`` and its ``"tags"``.

    Returns:
        The mapped record. ``"categories"`` and ``"tags"`` are always present
        as lists without duplicates; tags keep the mapper's original order
        followed by any new categories, so the output is deterministic.

    Raises:
        KeyError: If no mapper is registered for ``source_key``.
    """
    if source_key not in MAPPERS:
        # !r formatting (rather than "%r" % key) also works for tuple keys.
        raise KeyError(f"No mapper registered for {source_key!r}")
    rec = MAPPERS[source_key](raw)
    static = static or {}

    # attach geo
    if static.get("geo"):
        rec["geo"] = static["geo"]

    # attach categories and mirror into tags
    cats = _list(static.get("categories"))
    categories = rec.setdefault("categories", [])
    for c in cats:
        if c not in categories:
            categories.append(c)

    # Order-preserving de-duplication. A set() round-trip would shuffle the
    # tags between runs (string hashing is randomized per process) and would
    # raise on unhashable tag values.
    merged_tags: list = []
    for tag in _list(rec.get("tags")) + cats:
        if tag not in merged_tags:
            merged_tags.append(tag)
    rec["tags"] = merged_tags
    return rec