Spaces:
Sleeping
Sleeping
| """Text processing utilities for LegisQA""" | |
| import re | |
# Maps the short legislation-type code found in a legislation ID (the "hr" in
# "118-hr-1234") to the path segment used when building Congress.gov bill URLs.
CONGRESS_GOV_TYPE_MAP = {
    # House measures
    "hconres": "house-concurrent-resolution",
    "hjres": "house-joint-resolution",
    "hr": "house-bill",
    "hres": "house-resolution",
    # Senate measures
    "s": "senate-bill",
    "sconres": "senate-concurrent-resolution",
    "sjres": "senate-joint-resolution",
    "sres": "senate-resolution",
}
def escape_markdown(text: str) -> str:
    """Return ``text`` with each markdown special character backslash-escaped.

    The characters treated as special are the backslash, the backtick and
    ``* _ { } [ ] ( ) # + - . ! $``. Every occurrence gets exactly one
    backslash placed in front of it; all other characters are left as-is.
    """
    specials = r"\`*_{}[]()#+-.!$"
    # A translation table rewrites every special character in a single pass,
    # so the backslashes added for one character are never escaped again.
    escape_table = str.maketrans({ch: "\\" + ch for ch in specials})
    return text.translate(escape_table)
def get_sponsor_url(bioguide_id: str) -> str:
    """Build the bioguide.congress.gov biography URL for ``bioguide_id``.

    The ID is appended verbatim to the bioguide search path; no validation
    or URL-encoding is applied.
    """
    base_url = "https://bioguide.congress.gov/search/bio/"
    return f"{base_url}{bioguide_id}"
def get_congress_gov_url(congress_num: int, legis_type: str, legis_num: int) -> str:
    """Generate the Congress.gov URL for a piece of legislation.

    Args:
        congress_num: Congress number, e.g. ``118``. Anything ``int()``
            accepts works, including numeric strings.
        legis_type: Short type code; must be a key of
            ``CONGRESS_GOV_TYPE_MAP`` (e.g. ``"hr"``, ``"sjres"``).
        legis_num: Bill or resolution number; also passed through ``int()``.

    Returns:
        A URL of the form
        ``https://www.congress.gov/bill/118th-congress/house-bill/1234``.

    Raises:
        KeyError: If ``legis_type`` is not in ``CONGRESS_GOV_TYPE_MAP``.
        ValueError: If ``congress_num`` or ``legis_num`` is not an integer.
    """
    type_slug = CONGRESS_GOV_TYPE_MAP[legis_type]
    congress = int(congress_num)
    # Congress.gov paths use English ordinals ("103rd-congress",
    # "121st-congress"), so a fixed "th" suffix is wrong for numbers ending
    # in 1, 2 or 3. Numbers ending in 11, 12 and 13 still take "th".
    if 11 <= congress % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(congress % 10, "th")
    return (
        f"https://www.congress.gov/bill/{congress}{suffix}-congress/"
        f"{type_slug}/{int(legis_num)}"
    )
def legis_id_to_link(legis_id: str) -> str:
    """Return the Congress.gov URL for a ``congress-type-number`` legislation ID.

    The ID (for example ``"118-hr-1234"``) is split on ``-`` into exactly
    three fields, which are handed to ``get_congress_gov_url``. An ID with
    any other number of fields raises ``ValueError`` from the unpacking.
    """
    congress, type_code, number = legis_id.split("-")
    return get_congress_gov_url(
        congress_num=congress,
        legis_type=type_code,
        legis_num=number,
    )
def legis_id_match_to_link(matchobj):
    """Render a regex match of a legislation ID as a markdown link.

    The matched text is used as the link label and the URL returned by
    ``legis_id_to_link`` for that text as the link target. Intended for use
    as the replacement callback of ``re.sub``.
    """
    # group(0) is the full matched substring, i.e. string[start():end()].
    legis_id = matchobj.group(0)
    target = legis_id_to_link(legis_id)
    return f"[{legis_id}]({target})"
def replace_legis_ids_with_urls(text: str) -> str:
    """Replace legislation IDs in ``text`` with markdown links.

    A legislation ID has the form ``<congress>-<type>-<number>``, for example
    ``118-hr-1234``, where:

    * ``congress`` is a Congress number from 113 through 129,
    * ``type`` is one of the keys of ``CONGRESS_GOV_TYPE_MAP``,
    * ``number`` is one to five digits.

    Each ID found is replaced by ``legis_id_match_to_link``'s markdown link;
    all other text is returned unchanged.
    """
    # Only accept type codes that can actually be resolved to a URL; a
    # generic ``[a-z]+`` would let text such as "118-foo-12" reach the
    # callback and fail with KeyError on the type lookup.
    type_codes = "|".join(
        re.escape(code)
        for code in sorted(CONGRESS_GOV_TYPE_MAP, key=len, reverse=True)
    )
    # The digit lookarounds stop the pattern from matching inside a longer
    # number ("2118-hr-5", "118-hr-123456"), which would link the wrong bill.
    # Letters or underscores may still adjoin the ID (e.g. "_118-hr-5_").
    # The Congress range is written out explicitly so 120-122 are covered
    # along with 113-119 and 123-129.
    pattern = rf"(?<!\d)(?:11[3-9]|12\d)-(?:{type_codes})-\d{{1,5}}(?!\d)"
    return re.sub(pattern, legis_id_match_to_link, text)