File size: 1,551 Bytes
96b6673 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import re
from bs4 import BeautifulSoup
# Matches a <span ...>inner</span> pair on a single line; group 1 is the inner markup.
_SPAN_PATTERN = re.compile(r'<span[^>]*>(.*?)</span>')


def _component_key(item):
    """Return the stripped text of the <span> inside the item's ``component-key`` div.

    Returns an empty string when either the ``component-key`` div or its
    <span> is absent, instead of raising ``AttributeError``.
    """
    key_div = item.find("div", class_="component-key")
    key_span = key_div.find("span") if key_div is not None else None
    return key_span.get_text(strip=True) if key_span is not None else ""


def parse_html_prompt(input_str):
    """Parse a prompt rendered as HTML into its template and component parts.

    Args:
        input_str: HTML markup to parse with BeautifulSoup's ``html.parser``.

    Returns:
        A dict with three keys:

        * ``'template'``: inner markup of the first ``<p>`` element, with each
          ``<span ...>x</span>`` rewritten to ``<x>`` and ``<br/>`` tags
          removed. Empty string when the input has no ``<p>``.
        * ``'components'``: maps each ``div.component-item`` key to the inner
          markup of its ``component-value`` div, with each
          ``<span ...>x</span>`` rewritten to ``{x}`` and ``<br/>`` removed.
        * ``'self_prompt'``: maps each ``div.self-info-item`` key to the
          stripped text of its ``component-value`` div.

        Keys come from the ``<span>`` inside each item's ``component-key``
        div; a missing key becomes ``""`` and a missing value becomes ``""``.
        Items that share a key overwrite earlier ones.
    """
    soup = BeautifulSoup(input_str, "html.parser")

    # Handle the content inside <p></p>.
    paragraph = soup.find("p")
    if paragraph is None:
        # No paragraph at all: return an empty template rather than crashing.
        template = ""
    else:
        p_content = paragraph.decode_contents().replace("<br>", "\n")
        p_content = _SPAN_PATTERN.sub(r'<\1>', p_content)
        # NOTE(review): the middle replace removes every space from the
        # template -- confirm this is intended (it may have been meant for
        # non-breaking spaces only). The replace order is kept as-is.
        template = (
            p_content.strip()
            .replace(' <br/>', '')
            .replace(' ', '')
            .replace('<br/>', '')
        )

    # Parse component-item entries; values keep their inner markup.
    components = {}
    for item in soup.find_all("div", class_="component-item"):
        key = _component_key(item)
        value_div = item.find("div", class_="component-value")
        if value_div is None:
            # Same fallback as the self-info branch below.
            components[key] = ""
            continue
        value_content = _SPAN_PATTERN.sub(r'{\1}', value_div.decode_contents())
        components[key] = (
            value_content.strip().replace(' <br/>', '').replace('<br/>', '')
        )

    # Parse self-info-item entries; values are reduced to plain text.
    self_prompt = {}
    for item in soup.find_all("div", class_="self-info-item"):
        key = _component_key(item)
        value_div = item.find("div", class_="component-value")
        value = value_div.get_text(strip=True) if value_div is not None else ""
        self_prompt[key] = value.replace(' <br/>', '').replace('<br/>', '')

    return {
        'template': template,
        'components': components,
        'self_prompt': self_prompt,
    }
# Example usage: print(parse_html_prompt(info))