import re
from bs4 import BeautifulSoup
def parse_html_prompt(input_str):
soup = BeautifulSoup(input_str, "html.parser")
# 处理
内的内容
p_content = soup.find("p").decode_contents().replace("
", "\n")
p_content = re.sub(r']*>(.*?)', r'<\1>', p_content)
template = p_content.strip().replace('
', '').replace(' ', '').replace('
', '')
# 解析 component-item
components = {}
for item in soup.find_all("div", class_="component-item"):
key_span = item.find("div", class_="component-key").find("span")
key = key_span.get_text(strip=True) if key_span else ""
value_div = item.find("div", class_="component-value")
value_content = value_div.decode_contents()
value_content = re.sub(r']*>(.*?)', r'{\1}', value_content)
components[key] = value_content.strip().replace('
', '').replace('
', '')
# 解析 self-info-item
self_prompt = {}
for item in soup.find_all("div", class_="self-info-item"):
key_span = item.find("div", class_="component-key").find("span")
key = key_span.get_text(strip=True) if key_span else ""
value_div = item.find("div", class_="component-value")
value = value_div.get_text(strip=True) if value_div else ""
self_prompt[key] = value.replace('
', '').replace('
', '')
return {
'template': template,
'components': components,
'self_prompt': self_prompt
}
#print(parse_html(info))