File size: 1,551 Bytes
96b6673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import re
from bs4 import BeautifulSoup

# Matches a <span ...>...</span> pair on a single line ('.' does not cross
# newlines here) and captures its inner content, non-greedily.
_SPAN_RE = re.compile(r'<span[^>]*>(.*?)</span>')


def _component_key(item):
    """Return the stripped text of the key <span> inside *item*, or ""."""
    key_div = item.find("div", class_="component-key")
    # Guard the <div> as well as the <span>: either may be absent.
    key_span = key_div.find("span") if key_div is not None else None
    return key_span.get_text(strip=True) if key_span is not None else ""


def parse_html_prompt(input_str):
    """Parse an HTML prompt description into template/components/self_prompt.

    The markup is parsed with BeautifulSoup's "html.parser" and three parts
    are extracted:

    * ``template`` - inner HTML of the first ``<p>`` element, with every
      single-line ``<span ...>text</span>`` rewritten to ``<text>``, then
      stripped, with all space characters and ``<br/>`` tags removed.
    * ``components`` - for each ``<div class="component-item">``, maps the
      text of the ``<span>`` inside its ``component-key`` div to the inner
      HTML of its ``component-value`` div, with spans rewritten to
      ``{text}`` and ``<br/>`` tags removed.
    * ``self_prompt`` - for each ``<div class="self-info-item">``, maps the
      key (found the same way) to the stripped text of its
      ``component-value`` div.

    Missing elements never raise: an absent ``<p>``, key or value yields an
    empty string. Items that share a key overwrite one another, so the last
    one in document order wins.

    Args:
        input_str: HTML markup to parse.

    Returns:
        A dict with the keys ``'template'`` (str), ``'components'`` (dict)
        and ``'self_prompt'`` (dict).
    """
    soup = BeautifulSoup(input_str, "html.parser")

    # Process the content inside <p></p>.
    paragraph = soup.find("p")
    if paragraph is None:
        template = ""
    else:
        # NOTE(review): html.parser appears to serialize line breaks as
        # "<br/>", so this "<br>" replacement is likely a no-op; it is kept
        # to preserve the existing output exactly.
        p_content = paragraph.decode_contents().replace("<br>", "\n")
        p_content = _SPAN_RE.sub(r'<\1>', p_content)
        template = (
            p_content.strip()
            .replace(' <br/>', '')
            .replace(' ', '')
            .replace('<br/>', '')
        )

    # Parse component-item entries (values keep their inner markup).
    components = {}
    for item in soup.find_all("div", class_="component-item"):
        value_div = item.find("div", class_="component-value")
        value_content = value_div.decode_contents() if value_div is not None else ""
        value_content = _SPAN_RE.sub(r'{\1}', value_content)
        components[_component_key(item)] = (
            value_content.strip().replace(' <br/>', '').replace('<br/>', '')
        )

    # Parse self-info-item entries (values are reduced to plain text).
    self_prompt = {}
    for item in soup.find_all("div", class_="self-info-item"):
        value_div = item.find("div", class_="component-value")
        value = value_div.get_text(strip=True) if value_div is not None else ""
        self_prompt[_component_key(item)] = (
            value.replace(' <br/>', '').replace('<br/>', '')
        )

    return {
        'template': template,
        'components': components,
        'self_prompt': self_prompt
    }

# Example usage: print(parse_html_prompt(html_string))