Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from norm_html import normalized_html_table | |
| import re | |
def clear_table_cells(pred):
    """Strip the contents of every ``<td>`` and ``<th>`` cell, keeping the tags.

    Opening tags (including their attributes such as ``colspan``) and closing
    tags are preserved; everything between them is removed. Matching is
    case-insensitive and spans newlines.

    Args:
        pred: HTML string containing table markup.

    Returns:
        The HTML string with all cell contents emptied.
    """
    for tag in ("td", "th"):
        # ``\b`` after the tag name is essential: without it ``<th[^>]*>`` also
        # matches ``<thead>`` and the substitution would swallow the following
        # ``<tr><th>`` opening tags up to the first ``</th>``.
        pred = re.sub(
            rf'(<{tag}\b[^>]*>).*?(</{tag}\s*>)',
            r'\1\2',
            pred,
            flags=re.DOTALL | re.IGNORECASE,
        )
    return pred
def _span_of(cell, attr_name):
    """Return the cell's span attribute as an int, treating bad values as 1."""
    try:
        return int(cell.get(attr_name, 1))
    except (TypeError, ValueError):
        # Malformed markup such as colspan="" or colspan="2x": treat the cell
        # as not merged along this axis instead of crashing the whole render.
        return 1


def _append_divider(soup, cell, style):
    """Append an empty ``div`` carrying the given inline style to the cell."""
    line = soup.new_tag('div')
    line['style'] = style
    cell.append(line)


def add_merged_cell_lines(html_content):
    """Mark merged cells with dashed divider lines.

    For every ``td``/``th`` found in the rows of the first ``<table>`` whose
    ``colspan`` or ``rowspan`` is greater than 1, the cell is given
    ``position: relative`` and one absolutely positioned, dashed ``div`` is
    appended per internal boundary: ``colspan - 1`` vertical lines and
    ``rowspan - 1`` horizontal lines, evenly spaced by percentage.

    Args:
        html_content: HTML string expected to contain a table.

    Returns:
        ``html_content`` unchanged when no table or no rows are found;
        otherwise the re-serialized document with the divider elements added.
    """
    # Cheap pre-check: a string without "<table" cannot yield a table element,
    # so skip the parse (and the bs4 import) entirely.
    if isinstance(html_content, str) and '<table' not in html_content.lower():
        return html_content

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if table is None:
        return html_content

    rows = table.find_all('tr')
    if not rows:
        return html_content

    for row in rows:
        for cell in row.find_all(['td', 'th']):
            colspan = _span_of(cell, 'colspan')
            rowspan = _span_of(cell, 'rowspan')

            # Only merged cells get divider lines.
            if colspan <= 1 and rowspan <= 1:
                continue

            # The dividers are absolutely positioned, so the cell must be
            # their positioning context.
            style = cell.get('style', '')
            if 'position: relative' not in style:
                base = style.strip().rstrip(';').rstrip()
                cell['style'] = (
                    f"{base}; position: relative" if base else "position: relative"
                )

            # Vertical lines at each internal column boundary.
            for i in range(1, colspan):
                _append_divider(soup, cell, f"""
                    position: absolute;
                    top: 0;
                    bottom: 0;
                    left: {i * 100 / colspan}%;
                    width: 0;
                    border-left: 2px dashed #666;
                    pointer-events: none;
                    z-index: 10;
                """)

            # Horizontal lines at each internal row boundary.
            for i in range(1, rowspan):
                _append_divider(soup, cell, f"""
                    position: absolute;
                    left: 0;
                    right: 0;
                    top: {i * 100 / rowspan}%;
                    height: 0;
                    border-top: 2px dashed #666;
                    pointer-events: none;
                    z-index: 10;
                """)

    return str(soup)
def show_html(pred_input, show_structure, show_merged_cell):
    """Build the HTML shown in the preview from the user's table markup.

    The input is first passed through ``normalized_html_table``. Cell contents
    are cleared when ``show_structure`` is set, table borders are added, and
    merged-cell divider lines are added when ``show_merged_cell`` is set.

    Args:
        pred_input: Table HTML typed by the user.
        show_structure: When truthy, empty every cell via ``clear_table_cells``.
        show_merged_cell: When truthy, annotate merged cells via
            ``add_merged_cell_lines``.

    Returns:
        The processed HTML string.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; borders are applied unconditionally here - confirm that
    # add_table_border was not meant to run only when show_structure is set.
    rendered = normalized_html_table(pred_input)
    if show_structure:
        rendered = clear_table_cells(rendered)
    rendered = add_table_border(rendered)
    return add_merged_cell_lines(rendered) if show_merged_cell else rendered
def add_table_border(pred_html):
    """Add inline border styling to the table and to each of its cells.

    A bare ``<table>`` tag gets a Bootstrap-style class plus a collapsed 3px
    border; every ``<td>``/``<th>`` opening tag gets a 2px border ``style``
    attribute inserted right after the tag name.

    Args:
        pred_html: HTML string containing table markup.

    Returns:
        The HTML string with the border styles injected.
    """
    pred_html = pred_html.replace(
        "<table>",
        "<table class='table table-bordered' style='border-collapse: collapse; border: 3px solid #333;'>",
    )
    # ``\b`` keeps ``<thead>`` intact: a plain ``replace("<th", ...)`` would
    # turn it into ``<th style='...'ead>`` and corrupt the markup.
    return re.sub(
        r"<(td|th)\b",
        r"<\1 style='border: 2px solid #333;'",
        pred_html,
        flags=re.IGNORECASE,
    )
| # Script entry point: build the Gradio demo UI, wire its events to show_html, and serve it. | |
| # NOTE(review): indentation was lost in this copy of the file, so the exact nesting of the | |
| # Row/Column containers below cannot be confirmed from here. | |
| if __name__ == "__main__": | |
| with gr.Blocks() as demo: | |
| with gr.Row(): | |
| with gr.Column(): | |
| with gr.Row(): | |
| # Text input holding the table HTML to render. | |
| pred_input = gr.Textbox(label='HTML Table', placeholder='type table html code here', interactive=True) | |
| with gr.Row(): | |
| # Display options, both checked by default (value=True). | |
| # Labels: "show table structure only" and "show dashed lines for merged cells". | |
| show_structure = gr.Checkbox(label="只显示表格结构", value=True) | |
| show_merged_cell = gr.Checkbox(label="显示合并单元格虚线", value=True) | |
| # Button labelled "show HTML". | |
| show_html_btn = gr.Button("显示HTML") | |
| # Output component, initialised with a placeholder table. | |
| pred_html = gr.HTML("<table><td>input HTML here.</td></table>") | |
| # Re-render on button click and whenever either checkbox changes; all three | |
| # handlers call show_html with the same inputs and write to the same output. | |
| show_html_btn.click(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html]) | |
| show_structure.change(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html]) | |
| show_merged_cell.change(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html]) | |
| # Serve on address 0.0.0.0, port 7860, with debug=True. | |
| demo.launch(server_name="0.0.0.0", server_port=7860, debug=True) |