File size: 4,108 Bytes
fca8dbe
1e9419d
 
ef2a8f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1abd1f8
4a60195
1abd1f8
ef2a8f8
 
9bc3953
206364e
3cd0e82
 
1e76dd5
ef2a8f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1abd1f8
ef2a8f8
 
4a60195
ef2a8f8
 
 
1e9419d
ef2a8f8
 
 
 
 
 
 
 
1abd1f8
ef2a8f8
 
1abd1f8
ef2a8f8
 
 
1abd1f8
 
 
 
 
 
 
 
 
 
ef2a8f8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
import pandas as pd

# Custom CSS injected into the page: narrows/centers the content column,
# trims horizontal padding, shrinks heading fonts and restyles dataframes.
# (The CSS comments are kept verbatim from the original stylesheet.)
_PAGE_CSS = """
    <style>
        /* 控制内容的宽度和居中 */
        .reportview-container {
            max-width: 800px;  /* 控制最大宽度 */
            margin-left: auto;  /* 居中 */
            margin-right: auto;
        }
        /* 新增:减少页面两边的空白 */
        .streamlit-container {
            padding: 0px 10px;  /* 左右边距 */
        }
        /* 控制TXT标签内的字体大小 */
        h2, h3, h4, h5, h6 {
            font-size: 16px;  /* 适当减小字体大小 */
        }
        /* 控制表格样式 */
        .dataframe {
            width: 100% !important;  /* 使表格宽度100% */
            border: none;  /* 去掉表格边框 */
        }
    </style>
    """

# unsafe_allow_html is required for raw <style> markup to take effect.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# 设置页面标题
st.title("🏆 Dyn-VQA Leaderboard")

# 使用 container 来减少空白
with st.container():
    # 数据集简介
    st.subheader("📑 Dataset Description")
    st.markdown('🌟 Dataset for [*Benchmarking Multimodal Retrieval Augmented Generation with Dynamic VQA Dataset and Self-adaptive Planning Agent*](https://arxiv.org/abs/2411.02937).')
    st.markdown('🌟 This dataset is linked to GitHub at [this URL](https://github.com/Alibaba-NLP/OmniSearch)')

    # 实验Leaderboard榜单数据
    data = {
        "Model": [
            "Omnisearch(gpt-4o)", "gpt-4o Two-Step mRAG", "gpt-4o Original LLMs",
            "qwen-vl-max Two-Step mRAG", "qwen25-vl-7b Two-Step mRAG",
            "gpt-4o Retrieving Images with Input Images", "deepseek-vl-7b-chat Two-Step mRAG",
            "qwen-vl-max Original LLMs", "deepseek-vl2 Two-Step mRAG",
            "qwen-vl-max Retrieving Images with Input Images", "qwen25-vl-7b Retrieving Images with Input Images",
            "qwen25-vl-7b Original LLMs", "deepseek-vl-7b-chat Retrieving Images with Input Images",
            "deepseek-vl2 Retrieving Images with Input Images", "deepseek-vl2 Original LLMs",
            "deepseek-vl-7b-chat Original LLMs"
        ],
        "zh_Dynvqa": [
            54.23, 52.78, 46.54, 50.75, 46.27,
            40.84, 39.48, 32.84, 28.36, 25.37,
            21.98, 18.86, 13.03, 9.91, 9.50,
            8.68
        ],
        "en_Dynvqa": [
            47.17, 45.03, 42.66, 37.76, 35.24,
            40.42, 28.11, 32.87, 26.01, 25.17,
            21.26, 19.71, 10.77, 12.73, 12.87,
            8.67
        ],
        "average": [
            50.7, 48.905, 44.6, 44.255, 40.755,
            40.63, 33.795, 32.855, 27.185, 25.27,
            21.62, 19.285, 11.9, 11.32, 11.185,
            8.675
        ]
    }

    # 将数据转换为DataFrame
    df = pd.DataFrame(data)

    # 显示Leaderboard表格
    st.subheader("🕹️ Experiment Leaderboard")
    st.dataframe(df)

    # 数据格式示例
    st.subheader("Data Format")
    st.json({
        "image_url": "https://www.pcarmarket.com/static/media/uploads/galleries/photos/uploads/galleries/22387-pasewark-1986-porsche-944/.thumbnails/IMG_7102.JPG.jpg",
        "question": "What is the model of car from this brand?",
        "question_id": 'qid',
        "answer": ["保时捷 944", "Porsche 944."]
    })

    # 更新信息
    st.markdown("🔥 The Dyn-VQA **will be updated regularly.** Latest version: 202502.")

    # 引用信息
    st.subheader("📝 Citation")
    st.code("""
@article{li2024benchmarkingmultimodalretrievalaugmented,
      title={Benchmarking Multimodal Retrieval Augmented Generation with Dynamic VQA Dataset and Self-adaptive Planning Agent}, 
      author={Yangning Li and Yinghui Li and Xinyu Wang and Yong Jiang and Zhen Zhang and Xinran Zheng and Hui Wang and Hai-Tao Zheng and Pengjun Xie and Philip S. Yu and Fei Huang and Jingren Zhou},
      year={2024},
      eprint={2411.02937},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2411.02937}, 
}
""")
    st.write("When citing our work, please kindly consider citing the original papers. The relevant citation information is listed here.")