File size: 4,499 Bytes
2a275ae 1f70be8 a938b8a 1f70be8 a938b8a 2a275ae 1f70be8 a938b8a 2a275ae a938b8a 1f70be8 6c8936b 2a275ae 6c8936b 1f70be8 6c8936b a938b8a 6c8936b 2a275ae 1f70be8 2a275ae a938b8a 1f70be8 2a275ae a938b8a 6c8936b 1f70be8 2a275ae a938b8a 6c8936b 1f70be8 2a275ae a938b8a 6c8936b 1f70be8 a938b8a 2a275ae 6c8936b a938b8a 6c8936b 1f70be8 6c8936b 1f70be8 7cb31c4 6c8936b 7cb31c4 1f70be8 2a275ae a938b8a 2a275ae 1f70be8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import subprocess
import gradio as gr
def run_scripts():
    """Re-run the plot-generation scripts and report the outcome.

    Each script is executed in a child process with the same Python
    interpreter that is running this app, so it sees the same installed
    packages. Output is captured rather than streamed.

    Returns:
        A human-readable status string: a success message when every script
        exits with status 0, otherwise a message starting with
        "Failed to execute scripts: " followed by the failing script's
        stderr (or the OS error if the process could not be started).
    """
    # Local import keeps this block self-contained; the file does not
    # import `sys` at module level.
    import sys

    scripts = [
        "hub_datasets_by_language.py",
        # "hub_models_by_language.py",  # disabled for now
    ]

    try:
        for script in scripts:
            subprocess.run(
                # sys.executable instead of a bare "python": the latter is
                # resolved through PATH and may be missing or point at a
                # different interpreter/virtualenv.
                [sys.executable, script],
                capture_output=True,
                text=True,
                check=True,  # non-zero exit -> CalledProcessError
            )
    except subprocess.CalledProcessError as e:
        return f"Failed to execute scripts: {e.stderr}"
    except OSError as e:
        # The child process could not even be started (e.g. interpreter not
        # found or not executable); report it instead of crashing the callback.
        return f"Failed to execute scripts: {e}"
    return "Scripts executed successfully! All plots have been updated."
def create_app():
    """Build the Gradio UI that shows the language-gap plots.

    The page contains an introduction, a two-column grid with the four
    dataset plots read from the ``plots/`` directory, a button that re-runs
    the plot-generation scripts via ``run_scripts``, and citation info.

    Clicking the button updates the status label and also re-sends the plot
    files to the image components, so the page shows the regenerated plots
    without needing a reload.

    Returns:
        The assembled ``gr.Blocks`` app (not launched).
    """
    from pathlib import Path

    # (file path, label) of every dataset plot, grouped by display column.
    dataset_plot_columns = (
        (
            (
                "plots/datasets_bar_plot_horizontal.png",
                "Distribution of Datasets by Year (Horizontal)",
            ),
            (
                "plots/datasets_stack_area_en_es.png",
                "Cumulative Growth of Datasets (Stacked)",
            ),
        ),
        (
            (
                "plots/datasets_bar_plot_vertical.png",
                "Distribution of Datasets by Year (Vertical)",
            ),
            (
                "plots/datasets_time_series.png",
                "Cumulative Growth of Datasets (Line)",
            ),
        ),
    )

    intro_md = (
        "\n"
        "# Visualizing The Language Gap In The Hugging Face Hub\n"
        "The open-source community is creating more and more resources in languages other than English but there is still a huge gap. This Space showcases plots that can help visualize this gap in the case of Spanish and can easily be adapted to other languages.\n"
    )
    datasets_md = (
        "\n"
        "## English vs Spanish Monolingual Datasets\n"
        "Note: We consider only **monolingual** resources in these plots, i.e. datasets and models that only contain data in one language. This is because *most* of the multilingual resources are usually machine-translated and we want to focus on original data.\n"
    )
    adapt_md = (
        "\n"
        "## Adapt to other languages\n"
        "This Space is WIP and more languages and visuals will be included shortly. Meanwhile, you can clone the Space, adapt the code in the scripts and run it to generate plots for other languages.\n"
    )
    citation_md = (
        "\n"
        "If you use these plots or the code please cite:\n"
        "```\n"
        "@misc{grandury2024gaphf,\n"
        "author = {María Grandury},\n"
        "title = {Visualizing The Language Gap In The Hugging Face Hub},\n"
        "year = {2024},\n"
        "publisher = {Hugging Face},\n"
        "howpublished = {\\url{https://huggingface.co/spaces/mariagrandury/language-gap-in-hf-hub}},\n"
        "}\n"
        "```\n"
    )

    def _plot_image(path, label):
        # One plot image with the display options shared by every plot.
        return gr.Image(
            value=path,
            label=label,
            show_label=True,
            show_download_button=True,
            show_share_button=True,
        )

    with gr.Blocks() as app:
        gr.Markdown(intro_md)
        gr.Markdown(datasets_md)

        # (component, source path) pairs, kept so the update button can
        # refresh every image after the scripts have rewritten the files.
        plot_images = []
        with gr.Row():
            for column in dataset_plot_columns:
                with gr.Column():
                    for path, label in column:
                        plot_images.append((_plot_image(path, label), path))

        # Models section is disabled, like the models script in run_scripts:
        # gr.Markdown("\n## English vs Spanish Models\n")
        # with gr.Row():
        #     _plot_image(
        #         "plots/models_stack_area_en_es.png",
        #         "Cumulative Growth of Models",
        #     )

        with gr.Row():
            update_button = gr.Button("Update Plots with Latest Data")
            output_label = gr.Label()

        gr.Markdown(adapt_md)
        gr.Markdown("## Citation")
        with gr.Accordion("Citation information", open=False):
            gr.Markdown(citation_md)

        def _refresh_plots():
            # Run the scripts, then hand the (possibly regenerated) files
            # back to the image components. A missing file is sent as None
            # so that it cannot mask the status message with an error.
            status = run_scripts()
            refreshed = [
                path if Path(path).is_file() else None
                for _, path in plot_images
            ]
            return [status, *refreshed]

        update_button.click(
            fn=_refresh_plots,
            outputs=[output_label, *(image for image, _ in plot_images)],
        )
    return app
# Build the UI at module level and bind it to the module-level name `app`.
app = create_app()
# Start the Gradio server with default launch settings. This runs whenever
# the module is executed or imported, as there is no __main__ guard.
app.launch()
|