Spaces:
Runtime error
Runtime error
import gradio as gr | |
import wikipedia | |
import numpy as np | |
import pandas as pd | |
from os import path | |
from PIL import Image | |
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator | |
import matplotlib.pyplot as plt | |
def wikipediaScrap(article_name, wikipedia_language = "en - English"): | |
wikipedia_language = wikipedia_language.split(" - ")[0] | |
if wikipedia_language: | |
wikipedia.set_lang(wikipedia_language) | |
# rem_sp = article_name.replace(" ", "") | |
et_page = wikipedia.page(article_name) | |
title = et_page.title | |
content = et_page.content | |
page_url = et_page.url | |
linked_pages = et_page.links | |
text = content | |
# Create and generate a word cloud image: | |
wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(text) | |
# Display the generated image: | |
plt.imshow(wordcloud, interpolation='bilinear') | |
plt.axis("off") | |
return title, content, page_url, "\n". join(linked_pages), plt | |
css = """ | |
footer {display:none !important} | |
.output-markdown{display:none !important} | |
footer {visibility: hidden} | |
.gr-button-lg { | |
z-index: 14; | |
width: 113px; | |
height: 30px; | |
left: 0px; | |
top: 0px; | |
padding: 0px; | |
cursor: pointer !important; | |
background: none rgb(17, 20, 45) !important; | |
border: none !important; | |
text-align: center !important; | |
font-size: 14px !important; | |
font-weight: 500 !important; | |
color: rgb(255, 255, 255) !important; | |
line-height: 1 !important; | |
border-radius: 6px !important; | |
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important; | |
box-shadow: none !important; | |
} | |
.gr-button-lg:hover{ | |
z-index: 14; | |
width: 113px; | |
height: 30px; | |
left: 0px; | |
top: 0px; | |
padding: 0px; | |
cursor: pointer !important; | |
background: none rgb(66, 133, 244) !important; | |
border: none !important; | |
text-align: center !important; | |
font-size: 14px !important; | |
font-weight: 500 !important; | |
color: rgb(255, 255, 255) !important; | |
line-height: 1 !important; | |
border-radius: 6px !important; | |
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important; | |
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important; | |
} | |
#component-14 textarea[data-testid="textbox"] { height: 178px !important} | |
#component-17 textarea[data-testid="textbox"] { height: 178px !important} | |
#component-21 textarea[data-testid="textbox"] { height: 178px !important} | |
#component-20 tr:hover{ | |
background-color: rgb(229,225,255) !important; | |
} | |
.output-image {max-height: 11rem !important;} | |
.output-image img {max-height: 17rem !important;} | |
.hover\:bg-orange-50:hover { | |
--tw-bg-opacity: 1 !important; | |
background-color: rgb(229,225,255) !important; | |
} | |
""" | |
ini_dict = wikipedia.languages() | |
# split dictionary into keys and values | |
keys = [] | |
values = [] | |
language=[] | |
items = ini_dict.items() | |
for item in items: | |
keys.append(item[0]), values.append(item[1]) | |
language.append(item[0]+" - "+item[1]) | |
with gr.Blocks(title="Wikipedia Article Scrape | Data Science Dojo", css = css) as demo: | |
with gr.Row(): | |
inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name") | |
lan = gr.Dropdown(label=" Select Language", choices=language, value=language[108], interactive=True) | |
btn = gr.Button("Start scraping", elem_id="dsd_button") | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown("""## About""") | |
title = gr.Textbox(label="Article title") | |
url = gr.Textbox(label="Article URL") | |
with gr.Column(): | |
gr.Markdown("""## Wordcloud""") | |
wordcloud = gr.Plot() | |
gr.Markdown("""### Content""") | |
with gr.Row(): | |
content = gr.Textbox(label="Content") | |
gr.Markdown("""### Linked Articles""") | |
with gr.Row(): | |
linked = gr.Textbox(label="Linked Articles") | |
btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud]) | |
with gr.Row(): | |
gr.Examples(examples = [["Eiffel Tower", "en - English"], ["Eiffel tower", 'ur - اردو']], fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud], cache_examples=True) | |
demo.launch() |