Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,17 +13,11 @@ def extract_keywords_from_url(url: str) -> list[str]:
|
|
| 13 |
split on '/', then on whitespace, lowercase each token, and return the list.
|
| 14 |
"""
|
| 15 |
parsed = urlparse(url)
|
| 16 |
-
path = parsed.path.strip("/")
|
| 17 |
-
|
| 18 |
-
# Remove common file extensions if they appear at the end:
|
| 19 |
for ext in (".html", ".htm", ".php"):
|
| 20 |
if path.endswith(ext):
|
| 21 |
path = path[: -len(ext)]
|
| 22 |
-
|
| 23 |
-
# Replace underscores, hyphens, and dots with spaces:
|
| 24 |
cleaned = path.replace("_", " ").replace("-", " ").replace(".", " ")
|
| 25 |
-
|
| 26 |
-
# Split on "/" to get segments, then split each segment on whitespace:
|
| 27 |
segments = cleaned.split("/") if cleaned else []
|
| 28 |
keywords: list[str] = []
|
| 29 |
for seg in segments:
|
|
@@ -44,19 +38,36 @@ def process_urls(input_text: str):
|
|
| 44 |
for url in urls:
|
| 45 |
kws = extract_keywords_from_url(url)
|
| 46 |
results.append({"url": url, "keywords": ", ".join(kws)})
|
| 47 |
-
|
| 48 |
df = pd.DataFrame(results, columns=["url", "keywords"])
|
| 49 |
df.to_csv(CSV_OUTPUT_PATH, index=False, encoding="utf-8")
|
| 50 |
return df, CSV_OUTPUT_PATH
|
| 51 |
|
| 52 |
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
gr.Markdown(
|
| 57 |
"Enter one or more URLs (one per line) below, then click **Generate** to see extracted keywords."
|
| 58 |
)
|
| 59 |
-
|
| 60 |
url_input = gr.Textbox(
|
| 61 |
lines=5,
|
| 62 |
placeholder="https://dejan.ai/labs/interactive-demo\nhttps://example.com/foo-bar",
|
|
@@ -69,16 +80,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 69 |
label="Extracted URL Keywords",
|
| 70 |
interactive=False,
|
| 71 |
)
|
| 72 |
-
download_csv = gr.File(
|
| 73 |
-
label="EXPORT CSV",
|
| 74 |
-
file_types=[".csv"],
|
| 75 |
-
)
|
| 76 |
|
| 77 |
-
generate_btn.click(
|
| 78 |
-
fn=process_urls,
|
| 79 |
-
inputs=url_input,
|
| 80 |
-
outputs=[df_output, download_csv],
|
| 81 |
-
)
|
| 82 |
|
| 83 |
if __name__ == "__main__":
|
| 84 |
demo.launch()
|
|
|
|
| 13 |
split on '/', then on whitespace, lowercase each token, and return the list.
|
| 14 |
"""
|
| 15 |
parsed = urlparse(url)
|
| 16 |
+
path = parsed.path.strip("/")
|
|
|
|
|
|
|
| 17 |
for ext in (".html", ".htm", ".php"):
|
| 18 |
if path.endswith(ext):
|
| 19 |
path = path[: -len(ext)]
|
|
|
|
|
|
|
| 20 |
cleaned = path.replace("_", " ").replace("-", " ").replace(".", " ")
|
|
|
|
|
|
|
| 21 |
segments = cleaned.split("/") if cleaned else []
|
| 22 |
keywords: list[str] = []
|
| 23 |
for seg in segments:
|
|
|
|
| 38 |
for url in urls:
|
| 39 |
kws = extract_keywords_from_url(url)
|
| 40 |
results.append({"url": url, "keywords": ", ".join(kws)})
|
|
|
|
| 41 |
df = pd.DataFrame(results, columns=["url", "keywords"])
|
| 42 |
df.to_csv(CSV_OUTPUT_PATH, index=False, encoding="utf-8")
|
| 43 |
return df, CSV_OUTPUT_PATH
|
| 44 |
|
| 45 |
|
| 46 |
+
# Aggressive CSS override: everything white bg + black text,
|
| 47 |
+
# then re-style buttons to keep accent color + white text.
|
| 48 |
+
custom_css = """
|
| 49 |
+
/* 1) Force every element inside the Gradio container to white bg + black text */
|
| 50 |
+
.gradio-container * {
|
| 51 |
+
background-color: #ffffff !important;
|
| 52 |
+
color: #000000 !important;
|
| 53 |
+
border-color: #cccccc !important;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/* 2) Restore button accent + white text */
|
| 57 |
+
.gradio-container .gr-button,
|
| 58 |
+
.gradio-container .gr-button:hover,
|
| 59 |
+
.gradio-container .gr-button:focus {
|
| 60 |
+
background-color: #1f6feb !important;
|
| 61 |
+
color: #ffffff !important;
|
| 62 |
+
border-color: #1f6feb !important;
|
| 63 |
+
}
|
| 64 |
+
"""
|
| 65 |
+
|
| 66 |
+
with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
|
| 67 |
+
gr.Markdown("## URL Keywords Extractor by DEJAN")
|
| 68 |
gr.Markdown(
|
| 69 |
"Enter one or more URLs (one per line) below, then click **Generate** to see extracted keywords."
|
| 70 |
)
|
|
|
|
| 71 |
url_input = gr.Textbox(
|
| 72 |
lines=5,
|
| 73 |
placeholder="https://dejan.ai/labs/interactive-demo\nhttps://example.com/foo-bar",
|
|
|
|
| 80 |
label="Extracted URL Keywords",
|
| 81 |
interactive=False,
|
| 82 |
)
|
| 83 |
+
download_csv = gr.File(label="EXPORT CSV", file_types=[".csv"])
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
+
generate_btn.click(fn=process_urls, inputs=url_input, outputs=[df_output, download_csv])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
if __name__ == "__main__":
|
| 88 |
demo.launch()
|