Spaces:

dejanseo
/

url-keyword-extraction

Sleeping

dejanseo committed on Jun 2

Commit

561915c

verified ·

1 Parent(s): 25a1afc

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import streamlit as st
+import pandas as pd
+from urllib.parse import urlparse
def extract_keywords_from_url(url: str) -> list[str]:
    """Return the lowercase keywords found in the path of *url*.

    Only the path component is inspected; scheme, host, query string and
    fragment are ignored. Percent-escapes in the path are decoded, a trailing
    ``.html``/``.htm``/``.php`` extension is dropped (case-insensitively),
    and the remainder is split on ``/``, ``_``, ``-``, ``.`` and whitespace.

    Args:
        url: The URL (or bare path) to analyse.

    Returns:
        The keywords in path order, lowercased. Empty when the path has no
        keyword characters.
    """
    # Function-scope import: the file-level imports only bring in urlparse.
    from urllib.parse import unquote

    # Decode first so "%20" becomes a separator instead of part of a token.
    path = unquote(urlparse(url).path).strip("/")

    for ext in (".html", ".htm", ".php"):
        # Compare only the suffix, lowercased, so "Page.HTML" is stripped
        # without altering the rest of the path.
        if path[-len(ext):].lower() == ext:
            path = path[: -len(ext)]

    cleaned = path.replace("_", " ").replace("-", " ").replace(".", " ")

    # str.split() with no argument never yields empty tokens, so no extra
    # filtering is needed.
    return [
        token.lower()
        for segment in cleaned.split("/")
        for token in segment.split()
    ]
# --- Streamlit page: collect URLs, show the keywords extracted from each ---
st.title("🔗 URL Keywords Extractor")
urls_input = st.text_area("Enter one or more URLs (one per line):", height=200)

if st.button("Extract Keywords"):
    # One URL per line; blank or whitespace-only lines are discarded.
    urls = [line.strip() for line in urls_input.splitlines() if line.strip()]

    if not urls:
        st.warning("Please enter at least one URL.")
    else:
        rows = []
        for url in urls:
            rows.append({"URL": url, "Keywords": extract_keywords_from_url(url)})
        df = pd.DataFrame(rows)
        st.dataframe(df, use_container_width=True)