dejanseo's picture
Create app.py
561915c verified
raw
history blame
1.12 kB
import streamlit as st
import pandas as pd
from urllib.parse import urlparse
def extract_keywords_from_url(url: str) -> list[str]:
    """Return the lowercase keyword tokens found in the path of *url*.

    Only the path component is used; scheme, host, query string and
    fragment are ignored.  The path is percent-decoded, a trailing
    ``.html``, ``.htm`` or ``.php`` extension is dropped (matched
    case-insensitively), and the remainder is split on ``/``, ``_``, ``-``,
    ``.`` and whitespace.

    Args:
        url: The URL (or bare path) to analyse.

    Returns:
        The tokens in path order, lowercased.  Empty when the path holds
        no tokens (e.g. ``"https://example.com/"`` or ``""``).
    """
    # Function-scope import: the file-level import only brings in urlparse.
    from urllib.parse import unquote

    # Decode %XX escapes so "red%20shoes" yields "red" and "shoes" rather
    # than the single junk token "red%20shoes".
    path = unquote(urlparse(url).path).strip("/")

    # Suffixes are tested one after another, in this order, against the
    # progressively shortened path.  The comparison is done on a lowercased
    # tail so that "About.HTML" is handled like "about.html".
    for ext in (".html", ".htm", ".php"):
        if path[-len(ext):].lower() == ext:
            path = path[: -len(ext)]

    # Turn every separator (including "/") into a space in one pass;
    # str.split() with no argument then discards empty tokens.
    separators = str.maketrans("_-./", "    ")
    return path.translate(separators).lower().split()
# --- Streamlit page: paste URLs, click the button, see one row per URL. ---

# The title's leading emoji (U+1F517, link symbol) is written as an escape
# so it cannot be corrupted if the file is re-saved with a different encoding.
st.title("\U0001F517 URL Keywords Extractor")
urls_input = st.text_area("Enter one or more URLs (one per line):", height=200)

if st.button("Extract Keywords"):
    # One URL per line; surrounding whitespace and blank lines are ignored.
    urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
    if urls:
        rows = [
            {"URL": url, "Keywords": extract_keywords_from_url(url)}
            for url in urls
        ]
        st.dataframe(pd.DataFrame(rows), use_container_width=True)
    else:
        st.warning("Please enter at least one URL.")