File size: 1,115 Bytes
561915c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from urllib.parse import unquote, urlparse

import pandas as pd
import streamlit as st

def extract_keywords_from_url(url: str) -> list[str]:
    """Return the lowercase keyword tokens found in the path of *url*.

    Only the path component is inspected; the query string and fragment are
    ignored. A trailing ``.html``, ``.htm`` or ``.php`` extension (in any
    letter case) is removed, percent-escapes are decoded, and the remainder
    is split on ``/``, ``_``, ``-``, ``.`` and whitespace.

    Note: ``urlparse`` treats a URL without a scheme (``example.com/a-b``)
    as a bare path, so the host labels are returned as keywords too.

    Args:
        url: The URL (or bare path) to tokenize.

    Returns:
        The tokens in order of appearance, lowercased. Empty when the path
        is empty or consists only of separators.

    >>> extract_keywords_from_url("https://example.com/blog/My-First_Post.HTML")
    ['blog', 'my', 'first', 'post']
    """
    # Decode percent-escapes first so "%20" or "%2D" act as the separators
    # they encode instead of ending up inside a token.
    path = unquote(urlparse(url).path).strip("/")

    # Compare case-insensitively so "Index.HTML" does not leak "html" as a
    # keyword. The tail slice is compared (rather than the lowercased path)
    # so the number of characters removed always matches the extension.
    for ext in (".html", ".htm", ".php"):
        if path[-len(ext):].lower() == ext:
            path = path[: -len(ext)]

    # "/" separates segments; "_", "-" and "." separate words in a segment.
    # Mapping all of them to spaces lets a single split() do the tokenizing
    # and drop empty tokens.
    words = path.translate(str.maketrans("/_-.", "    ")).split()
    return [word.lower() for word in words]

# Page heading. The link emoji is written as an escape (U+1F517) so the title
# survives the file being saved or transferred with the wrong text encoding.
st.title("\U0001F517 URL Keywords Extractor")

# Free-form input: one URL per line.
urls_input = st.text_area("Enter one or more URLs (one per line):", height=200)

if st.button("Extract Keywords"):
    # Trim each line and drop blank ones; an all-whitespace input yields an
    # empty list and falls through to the warning.
    urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
    if urls:
        # One table row per URL, with its extracted keyword list.
        results = [{"URL": url, "Keywords": extract_keywords_from_url(url)} for url in urls]
        df = pd.DataFrame(results)
        st.dataframe(df, use_container_width=True)
    else:
        st.warning("Please enter at least one URL.")