File size: 2,114 Bytes
4e5b597
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from urllib.parse import parse_qs, urlsplit, urlunsplit

import gradio as gr
import requests
from bs4 import BeautifulSoup

def convert_to_mobile_url(url):
    """Convert a desktop Naver blog URL to its mobile (m.blog.naver.com) form.

    Recognised inputs, with or without a leading ``http(s)://``:

    * ``blog.naver.com/<user_id>/<post_id>`` -- any query string or
      fragment is carried over to the result.
    * ``blog.naver.com/...?blogId=<user_id>&logNo=<post_id>`` -- the two
      query values are used to build the path form.

    Args:
        url: The address as entered by the user.

    Returns:
        ``https://m.blog.naver.com/<user_id>/<post_id>`` when *url* is a
        desktop blog post address; otherwise *url* unchanged (this includes
        URLs that are already mobile, other hosts, and unparsable input).
    """
    candidate = url.strip()
    try:
        parts = urlsplit(candidate)
        if not parts.netloc:
            # Without a scheme urlsplit() puts the host into the path (or
            # mistakes it for a scheme), so retry with one prepended.
            parts = urlsplit(f"https://{candidate}")
        host = (parts.hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): not ours.
        return url

    # Compare the parsed host exactly; a substring test would also match
    # unrelated URLs that merely mention the blog host somewhere.
    if host != "blog.naver.com":
        return url

    segments = [segment for segment in parts.path.split("/") if segment]
    if len(segments) >= 2:
        user_id, post_id = segments[:2]
        return urlunsplit(
            (
                "https",
                "m.blog.naver.com",
                f"/{user_id}/{post_id}",
                parts.query,
                parts.fragment,
            )
        )

    # Query-style link: the identifiers live in the query string instead.
    query = parse_qs(parts.query)
    if "blogId" in query and "logNo" in query:
        return f"https://m.blog.naver.com/{query['blogId'][0]}/{query['logNo'][0]}"

    return url

def scrape_naver_blog(url, *, timeout=10):
    """Scrape the title and body text (text only) of a Naver blog post.

    The URL is passed through ``convert_to_mobile_url`` and the resulting
    page is fetched and parsed. The title is read from the first ``div``
    whose class attribute is ``se-module se-module-text se-title-text``;
    the body is the text of every ``div`` whose class attribute is
    ``se-module se-module-text``, joined with newlines.

    Progress and results are also printed to stdout for debugging.

    Args:
        url: Address of the blog post.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A string of the form ``"제목: <title>\\n\\n내용: <content>"``, with
        a "not found" placeholder where an element is missing. On any
        failure the function does not raise; it returns
        ``"Error: <message>"`` instead.
    """
    try:
        # Convert to the mobile URL
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # Without a timeout an unresponsive server would block the caller
        # indefinitely.
        response = requests.get(mobile_url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Scrape the title
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"

        # Scrape the body content. A multi-class string passed to class_
        # matches the exact attribute value, so the title element (which
        # carries an extra class) is not picked up again here.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        if content_elements:
            content = "\n".join(
                elem.get_text(strip=True) for elem in content_elements
            )
        else:
            content = "내용을 찾을 수 없음"

        # Print debugging output
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        # Return the result
        return f"제목: {title}\n\n내용: {content}"

    except Exception as e:
        # Callers display the returned text, so every failure (network,
        # HTTP status, parsing) is reported as a string rather than raised.
        print(f"Error: {e}")
        return f"Error: {e}"

# Gradio interface definition
def run_scraper(url):
    """Gradio callback: scrape *url* and return the resulting text.

    Delegates to ``scrape_naver_blog`` and returns its result unchanged.
    """
    scraped_text = scrape_naver_blog(url)
    return scraped_text

# Single-textbox UI: a blog URL goes in, the scraped text comes out.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="네이버 블로그 URL"),
    outputs=gr.Textbox(label="스크래핑 결과"),
    title="네이버 블로그 스크래핑",
    description="네이버 블로그의 제목과 내용을 스크래핑합니다.",
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()