|
from urllib.parse import urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
|
|
|
def convert_to_mobile_url(url):
    """Convert a PC Naver blog post URL to its mobile equivalent.

    The mobile site (m.blog.naver.com) serves a simpler DOM that is easier
    to scrape. URLs that are already mobile, are not Naver blog URLs, or do
    not contain both a user id and a post id are returned unchanged.

    Parameters:
        url: Blog post URL, e.g. "https://blog.naver.com/<user>/<post>".

    Returns:
        "https://m.blog.naver.com/<user>/<post>" when convertible,
        otherwise the original URL.
    """
    # Already the mobile host: nothing to do.
    if "m.blog.naver.com" in url:
        return url

    # urlparse keeps query strings and fragments out of the path, so URLs
    # like ".../<post>?trackingCode=x" still yield a clean post id (a naive
    # split("/") would leak the query into the post id).
    parsed = urlparse(url)
    if "blog.naver.com" not in parsed.netloc:
        return url

    segments = [part for part in parsed.path.split("/") if part]
    if len(segments) >= 2:
        user_id, post_id = segments[0], segments[1]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"
    return url
|
|
|
def scrape_naver_blog(url):
    """Scrape the title and text-only content of a Naver blog post.

    Parameters:
        url: PC or mobile Naver blog post URL.

    Returns:
        A formatted string containing the post title and body text, or an
        "Error: ..." string if the request or parsing fails.
    """
    try:
        # The mobile page uses a simpler DOM that is easier to parse.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # Timeout keeps a stalled connection from hanging the UI forever.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # SmartEditor ONE posts wrap the title in this module class.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ ๋ชฉ์ ์ฐพ์ ์ ์์"

        # Body paragraphs share the generic text-module class. NOTE(review):
        # this selector also matches the title module, so the title text is
        # presumably repeated at the top of the content — confirm if that
        # duplication matters to callers.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋ด์ฉ์ ์ฐพ์ ์ ์์"

        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        return f"์ ๋ชฉ: {title}\n\n๋ด์ฉ: {content}"

    except Exception as e:
        # Top-level boundary for the Gradio callback: report the failure as
        # a string in the UI instead of crashing the handler.
        print(f"Error: {e}")
        return f"Error: {e}"
|
|
|
|
|
def run_scraper(url):
    """Gradio callback: delegate to scrape_naver_blog and return its text."""
    result = scrape_naver_blog(url)
    return result
|
|
|
# Gradio UI: one URL textbox in, the scraped title/content text out.
interface = gr.Interface(

    fn=run_scraper,

    inputs=gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL"),

    outputs=gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ"),

    title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ",

    description="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ์คํฌ๋ํํฉ๋๋ค."

)
|
|
|
# Launch the Gradio web app only when run as a script, not on import.
if __name__ == "__main__":

    interface.launch()
|
|