# Naver blog scraper exposed through a Gradio web UI.
import requests
from bs4 import BeautifulSoup
import gradio as gr
def convert_to_mobile_url(url):
    """Convert a desktop Naver blog URL into its mobile equivalent.

    A URL that is already mobile, is not a Naver blog URL, or lacks
    both a user id and a post id is returned unchanged.
    """
    # Already the mobile host — nothing to do.
    if "m.blog.naver.com" in url:
        return url
    # Not a Naver blog URL at all — leave untouched.
    if "blog.naver.com" not in url:
        return url
    parts = url.split("/")
    # Expect at least scheme, '', host, user id, post id.
    if len(parts) < 5:
        return url
    return f"https://m.blog.naver.com/{parts[3]}/{parts[4]}"
def scrape_naver_blog(url):
    """Scrape the title and text-only body of a Naver blog post.

    Parameters
    ----------
    url : str
        A desktop or mobile Naver blog post URL.

    Returns
    -------
    str
        A formatted "title + content" string on success, or an
        "Error: ..." message string on any failure.
    """
    try:
        # The mobile page has simpler markup, so scrape that variant.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")
        # BUG FIX: the original call had no timeout, so a stalled
        # connection could hang the app forever.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Title: the SmartEditor title text module.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ ๋ชฉ์ ์ฐพ์ ์ ์์"
        # Body: every SmartEditor text module. NOTE(review): this selector
        # also matches the title module, so the title is repeated in the
        # content — presumably acceptable; confirm if exact output matters.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋ด์ฉ์ ์ฐพ์ ์ ์์"
        # Debug output.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")
        result = f"์ ๋ชฉ: {title}\n\n๋ด์ฉ: {content}"
        return result
    except Exception as e:
        # Broad catch is intentional: any failure (network, parsing) is
        # reported back to the UI as a plain text message.
        print(f"Error: {e}")
        return f"Error: {e}"
# Gradio interface entry point.
def run_scraper(url):
    """Thin wrapper exposing ``scrape_naver_blog`` to the Gradio UI."""
    return scrape_naver_blog(url)
# Gradio UI: a single URL textbox in, the scraped text out.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL"),
    outputs=gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ"),
    title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ",
    description="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ์ ์ ๋ชฉ๊ณผ ๋ด์ฉ์ ์คํฌ๋ํํฉ๋๋ค."
)

# Launch the web server only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()