# Spaces: Sleeping (page-status text captured together with the source; not part of the program)
import json
import os
import time
from urllib.parse import quote

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from pytrends.request import TrendReq
from tenacity import retry, wait_exponential, stop_after_attempt
# Page heading.
st.title("🐣MOMO 🆚 PCHOME 商品搜索和 Google Trends 分析👁️🗨️")

# Search term used for both shops and for Google Trends; pre-filled with "筆電".
keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電")

# Date window for the Google Trends query.
start_date = st.date_input(
    "請選擇開始日期",
    value=pd.to_datetime("2024-08-01"),
)
end_date = st.date_input(
    "請選擇結束日期",
    value=pd.to_datetime("2024-08-11"),
)

# Page count / page index for the shop searches (integer between 1 and 100).
page_number = st.number_input(
    "請輸入要搜索的頁數: ",
    min_value=1,
    max_value=100,
    value=1,
    step=1,
)

# The two dates joined by a single space; later passed as the Trends `timeframe`.
search_timeframe = "{} {}".format(start_date, end_date)

# The search itself only runs when the button below is pressed.
# Settings shared by the helpers below.
_REQUEST_TIMEOUT = 10  # seconds; keeps a stalled shop request from hanging the app
_MAX_CHART_ITEMS = 70  # scatter charts only plot the first 70 products
_PRICE_BINS = [0, 1000, 5000, 10000, 50000, float('inf')]
_PRICE_LABELS = ['0-1000', '1001-5000', '5001-10000', '10001-50000', '50000+']


def _parse_momo_price(raw_price):
    """Convert a MOMO price value to a float, or 0.0 when it is not numeric.

    Everything from the first "(" onward is discarded and "," / "$" are
    removed before the conversion.
    """
    cleaned = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(cleaned)
    except ValueError:
        return 0.0


def _fetch_momo(search_keyword, page):
    """Query the MOMO search API and return the products as a DataFrame.

    Args:
        search_keyword: Text to search for.
        page: Result page to request (sent as ``curPage``).

    Returns:
        DataFrame with ``title``, ``price`` and ``platform`` columns. It is
        empty (but still has those columns) when the request fails or no
        product is returned, so callers can always use it safely.
    """
    # NOTE(review): only the single page `page` is requested here, while the
    # PCHOME helper fetches pages 1..page — confirm which one is intended.
    momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
    momo_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    momo_payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": search_keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }
    no_products = pd.DataFrame(columns=['title', 'price', 'platform'])

    try:
        momo_response = requests.post(
            momo_url,
            headers=momo_headers,
            json=momo_payload,
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return no_products

    if momo_response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {momo_response.status_code}")
        return no_products

    try:
        body = momo_response.json()
    except ValueError as exc:
        st.error(f"MOMO 回應解析失敗: {exc}")
        return no_products

    # `or` guards against the keys being present but null.
    goods = (body.get('rtnSearchData') or {}).get('goodsInfoList') or []
    if not goods:
        st.warning("MOMO 沒有找到相關商品")
        return no_products

    return pd.DataFrame([
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ])


def _fetch_pchome(search_keyword, pages):
    """Fetch PCHOME search results for pages 1..``pages``.

    Args:
        search_keyword: Text to search for; it is URL-encoded before use.
        pages: Number of result pages to request.

    Returns:
        DataFrame holding whichever of ``name``, ``describe``, ``price`` and
        ``original_price`` (renamed from ``orig``) the API returned, plus a
        ``platform`` column. Empty when nothing could be fetched.
    """
    pchome_base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
    wanted_columns = ['name', 'describe', 'price', 'orig']
    # Encode once so characters such as "&" or "#" cannot break the query.
    encoded_keyword = quote(search_keyword)
    frames = []
    had_error = False

    for page in range(1, pages + 1):
        pchome_url = f'{pchome_base_url}{encoded_keyword}&page={page}&sort=sale/dc'
        try:
            pchome_response = requests.get(pchome_url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            had_error = True
            continue

        if pchome_response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {pchome_response.status_code}")
            had_error = True
            continue

        try:
            products = pchome_response.json().get('prods') or []
        except ValueError as exc:
            st.error(f"PCHOME 回應解析失敗: {exc}")
            had_error = True
            continue

        if not products:
            # No products on this page, so later pages are not requested.
            break

        page_df = pd.DataFrame(products)
        kept = [col for col in wanted_columns if col in page_df.columns]
        if 'price' not in kept:
            # Without prices the rows cannot be analysed or charted.
            continue
        page_df = page_df[kept].rename(columns={'orig': 'original_price'})
        page_df['platform'] = 'PCHOME'
        # Unparseable prices become NaN instead of aborting the whole search.
        page_df['price'] = pd.to_numeric(page_df['price'], errors='coerce').astype(float)
        frames.append(page_df)

        if page < pages:
            time.sleep(1)  # pause between consecutive page requests

    if not frames:
        if not had_error:
            st.warning("PCHOME 沒有找到相關商品")
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def _render_platform(df, platform, name_col, search_keyword):
    """Show the table, price statistics, scatter chart and sunburst for one shop.

    Args:
        df: Non-empty product table with a numeric ``price`` column.
        platform: Shop label used in headings ("MOMO" or "PCHOME").
        name_col: Column of ``df`` that holds the product name.
        search_keyword: Keyword shown in the chart titles.
    """
    st.write(f"{platform} 商品數據:", df)

    prices = df['price']
    avg_price = prices.mean()
    st.write(f"{platform} 平均價格: {avg_price:.2f}")
    st.write(f"{platform} 最高價格: {prices.max():.2f}")
    st.write(f"{platform} 最低價格: {prices.min():.2f}")

    # Scatter chart of the first products with the average as a reference line.
    shown = df[:_MAX_CHART_ITEMS]
    fig = px.scatter(shown, x=name_col, y='price', hover_data=[name_col],
                     title=f'{platform} 電商網站上 "{search_keyword}" 的銷售價格',
                     labels={name_col: '商品名稱', 'price': '價格'})
    fig.update_xaxes(tickangle=45, tickmode='array',
                     tickvals=list(range(len(shown))),
                     ticktext=shown[name_col].tolist())
    fig.add_hline(y=avg_price, line_dash="dash", line_color="red",
                  annotation_text=f"參考價格: {avg_price:.2f}",
                  annotation_position="bottom right")
    fig.update_layout(height=600)
    st.plotly_chart(fig)

    # Sunburst grouped by price range.
    sunburst_data = df.copy()
    # include_lowest=True puts a price of exactly 0 (the parse-failure
    # fallback) into the first range instead of leaving it unassigned.
    sunburst_data['price_range'] = pd.cut(sunburst_data['price'],
                                          bins=_PRICE_BINS,
                                          labels=_PRICE_LABELS,
                                          include_lowest=True)
    # Rows that still have no range (NaN or negative price) have no parent
    # node to attach to, so they are left out of the chart.
    sunburst_data = sunburst_data.dropna(subset=['price_range'])
    if not sunburst_data.empty:
        fig = px.sunburst(sunburst_data, path=['price_range', name_col], values='price',
                          title=f'{platform} "{search_keyword}" 價格分佈 (Sunburst 圖)')
        fig.update_layout(height=800)
        st.plotly_chart(fig)


# Retry mechanism with exponential backoff; reraise=True hands the original
# exception (not tenacity's RetryError) to the caller after the last attempt.
@retry(wait=wait_exponential(multiplier=1, min=1, max=10),
       stop=stop_after_attempt(3),
       reraise=True)
def fetch_trends_data(pytrend):
    """Return the interest-over-time table for an already built payload."""
    return pytrend.interest_over_time()


def _render_trends(search_keyword, timeframe):
    """Fetch and chart Google Trends interest for ``search_keyword``.

    Any failure is reported with ``st.error`` instead of aborting the page.
    """
    st.subheader("Google趨勢分析")
    try:
        # Client creation and payload building sit inside the try so that
        # their failures are reported like any other Trends error.
        pytrend = TrendReq(hl="zh-TW", tz=-480)
        # NOTE(review): cat=3 restricts results to one Google Trends
        # category — confirm it is the intended one.
        pytrend.build_payload(
            kw_list=[search_keyword],
            cat=3,
            timeframe=timeframe,
            geo="TW",
            gprop="",
        )
        trends_df = fetch_trends_data(pytrend)
        if trends_df.empty:
            st.warning("Google趨勢沒有返回數據")
            return
        # errors="ignore": do not fail when the column is absent.
        trends_df = trends_df.drop(columns=["isPartial"], errors="ignore")

        # Line chart of the interest index over time.
        fig = px.line(trends_df, x=trends_df.index, y=search_keyword,
                      title=f"Google趨勢 - '{search_keyword}' 的趨勢分析")
        fig.update_traces(mode='lines+markers')
        fig.update_layout(xaxis_title="時間", yaxis_title="興趣指數", height=600)
        st.plotly_chart(fig)

        # Summary statistics of the trend data.
        st.write("趨勢數據統計:")
        st.write(trends_df.describe())
    except Exception as e:  # UI boundary: show the problem, keep the page alive
        st.error(f"獲取Google趨勢數據時出錯: {e}")


# Run the whole search only when the user presses the button.
if st.button("開始搜索"):
    start_time = time.time()

    # MOMO scraping and charts.
    momo_df = _fetch_momo(keyword, page_number)
    if not momo_df.empty:
        _render_platform(momo_df, 'MOMO', 'title', keyword)

    # PCHOME scraping and charts.
    pchome_data = _fetch_pchome(keyword, page_number)
    if not pchome_data.empty:
        _render_platform(pchome_data, 'PCHOME', 'name', keyword)

    # Combine both shops; PCHOME's `name` is aligned with MOMO's `title` so
    # product names end up in a single column.
    parts = [
        frame
        for frame in (momo_df, pchome_data.rename(columns={'name': 'title'}))
        if not frame.empty
    ]
    if parts:
        combined_data = pd.concat(parts, ignore_index=True)
        st.write("合併的商品數據:", combined_data)
        combined_avg_price = combined_data['price'].mean()
        st.write(f"合併後的平均價格: {combined_avg_price:.2f}")

    # Google Trends analysis.
    _render_trends(keyword, search_timeframe)

    end_time = time.time()
    st.write(f"執行時間: {end_time - start_time:.2f} 秒")