"""Streamlit app that scrapes PChome search results and charts product prices."""

import json
import logging
import os
import time
import urllib.request
from datetime import datetime
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import streamlit as st

logger = logging.getLogger(__name__)

# Font used so that Chinese labels render in Matplotlib figures.
FONT_PATH = 'TaipeiSansTCBeta-Regular.ttf'
FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
FONT_FAMILY = 'Taipei Sans TC Beta'

# PChome search endpoint and request pacing.
SEARCH_URL = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results'
REQUEST_TIMEOUT_SECONDS = 10
PAGE_DELAY_SECONDS = 2


def setup_chinese_font():
    """Download (if needed) and register the Chinese font with Matplotlib.

    The font is fetched once into the working directory and reused on later
    runs. Any failure is logged and swallowed so the app can still start
    with Matplotlib's default font.

    Returns:
        bool: True when the font was registered and selected, False otherwise.
    """
    if not os.path.exists(FONT_PATH):
        # Download to a temporary name first so an interrupted transfer never
        # leaves a truncated file that later runs would mistake for the font.
        partial_path = FONT_PATH + '.part'
        try:
            urllib.request.urlretrieve(FONT_URL, partial_path)
            os.replace(partial_path, FONT_PATH)
        except OSError as exc:  # URLError and ContentTooShortError are OSErrors
            logger.warning("Could not download font from %s: %s", FONT_URL, exc)
            if os.path.exists(partial_path):
                os.remove(partial_path)
            return False

    try:
        fm.fontManager.addfont(FONT_PATH)
    except (OSError, RuntimeError) as exc:
        logger.warning("Could not load font file %s: %s", FONT_PATH, exc)
        return False

    mpl.rc('font', family=FONT_FAMILY)
    return True


# Configure the Chinese font (no Streamlit calls happen in here, so it is
# safe to run before st.set_page_config).
CHINESE_FONT_READY = setup_chinese_font()

# Set page config
st.set_page_config(
    page_title="PChome 商品分析器",
    page_icon="📊",
    layout="wide"
)

# Title and description
st.title("PChome 商品分析器")
st.markdown("這個應用程式可以爬取並分析 PChome 上的商品資訊")

# Input section
with st.sidebar:
    st.header("搜尋設定")
    keyword = st.text_input("請輸入搜尋關鍵字", "行李箱")
    page_num = st.number_input("要爬取的頁數", min_value=1, max_value=10, value=1)


def scrape_pchome(keyword, page_num):
    """Fetch PChome search results for ``keyword`` across ``page_num`` pages.

    Pages are requested sequentially, sorted by sales. If a page fails
    (network error, HTTP error status, or an invalid JSON body) an error is
    shown in the UI, scraping stops, and whatever was collected so far is
    returned.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the query string.
        page_num: Number of result pages to fetch, starting at page 1.

    Returns:
        pandas.DataFrame: One row per product with a fresh 0..n-1 index, or
        an empty DataFrame when nothing was retrieved.
    """
    frames = []
    # Encode everything (safe='') so characters such as '&', '#', '+' or
    # spaces in the keyword cannot corrupt or truncate the query string.
    encoded_keyword = quote(keyword, safe='')

    with st.spinner(f'正在爬取 {page_num} 頁的資料...'):
        # A single progress bar that is updated in place for every page.
        progress = st.progress(0.0)
        for i in range(1, page_num + 1):
            url = f'{SEARCH_URL}?q={encoded_keyword}&page={i}&sort=sale/dc'
            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
                response.raise_for_status()
                payload = json.loads(response.content)
                prods = payload.get('prods') if isinstance(payload, dict) else None
                if prods:
                    frames.append(pd.DataFrame(prods))
            except (requests.RequestException, ValueError) as e:
                # ValueError also covers json.JSONDecodeError.
                st.error(f"爬取第 {i} 頁時發生錯誤: {str(e)}")
                break

            progress.progress(i / page_num)

            # Be polite to the server between pages, but do not make the
            # user wait after the final page.
            if prods and i < page_num:
                time.sleep(PAGE_DELAY_SECONDS)

    if not frames:
        return pd.DataFrame()
    # Concatenate once, and renumber rows so that index-based plots do not
    # restart from 0 at every page.
    return pd.concat(frames, ignore_index=True)


def create_analysis_plots(df, keyword=''):
    """Render price statistics and charts for the scraped products.

    Shows mean/max/min price metrics, a line plot of the first 70 prices
    with the mean as a reference line, and a histogram of all prices.

    Args:
        df: Product DataFrame; must contain a numeric ``price`` column.
        keyword: Search term shown in the trend plot title.

    Raises:
        KeyError: If ``df`` has no ``price`` column.
    """
    # Global font style; only point Matplotlib at the Chinese font when it
    # was actually registered, otherwise keep the default fallback chain.
    if CHINESE_FONT_READY:
        plt.rcParams['font.sans-serif'] = [FONT_FAMILY]
    plt.rcParams['axes.unicode_minus'] = False

    prices = df['price']
    mean_price = prices.mean()

    # Basic statistics
    st.subheader("基本統計資訊")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("平均價格", f"NT$ {mean_price:,.0f}")
    with col2:
        st.metric("最高價格", f"NT$ {prices.max():,.0f}")
    with col3:
        st.metric("最低價格", f"NT$ {prices.min():,.0f}")

    # Price trend plot. The Axes API is used instead of pyplot's implicit
    # "current figure" state, which is shared across Streamlit sessions.
    st.subheader("價格趨勢圖")
    fig, ax = plt.subplots(figsize=(15, 8))
    prices.iloc[:70].plot(
        color='skyblue',
        linewidth=2,
        marker='o',
        markersize=8,
        ax=ax
    )
    ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2,
               label=f'平均價格: NT$ {mean_price:,.0f}')
    ax.set_title(f'{datetime.now().strftime("%Y%m%d")} PChome {keyword} 售價分析',
                 fontsize=20, fontweight='bold')
    ax.set_xlabel('商品編號', fontsize=14)
    ax.set_ylabel('價格 (NT$)', fontsize=14)
    ax.tick_params(axis='x', labelrotation=45)
    ax.grid(True, alpha=0.3)
    ax.legend()
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across reruns.
    plt.close(fig)

    # Price distribution plot
    st.subheader("價格分布圖")
    fig2, ax2 = plt.subplots(figsize=(12, 6))
    sns.histplot(data=prices, bins=30, kde=True, ax=ax2)
    ax2.set_title('商品價格分布', fontsize=16)
    ax2.set_xlabel('價格 (NT$)', fontsize=12)
    ax2.set_ylabel('數量', fontsize=12)
    st.pyplot(fig2)
    plt.close(fig2)


# Main app logic
if st.sidebar.button('開始分析'):
    if not keyword.strip():
        # Nothing to search for; prompt instead of sending an empty query.
        st.warning("請輸入搜尋關鍵字")
    else:
        # Record start time (monotonic clock, suited to measuring durations)
        start_time = time.perf_counter()

        # Scrape data
        data = scrape_pchome(keyword, int(page_num))

        # The views below need both columns; treat a response without them
        # the same as "no products found" rather than raising KeyError.
        if not data.empty and {'name', 'price'}.issubset(data.columns):
            # Display raw data
            st.subheader("原始資料")
            st.dataframe(data[['name', 'price']])

            # Create analysis plots
            create_analysis_plots(data, keyword)

            # Download button for CSV (utf-8-sig so Excel detects the encoding)
            csv = data.to_csv(index=False).encode('utf-8-sig')
            st.download_button(
                label="下載完整資料 (CSV)",
                data=csv,
                file_name=f'pchome_{keyword}_{datetime.now().strftime("%Y%m%d")}.csv',
                mime='text/csv'
            )

            # Display execution time
            end_time = time.perf_counter()
            st.info(f'分析完成!執行時間:{end_time - start_time:.2f} 秒')
        else:
            st.error("沒有找到相關商品資料")

# Footer
st.markdown("---")
st.markdown("Made with ❤️ by Your Name")