Spaces:
Sleeping
Sleeping
import os | |
import requests | |
import pandas as pd | |
import matplotlib as mpl | |
import matplotlib.font_manager as fm | |
import matplotlib.pyplot as plt | |
import streamlit as st | |
import json | |
import time | |
from pytrends.request import TrendReq | |
import plotly.express as px | |
from datetime import datetime, timedelta | |
# Page heading for the Streamlit app
st.title("🐣MOMO 🆚 PCHOME 商品搜索和價格分析👁️🗨️")

# Search parameters supplied by the user: keyword and number of result pages
search_keyword = st.text_input(label="請輸入要搜索的關鍵字: ", value="筆電")
page_number = st.number_input(
    label="請輸入要搜索的頁數: ",
    min_value=1,
    max_value=100,
    value=1,
    step=1,
)

# Date window for the pytrends query; the start defaults to 30 days before today
today = datetime.today()
start_date = st.date_input(label="選擇開始日期", value=today - timedelta(days=30))
end_date = st.date_input(label="選擇結束日期", value=today)
# Settings shared by the scraping / plotting helpers below.
REQUEST_TIMEOUT = 15  # seconds; keeps a stalled endpoint from hanging the app
MAX_PLOTTED_ITEMS = 70  # only the first N products are drawn on the scatter plot
FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
FONT_FAMILY = "Taipei Sans TC Beta"
FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
MOMO_SEARCH_URL = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
# The sort option stays in the URL literally; q/page are passed via `params`
# so that requests URL-encodes the user-supplied keyword.
PCHOME_SEARCH_URL = "https://ecshweb.pchome.com.tw/search/v3.3/all/results?sort=sale/dc"


def _ensure_cjk_font():
    """Make the Taipei Sans TC font available to matplotlib.

    The font file is downloaded only when it is not already on disk, and it is
    registered before any chart is drawn so every plot can render Chinese text.
    A failed download is reported as a warning instead of aborting the search.
    """
    if not os.path.exists(FONT_PATH):
        try:
            font_response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT)
            font_response.raise_for_status()
        except requests.RequestException as exc:
            st.warning(f"字型下載失敗,圖表中的中文可能無法正常顯示: {exc}")
            return
        with open(FONT_PATH, "wb") as font_file:
            font_file.write(font_response.content)
    # Streamlit re-runs the script in the same process; avoid re-registering.
    if not any(entry.name == FONT_FAMILY for entry in fm.fontManager.ttflist):
        fm.fontManager.addfont(FONT_PATH)
    mpl.rc('font', family=FONT_FAMILY)


def _parse_momo_price(raw_price):
    """Convert a MOMO price value to float; unparsable values become 0."""
    price_str = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(price_str)
    except ValueError:
        return 0


def _fetch_momo_products(keyword, page):
    """Query the MOMO search API for one result page.

    Returns a DataFrame with columns title / price / platform. An empty
    DataFrame is returned (after showing an error) when the request fails,
    so callers can always concatenate the result.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }
    try:
        response = requests.post(
            MOMO_SEARCH_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return pd.DataFrame()
    if response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {response.status_code}")
        return pd.DataFrame()
    try:
        body = response.json()
    except ValueError as exc:
        st.error(f"MOMO 回應解析失敗: {exc}")
        return pd.DataFrame()
    goods = (body.get('rtnSearchData') or {}).get('goodsInfoList') or []
    return pd.DataFrame([
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ])


def _fetch_pchome_products(keyword, pages):
    """Query the PCHOME search API for pages 1..pages and merge the results.

    Keeps the name / describe / price / orig columns that are present (orig is
    renamed to original_price) and tags each row with platform 'PCHOME'.
    Pages that fail are reported and skipped. The merged frame gets a fresh
    0..n-1 index so rows from different pages do not share index labels.
    """
    wanted_columns = ('name', 'describe', 'price', 'orig')
    frames = []
    for page in range(1, pages + 1):
        try:
            response = requests.get(
                PCHOME_SEARCH_URL,
                params={'q': keyword, 'page': page},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue
        if response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {response.status_code}")
            continue
        try:
            products = response.json().get('prods') or []
        except ValueError as exc:
            st.error(f"PCHOME 回應解析失敗: {exc}")
            continue
        if products:
            page_df = pd.DataFrame(products)
            present = [col for col in wanted_columns if col in page_df.columns]
            frames.append(
                page_df[present]
                .rename(columns={'orig': 'original_price'})
                .assign(platform='PCHOME')
            )
        time.sleep(1)  # pause between page requests
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def _show_price_overview(products, platform, name_column, keyword):
    """Render the table, price statistics, scatter plot and sunburst chart.

    `products` must be non-empty and contain a 'price' column plus the column
    named by `name_column` (the product name used for the sunburst).
    """
    st.write(f"{platform} 商品數據:", products)

    # Summary statistics
    avg_price = products['price'].mean()
    st.write(f"{platform} 平均價格: {avg_price:.2f}")
    st.write(f"{platform} 最高價格: {products['price'].max():.2f}")
    st.write(f"{platform} 最低價格: {products['price'].min():.2f}")

    # Scatter plot of the first MAX_PLOTTED_ITEMS prices with the mean as a reference line
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(
        products.index[:MAX_PLOTTED_ITEMS],
        products['price'][:MAX_PLOTTED_ITEMS],
        'o', color='skyblue', markersize=8,
    )
    ax.set_title(f'{platform} 電商網站上 "{keyword}" 的銷售價格', fontsize=20, fontweight='bold')
    ax.axhline(y=avg_price, color='red', linestyle='--', linewidth=2, label=f'參考價格: {avg_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=14)
    ax.set_ylabel('價格', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)  # release the figure; the Streamlit process is long-lived

    # Sunburst chart: product name -> price, sized by number of occurrences
    sunburst_data = products.groupby([name_column, 'price']).size().reset_index(name='count')
    sunburst_fig = px.sunburst(
        sunburst_data,
        path=[name_column, 'price'],
        values='count',
        title=f'{platform} 商品價格分佈',
    )
    sunburst_fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    st.plotly_chart(sunburst_fig)


def _show_search_trend(keyword, start, end):
    """Fetch interest-over-time for `keyword` via pytrends and plot it.

    Nothing is plotted (a message is shown instead) when the date range is
    inverted or when the query returns no rows for the keyword.
    """
    if start > end:
        st.error("開始日期不能晚於結束日期")
        return

    pytrend = TrendReq(hl="zh-TW", tz=-480)
    pytrend.build_payload(
        kw_list=[keyword],
        cat=3,
        timeframe=f"{start} {end}",
        geo="TW",
        gprop="",
    )
    trend_df = pytrend.interest_over_time()
    if trend_df.empty or keyword not in trend_df.columns:
        st.warning("Google Trends 沒有返回任何數據")
        return
    trend_df = trend_df.drop(columns=["isPartial"], errors="ignore")

    # Line chart of the trend data
    fig, ax = plt.subplots(figsize=(12, 8), dpi=80)
    ax.plot(trend_df.index, trend_df[keyword], label=keyword, lw=3.0, marker='o', markersize=8, color='#4285F4', linestyle='-')
    ax.set_title("Interest Over Time for " + keyword, fontsize=20, fontweight='bold', color='#4285F4')
    ax.set_xlabel("時間", fontsize=14, fontweight='bold', color='#4285F4')
    ax.set_ylabel("熱搜度", fontsize=14, fontweight='bold', color='#4285F4')
    ax.grid(True, linestyle='--', alpha=0.6)
    # Hide the top and right axis spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    # Tick colours
    ax.tick_params(axis='x', colors='#4285F4')
    ax.tick_params(axis='y', colors='#4285F4')
    # Legend with a semi-transparent frame
    legend = ax.legend()
    legend.get_frame().set_alpha(0.5)
    legend.get_lines()[0].set_linestyle('-')
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)

    # Descriptive statistics of the trend series
    st.write(trend_df.describe())


# Button that starts the scraping run
if st.button("開始搜索"):
    start_time = time.time()
    _ensure_cjk_font()

    # MOMO: always yields a DataFrame, so the merge below cannot hit an undefined name
    momo_df = _fetch_momo_products(search_keyword, page_number)
    if momo_df.empty:
        st.warning("MOMO 沒有可顯示的商品數據")
    else:
        _show_price_overview(momo_df, 'MOMO', 'title', search_keyword)

    # PCHOME
    pchome_data = _fetch_pchome_products(search_keyword, page_number)
    if not pchome_data.empty:
        _show_price_overview(pchome_data, 'PCHOME', 'name', search_keyword)

    # Merge both platforms and offer the result as a CSV download
    combined_data = pd.concat([momo_df, pchome_data], ignore_index=True)
    if not combined_data.empty:
        # Encoding with utf-8-sig prepends a BOM to the downloaded bytes
        csv = combined_data.to_csv(index=False).encode('utf-8-sig')
        st.download_button(
            label="下載CSV檔案",
            data=csv,
            file_name=f'{search_keyword}_combined_data.csv',
            mime='text/csv',
        )

    end_time = time.time()
    st.write(f"執行時間: {end_time - start_time:.2f} 秒")

    # Pytrends analysis
    _show_search_trend(search_keyword, start_date, end_date)