# Roberta2024's picture
# Update app.py
# 68a4670 verified
import json
import os
import time
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st
# Page heading rendered at the top of the Streamlit app.
st.title("🐣MOMO 🆚 PCHOME 商品搜索和價格分析👁️‍🗨️")

# Search parameters supplied by the user: the keyword to look up and a
# page value restricted to the integer range 1-100 (default 1).
search_keyword = st.text_input(label="請輸入要搜索的關鍵字: ", value="筆電")
page_number = st.number_input(
    label="請輸入要搜索的頁數: ",
    min_value=1,
    max_value=100,
    value=1,
    step=1,
)
# Search handler.
#
# Clicking the button queries MOMO and PCHOME for ``search_keyword``, shows
# each platform's product table, mean/max/min price and a scatter plot, then
# offers both result sets as a single CSV download. The helpers below keep
# the two platforms on one shared reporting path and make every network
# failure non-fatal, so one platform being down never hides the other.

_REQUEST_TIMEOUT = 15  # seconds; without a timeout a stalled server hangs the app
_MAX_PLOT_POINTS = 70  # only the first 70 products are drawn on each chart

_FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
_FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"

_MOMO_URL = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
_MOMO_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
}
_PCHOME_BASE_URL = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='


def _ensure_cjk_font():
    """Register the Taipei Sans TC font with matplotlib so CJK labels render.

    The font file is downloaded only when it is not already on disk (the
    original code re-downloaded it on every search). Any failure is reported
    as a warning and the charts fall back to matplotlib's default font.
    """
    try:
        if not os.path.exists(_FONT_PATH):
            font_response = requests.get(_FONT_URL, timeout=_REQUEST_TIMEOUT)
            font_response.raise_for_status()
            with open(_FONT_PATH, "wb") as font_file:
                font_file.write(font_response.content)
        fm.fontManager.addfont(_FONT_PATH)
    except (requests.RequestException, OSError, RuntimeError) as exc:
        # Drop a possibly partial/corrupt cached file so the next search
        # retries the download instead of reusing a broken font forever.
        if os.path.exists(_FONT_PATH):
            try:
                os.remove(_FONT_PATH)
            except OSError:
                pass  # best effort: the warning below is still shown
        st.warning(f"無法載入中文字型,圖表文字可能無法正確顯示: {exc}")
        return
    mpl.rc('font', family='Taipei Sans TC Beta')


def _parse_momo_price(raw_price):
    """Convert a MOMO ``goodsPrice`` value to a float.

    Everything from the first ``(`` onward is dropped and ``,`` / ``$`` are
    removed before conversion. Returns 0.0 when the remainder is not numeric.
    """
    price_str = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(price_str)
    except ValueError:
        # NOTE(review): this 0 placeholder is counted by the mean/min shown
        # to the user; consider NaN if unparseable prices should be skipped.
        return 0.0


def _fetch_momo(keyword, page):
    """Query MOMO's search API and return the products as a DataFrame.

    Returns a frame with ``title``, ``price`` and ``platform`` columns, or an
    empty DataFrame when the request fails or no products come back. Request
    problems are shown to the user with ``st.error``.
    """
    # NOTE(review): only page ``page`` is requested here, whereas the PCHOME
    # fetch walks pages 1..page_number — confirm which meaning is intended.
    payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }
    try:
        response = requests.post(
            _MOMO_URL, headers=_MOMO_HEADERS, json=payload, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return pd.DataFrame()
    if response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {response.status_code}")
        return pd.DataFrame()
    try:
        body = response.json()
    except ValueError:
        st.error("MOMO 回應不是有效的 JSON")
        return pd.DataFrame()

    # ``or`` guards against keys that are present but null in the response.
    search_data = (body.get('rtnSearchData') if isinstance(body, dict) else None) or {}
    goods = search_data.get('goodsInfoList') or []
    rows = [
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ]
    return pd.DataFrame(rows)


def _fetch_pchome(keyword, pages):
    """Fetch PCHOME search result pages 1..``pages`` into one DataFrame.

    Keeps whichever of the ``name``, ``describe``, ``price`` and ``orig``
    columns are present (``orig`` is renamed to ``original_price``) and adds
    a ``platform`` column. Failed pages are reported with ``st.error`` and
    skipped. Returns an empty DataFrame when nothing could be fetched.
    """
    wanted_columns = ['name', 'describe', 'price', 'orig']
    # Percent-encode the keyword so characters such as '&', '#' or '+' cannot
    # be mistaken for query-string syntax.
    encoded_keyword = quote(keyword, safe='')
    frames = []
    for page in range(1, pages + 1):
        url = f'{_PCHOME_BASE_URL}{encoded_keyword}&page={page}&sort=sale/dc'
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue
        if response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {response.status_code}")
            continue
        try:
            body = response.json()
        except ValueError:
            st.error("PCHOME 回應不是有效的 JSON")
            continue

        products = body.get('prods') if isinstance(body, dict) else None
        if not products:
            # No products on this page: later pages will not have any either.
            break

        page_df = pd.DataFrame(products)
        page_df = page_df[[col for col in wanted_columns if col in page_df.columns]]
        page_df = page_df.rename(columns={'orig': 'original_price'})
        page_df['platform'] = 'PCHOME'  # Add platform identifier
        frames.append(page_df)

        if page < pages:
            time.sleep(1)  # pause between consecutive page requests

    if not frames:
        return pd.DataFrame()
    # ignore_index gives every product a unique row number; without it each
    # page restarts at 0 and the pages overlap on the chart's x axis.
    return pd.concat(frames, ignore_index=True)


def _show_platform_report(df, platform, keyword):
    """Render one platform's table, price statistics and price scatter plot.

    Shows an informational message instead when ``df`` has no rows or no
    ``price`` column, so an empty search result cannot raise ``KeyError``.
    """
    if df.empty or 'price' not in df.columns:
        st.info(f"{platform} 沒有可顯示的商品數據")
        return

    st.write(f"{platform} 商品數據:", df)

    avg_price = df['price'].mean()
    st.write(f"{platform} 平均價格: {avg_price:.2f}")
    st.write(f"{platform} 最高價格: {df['price'].max():.2f}")
    st.write(f"{platform} 最低價格: {df['price'].min():.2f}")

    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(df.index[:_MAX_PLOT_POINTS], df['price'][:_MAX_PLOT_POINTS], 'o', color='skyblue', markersize=8)
    ax.set_title(f'{platform} 電商網站上 "{keyword}" 的銷售價格', fontsize=20, fontweight='bold')
    # Dashed red line marks the mean price as a reference level.
    ax.axhline(y=avg_price, color='red', linestyle='--', linewidth=2, label=f'參考價格: {avg_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=14)
    ax.set_ylabel('價格', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    # Streamlit reruns this script on every interaction; close the figure so
    # pyplot does not keep accumulating open figures.
    plt.close(fig)


if st.button("開始搜索"):
    start_time = time.time()

    # Register the CJK font once up front so both charts can use it, even
    # when one of the platforms fails.
    _ensure_cjk_font()

    # MOMO scraping, analysis and visualization
    momo_df = _fetch_momo(search_keyword, page_number)
    _show_platform_report(momo_df, 'MOMO', search_keyword)

    # PCHOME scraping, analysis and visualization
    pchome_data = _fetch_pchome(search_keyword, int(page_number))
    _show_platform_report(pchome_data, 'PCHOME', search_keyword)

    # Combine MOMO and PCHOME data. ``momo_df`` is always bound now, so a
    # failed MOMO request no longer raises NameError here; empty frames are
    # left out of the concat.
    available_frames = [frame for frame in (momo_df, pchome_data) if not frame.empty]
    if available_frames:
        combined_data = pd.concat(available_frames, ignore_index=True)
        # Add a button to download the combined data as CSV. The BOM from
        # 'utf-8-sig' lets spreadsheet tools detect the UTF-8 encoding.
        csv = combined_data.to_csv(index=False, encoding='utf-8-sig').encode('utf-8-sig')
        st.download_button(
            label="下載CSV檔案",
            data=csv,
            file_name=f'{search_keyword}_combined_data.csv',
            mime='text/csv'
        )

    end_time = time.time()
    st.write(f"Execution time: {end_time - start_time:.2f} seconds")