import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import quote
import streamlit as st
import json
import time
@st.cache_data
def scrape_e_commerce(nama_barang, num_items):
    """Scrape product listings from klikindomaret.com for a search keyword.

    Args:
        nama_barang: Search keyword entered by the user (URL-encoded here).
        num_items: Maximum number of products to collect.

    Returns:
        A list of at most ``num_items`` dicts with keys ``product``,
        ``original_price``, ``discount_percentage``, ``price`` and ``link``.
    """
    products = []
    page = 1
    query = quote(nama_barang)
    progress_text = "Scraping in progress. Please wait."
    my_bar = st.progress(0, text=progress_text)
    # Each result page holds up to 54 items; `range(num_items)` is only a
    # hard safety bound so a misbehaving site can never loop forever.
    for _ in range(num_items):
        my_bar.progress(min(len(products) / num_items, 1.0), text=progress_text)
        # BUG FIX: original used '>' so a run that landed exactly on
        # num_items fetched one extra page before stopping.
        if len(products) >= num_items:
            break
        url = (
            f"https://www.klikindomaret.com/search/?key={query}"
            f"&categories=&productbrandid=&sortcol=&pagesize=54&page={page}"
            f"&startprice=&endprice=&attributes=&ShowItem="
        )
        try:
            # Timeout so one stalled request cannot hang the whole app.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException:
            break  # network/HTTP problem: return what was collected so far
        soup = BeautifulSoup(response.text, 'html.parser')
        found_on_page = 0
        for anchor in soup.find_all('a', href=True):
            product_href = anchor['href']
            if '/product/' not in product_href:
                continue
            title_el = anchor.find('div', class_='title')
            price_el = anchor.find('span', class_='normal price-value')
            if title_el is None or price_el is None:
                continue  # skip malformed cards instead of raising AttributeError
            product_name = title_el.text.strip()
            product_price = price_el.text.strip()
            # Check for a pre-discount price and a discount percentage.
            discount_element = anchor.find('span', class_='strikeout disc-price')
            discount_span = discount_element.find('span', class_='discount') if discount_element else None
            if discount_element and discount_span:
                discount_percentage = discount_span.text.strip()
                original_price = discount_element.text.replace(discount_percentage, '').strip()
            else:
                # No discount: report "0%" and reuse the current price.
                discount_percentage = "0%"
                original_price = product_price
            products.append({
                'product': product_name,
                'original_price': original_price,
                'discount_percentage': discount_percentage,
                'price': product_price,
                'link': f"https://www.klikindomaret.com{product_href}",
            })
            found_on_page += 1
        if found_on_page == 0:
            break  # no products on this page: end of results
        page += 1
        time.sleep(1)  # be polite to the server between page fetches
    my_bar.empty()
    # BUG FIX: trim on return as well — the original could return more than
    # num_items when the loop ended without re-entering the trim branch.
    return products[:num_items]
#---------------------------------------------------User Interface----------------------------------------------------------------------
# Streamlit UI
st.title("Scraping E-Commerce")

with st.expander("Settings :"):
    # Which site to scrape (Shopee support is still under construction).
    selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id(under maintenance)"])
    nama_barang = st.text_input("Masukkan Nama Barang :")
    num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value=1, step=1, placeholder="Type a number...")
    download_format = st.selectbox("Pilih Format Unduhan :", ["XLSX", "CSV", "JSON"])
    st.info('Tekan "Mulai Scraping" kembali jika tampilan menghilang ', icon="ℹ️")

# Scraped rows are stashed here for the results/download section below.
hidden_data = []
scraping_done = False  # flips to True once a scrape run has finished

if selected_site == "shopee.co.id(under maintenance)":
    st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
elif selected_site == "klikindomaret.com":
    if st.button("Mulai Scraping"):
        if nama_barang:
            scraped_products = scrape_e_commerce(nama_barang, num_items)
            hidden_data = scraped_products
            scraping_done = True
        else:
            st.error("Mohon isi Nama Barang.")

# Target file names for each supported download format.
base_name = f"scraped_{nama_barang}"
output_file = base_name + ".xlsx"
output_file_csv = base_name + ".csv"
output_file_json = base_name + ".json"
#---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------
# Show the scraped rows and offer them for download in the chosen format.
if scraping_done:
    if hidden_data:
        df = pd.DataFrame(hidden_data)
        with st.expander(f"Hasil Scraping {selected_site} :"):
            st.write(df)
        if download_format == "XLSX":
            df.to_excel(output_file, index=False)
            # BUG FIX: read the workbook back via a context manager; the
            # original `open(...).read()` leaked the file handle.
            with open(output_file, "rb") as fh:
                xlsx_bytes = fh.read()
            st.download_button(label=f"Unduh XLSX ({len(hidden_data)} data)", data=xlsx_bytes, key="xlsx_download", file_name=output_file)
        elif download_format == "CSV":
            csv = df.to_csv(index=False)
            st.download_button(label=f"Unduh CSV ({len(hidden_data)} data)", data=csv, key="csv_download", file_name=output_file_csv)
        elif download_format == "JSON":
            json_data = df.to_json(orient="records")
            st.download_button(label=f"Unduh JSON ({len(hidden_data)} data)", data=json_data, key="json_download", file_name=output_file_json)
    else:
        # BUG FIX: `query` only exists inside scrape_e_commerce(), so the
        # original f-string raised NameError here. Use the search term.
        st.warning(f"Tidak ada data pada query '{nama_barang}'", icon="⚠️")
else:
    st.write("Tidak ada data untuk diunduh.")
st.divider()
# Footer: the author's profiles plus a closing thank-you message.
github_link = "https://github.com/naufalnashif/"
instagram_link = "https://www.instagram.com/naufal.nashif/"
for label, link in [("GitHub", github_link), ("Instagram", instagram_link)]:
    st.markdown(f"{label}: [{link}]({link})")
st.write('Terima kasih telah mencoba demo ini!')