Commit
·
e8b6098
1
Parent(s):
a405279
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import json
|
|
7 |
import time
|
8 |
|
9 |
from selenium import webdriver
|
10 |
-
from selenium.
|
11 |
|
12 |
@st.cache_data
|
13 |
def scrape_klikindomaret(nama_barang, num_items):
|
@@ -89,63 +89,73 @@ def scrape_shopee(nama_barang, num_items):
|
|
89 |
options.add_argument('--headless')
|
90 |
options.add_argument('--disable-notifications')
|
91 |
options.add_argument('--disable-infobars')
|
92 |
-
|
93 |
-
#driver = webdriver.Chrome(executable_path = path, options = chrome_options)
|
94 |
-
driver = webdriver.Chrome(options = options)
|
95 |
-
|
96 |
-
while len(products) < num_items :
|
97 |
-
#Cek agar produk sesuai jumlah yang diminta
|
98 |
-
if len (products) > num_items :
|
99 |
-
products = products[:num_items]
|
100 |
-
break
|
101 |
-
|
102 |
-
driver.get(url)
|
103 |
-
|
104 |
-
html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
|
105 |
-
soup = BeautifulSoup(html, "html.parser")
|
106 |
|
107 |
-
product_list = soup.find_all('li', class_="col-xs-2-4 shopee-search-item-result__item" )
|
108 |
-
for product in product_list:
|
109 |
-
# Mencari tag <a> di dalam setiap tag <li>
|
110 |
-
a_tag = product.find_all('a', href=True)
|
111 |
-
|
112 |
-
for product_info in a_tag:
|
113 |
-
# Mendapatkan URL dari atribut 'href'
|
114 |
-
product_href = product_info['href']
|
115 |
-
product_name = product.find('div', class_="ie3A+n bM+7UW Cve6sh").text.strip()
|
116 |
-
product_price = product.find('div', class_="vioxXd rVLWG6").text.strip()
|
117 |
-
product_terjual = product.find('div', class_="r6HknA uEPGHT").text.strip()
|
118 |
-
product_asal = product.find('div', class_="zGGwiV").text.strip()
|
119 |
-
|
120 |
-
# Cek apakah ada harga sebelum diskon dan persentase diskon
|
121 |
-
#discount_element = product.find('span', class_='strikeout disc-price')
|
122 |
-
#discount_percentage = ""
|
123 |
-
#original_price = ""
|
124 |
-
#if discount_element:
|
125 |
-
# discount_percentage = discount_element.find('span', class_='discount').text.strip()
|
126 |
-
# original_price = discount_element.text.replace(discount_percentage, '').strip()
|
127 |
-
#else:
|
128 |
-
# # Jika tidak ada diskon, set discount_percentage ke "0%" dan original_price ke product_price
|
129 |
-
# discount_percentage = "0%"
|
130 |
-
# original_price = product_price
|
131 |
-
#
|
132 |
-
product_link = f"https://shopee.co.id/{product_href}"
|
133 |
-
products.append({
|
134 |
-
'product': product_name,
|
135 |
-
#'original_price': original_price,
|
136 |
-
#'discount_percentage': discount_percentage,
|
137 |
-
'price': product_price,
|
138 |
-
'terjual' : product_terjual,
|
139 |
-
'asal' : product_asal,
|
140 |
-
'link': product_link
|
141 |
-
})
|
142 |
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
|
|
|
|
146 |
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
time.sleep(1)
|
150 |
my_bar.empty()
|
151 |
return products
|
|
|
7 |
import time
|
8 |
|
9 |
from selenium import webdriver
|
10 |
+
from selenium.common.exceptions import WebDriverException
|
11 |
|
12 |
@st.cache_data
|
13 |
def scrape_klikindomaret(nama_barang, num_items):
|
|
|
89 |
options.add_argument('--headless')
|
90 |
options.add_argument('--disable-notifications')
|
91 |
options.add_argument('--disable-infobars')
|
92 |
+
options.add_argument('--disable-dev-shm-usage')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
+
try :
|
96 |
+
#driver = webdriver.Chrome(executable_path = path, options = chrome_options)
|
97 |
+
driver = webdriver.Chrome(options = options)
|
98 |
+
|
99 |
+
while len(products) < num_items :
|
100 |
+
#Cek agar produk sesuai jumlah yang diminta
|
101 |
+
if len (products) > num_items :
|
102 |
+
products = products[:num_items]
|
103 |
+
break
|
104 |
|
105 |
+
driver.set_window_size(1080, 720) # Adjust the window size here
|
106 |
+
driver.get(url)
|
107 |
|
108 |
+
html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
|
109 |
+
soup = BeautifulSoup(html, "html.parser")
|
110 |
+
|
111 |
+
product_list = soup.find_all('li', class_="col-xs-2-4 shopee-search-item-result__item" )
|
112 |
+
for product in product_list:
|
113 |
+
# Mencari tag <a> di dalam setiap tag <li>
|
114 |
+
a_tag = product.find_all('a', href=True)
|
115 |
+
|
116 |
+
for product_info in a_tag:
|
117 |
+
# Mendapatkan URL dari atribut 'href'
|
118 |
+
product_href = product_info['href']
|
119 |
+
product_name = product.find('div', class_="ie3A+n bM+7UW Cve6sh").text.strip()
|
120 |
+
product_price = product.find('div', class_="vioxXd rVLWG6").text.strip()
|
121 |
+
product_terjual = product.find('div', class_="r6HknA uEPGHT").text.strip()
|
122 |
+
product_asal = product.find('div', class_="zGGwiV").text.strip()
|
123 |
+
|
124 |
+
# Cek apakah ada harga sebelum diskon dan persentase diskon
|
125 |
+
#discount_element = product.find('span', class_='strikeout disc-price')
|
126 |
+
#discount_percentage = ""
|
127 |
+
#original_price = ""
|
128 |
+
#if discount_element:
|
129 |
+
# discount_percentage = discount_element.find('span', class_='discount').text.strip()
|
130 |
+
# original_price = discount_element.text.replace(discount_percentage, '').strip()
|
131 |
+
#else:
|
132 |
+
# # Jika tidak ada diskon, set discount_percentage ke "0%" dan original_price ke product_price
|
133 |
+
# discount_percentage = "0%"
|
134 |
+
# original_price = product_price
|
135 |
+
#
|
136 |
+
product_link = f"https://shopee.co.id/{product_href}"
|
137 |
+
products.append({
|
138 |
+
'product': product_name,
|
139 |
+
#'original_price': original_price,
|
140 |
+
#'discount_percentage': discount_percentage,
|
141 |
+
'price': product_price,
|
142 |
+
'terjual' : product_terjual,
|
143 |
+
'asal' : product_asal,
|
144 |
+
'link': product_link
|
145 |
+
})
|
146 |
+
|
147 |
+
prop = min(len(products)/num_items, 1)
|
148 |
+
my_bar.progress(prop, text=progress_text)
|
149 |
+
|
150 |
+
|
151 |
+
page += 1
|
152 |
|
153 |
+
except WebDriverException as e:
|
154 |
+
return products
|
155 |
+
finally:
|
156 |
+
if wd:
|
157 |
+
wd.quit()
|
158 |
+
|
159 |
time.sleep(1)
|
160 |
my_bar.empty()
|
161 |
return products
|