Commit
·
6174621
1
Parent(s):
a6136fe
Update app.py
Browse files
app.py
CHANGED
@@ -94,7 +94,7 @@ def scrape_shopee(nama_barang, num_items):
|
|
94 |
driver = webdriver.Chrome(options = options)
|
95 |
url = f'https://shopee.co.id/search?keyword={query}&page={page}'
|
96 |
driver.get(url)
|
97 |
-
|
98 |
|
99 |
# Cari elemen berdasarkan tagname HTML
|
100 |
html_element = driver.find_element(By.TAG_NAME, "html")
|
@@ -110,15 +110,15 @@ def scrape_shopee(nama_barang, num_items):
|
|
110 |
except requests.exceptions.RequestException as e:
|
111 |
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
112 |
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
113 |
-
|
114 |
except requests.exceptions.HTTPError as e:
|
115 |
logging.error(f"HTTP Error: {e}")
|
116 |
st.error(f"HTTP Error: {e}")
|
117 |
-
|
118 |
except Exception as e:
|
119 |
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
120 |
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
121 |
-
|
122 |
except WebDriverException as e:
|
123 |
st.error(f"An error occurred: {e}")
|
124 |
finally:
|
@@ -216,71 +216,73 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
|
|
216 |
options.add_argument('--disable-infobars')
|
217 |
options.add_argument('--disable-dev-shm-usage')
|
218 |
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
products
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
break
|
265 |
-
|
266 |
-
except requests.exceptions.RequestException as e:
|
267 |
-
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
268 |
-
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
269 |
-
break
|
270 |
-
except requests.exceptions.HTTPError as e:
|
271 |
-
logging.error(f"HTTP Error: {e}")
|
272 |
-
st.error(f"HTTP Error: {e}")
|
273 |
-
break
|
274 |
-
except Exception as e:
|
275 |
-
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
276 |
-
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
277 |
-
break
|
278 |
-
except WebDriverException as e:
|
279 |
-
st.error(f"An error occurred: {e}")
|
280 |
finally:
|
281 |
if driver:
|
282 |
driver.quit()
|
283 |
-
|
284 |
return products
|
285 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
286 |
|
|
|
94 |
driver = webdriver.Chrome(options = options)
|
95 |
url = f'https://shopee.co.id/search?keyword={query}&page={page}'
|
96 |
driver.get(url)
|
97 |
+
|
98 |
|
99 |
# Cari elemen berdasarkan tagname HTML
|
100 |
html_element = driver.find_element(By.TAG_NAME, "html")
|
|
|
110 |
except requests.exceptions.RequestException as e:
|
111 |
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
112 |
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
113 |
+
|
114 |
except requests.exceptions.HTTPError as e:
|
115 |
logging.error(f"HTTP Error: {e}")
|
116 |
st.error(f"HTTP Error: {e}")
|
117 |
+
|
118 |
except Exception as e:
|
119 |
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
120 |
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
121 |
+
|
122 |
except WebDriverException as e:
|
123 |
st.error(f"An error occurred: {e}")
|
124 |
finally:
|
|
|
216 |
options.add_argument('--disable-infobars')
|
217 |
options.add_argument('--disable-dev-shm-usage')
|
218 |
|
219 |
+
while len(products) < num_items :
|
220 |
+
try :
|
221 |
+
url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
|
222 |
+
|
223 |
+
driver.get(url)
|
224 |
+
time.sleep(10)
|
225 |
+
|
226 |
+
html = driver.page_source
|
227 |
+
soup = BeautifulSoup(html, "html.parser")
|
228 |
+
# Dapatkan HTML dari elemen
|
229 |
+
#html = html_element.get_attribute("innerHTML")
|
230 |
+
#html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
|
231 |
+
product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
|
232 |
+
|
233 |
+
for product_info in product_container_list:
|
234 |
+
link = product_info['href']
|
235 |
+
st.write(link)
|
236 |
+
title_element = product_info.find('div', class_="prd_link-product-name css-3um8ox")
|
237 |
+
title = title_element.text.strip() if title_element else None
|
238 |
+
|
239 |
+
harga_element = product_info.find('div', class_="prd_link-product-price css-h66vau")
|
240 |
+
harga = harga_element.text.strip() if harga_element else None
|
241 |
+
|
242 |
+
terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h")
|
243 |
+
terjual = terjual_element.text if terjual_element else None
|
244 |
+
|
245 |
+
rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
|
246 |
+
rating = rating_element.text if rating_element else None
|
247 |
+
|
248 |
+
toko_element = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip")
|
249 |
+
toko = toko_element.text.strip() if toko_element else None
|
250 |
+
|
251 |
+
asal_product_element = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip")
|
252 |
+
asal_product = asal_product_element.text.strip() if asal_product_element else None
|
253 |
+
|
254 |
+
products.append({
|
255 |
+
'link': link,
|
256 |
+
'produk' : title,
|
257 |
+
'harga' : harga,
|
258 |
+
'terjual' : terjual,
|
259 |
+
'rating' : rating,
|
260 |
+
'toko' : toko,
|
261 |
+
'asal_product' : asal_product,
|
262 |
+
})
|
263 |
+
if len(products) >= num_items:
|
264 |
+
products = products[:num_items]
|
265 |
+
break
|
266 |
+
page += 1
|
267 |
+
except requests.exceptions.RequestException as e:
|
268 |
+
logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
269 |
+
st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
270 |
+
break
|
271 |
+
except requests.exceptions.HTTPError as e:
|
272 |
+
logging.error(f"HTTP Error: {e}")
|
273 |
+
st.error(f"HTTP Error: {e}")
|
274 |
+
break
|
275 |
+
except Exception as e:
|
276 |
+
logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
277 |
+
st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
278 |
+
break
|
279 |
+
except WebDriverException as e:
|
280 |
+
st.error(f"An error occurred: {e}")
|
281 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
finally:
|
283 |
if driver:
|
284 |
driver.quit()
|
285 |
+
|
286 |
return products
|
287 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
288 |
|