naufalnashif committed on
Commit
e8b6098
·
1 Parent(s): a405279

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -53
app.py CHANGED
@@ -7,7 +7,7 @@ import json
7
  import time
8
 
9
  from selenium import webdriver
10
- from selenium.webdriver.chrome.options import Options
11
 
12
  @st.cache_data
13
  def scrape_klikindomaret(nama_barang, num_items):
@@ -89,63 +89,73 @@ def scrape_shopee(nama_barang, num_items):
89
  options.add_argument('--headless')
90
  options.add_argument('--disable-notifications')
91
  options.add_argument('--disable-infobars')
92
-
93
- #driver = webdriver.Chrome(executable_path = path, options = chrome_options)
94
- driver = webdriver.Chrome(options = options)
95
-
96
- while len(products) < num_items :
97
- #Cek agar produk sesuai jumlah yang diminta
98
- if len (products) > num_items :
99
- products = products[:num_items]
100
- break
101
-
102
- driver.get(url)
103
-
104
- html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
105
- soup = BeautifulSoup(html, "html.parser")
106
 
107
- product_list = soup.find_all('li', class_="col-xs-2-4 shopee-search-item-result__item" )
108
- for product in product_list:
109
- # Mencari tag <a> di dalam setiap tag <li>
110
- a_tag = product.find_all('a', href=True)
111
-
112
- for product_info in a_tag:
113
- # Mendapatkan URL dari atribut 'href'
114
- product_href = product_info['href']
115
- product_name = product.find('div', class_="ie3A+n bM+7UW Cve6sh").text.strip()
116
- product_price = product.find('div', class_="vioxXd rVLWG6").text.strip()
117
- product_terjual = product.find('div', class_="r6HknA uEPGHT").text.strip()
118
- product_asal = product.find('div', class_="zGGwiV").text.strip()
119
-
120
- # Cek apakah ada harga sebelum diskon dan persentase diskon
121
- #discount_element = product.find('span', class_='strikeout disc-price')
122
- #discount_percentage = ""
123
- #original_price = ""
124
- #if discount_element:
125
- # discount_percentage = discount_element.find('span', class_='discount').text.strip()
126
- # original_price = discount_element.text.replace(discount_percentage, '').strip()
127
- #else:
128
- # # Jika tidak ada diskon, set discount_percentage ke "0%" dan original_price ke product_price
129
- # discount_percentage = "0%"
130
- # original_price = product_price
131
- #
132
- product_link = f"https://shopee.co.id/{product_href}"
133
- products.append({
134
- 'product': product_name,
135
- #'original_price': original_price,
136
- #'discount_percentage': discount_percentage,
137
- 'price': product_price,
138
- 'terjual' : product_terjual,
139
- 'asal' : product_asal,
140
- 'link': product_link
141
- })
142
 
143
- prop = min(len(products)/num_items, 1)
144
- my_bar.progress(prop, text=progress_text)
 
 
 
 
 
 
 
145
 
 
 
146
 
147
- page += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
 
 
 
 
 
 
149
  time.sleep(1)
150
  my_bar.empty()
151
  return products
 
7
  import time
8
 
9
  from selenium import webdriver
10
+ from selenium.common.exceptions import WebDriverException
11
 
12
  @st.cache_data
13
  def scrape_klikindomaret(nama_barang, num_items):
 
89
  options.add_argument('--headless')
90
  options.add_argument('--disable-notifications')
91
  options.add_argument('--disable-infobars')
92
+ options.add_argument('--disable-dev-shm-usage')
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
+ try :
96
+ #driver = webdriver.Chrome(executable_path = path, options = chrome_options)
97
+ driver = webdriver.Chrome(options = options)
98
+
99
+ while len(products) < num_items :
100
+ #Cek agar produk sesuai jumlah yang diminta
101
+ if len (products) > num_items :
102
+ products = products[:num_items]
103
+ break
104
 
105
+ driver.set_window_size(1080, 720) # Adjust the window size here
106
+ driver.get(url)
107
 
108
+ html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
109
+ soup = BeautifulSoup(html, "html.parser")
110
+
111
+ product_list = soup.find_all('li', class_="col-xs-2-4 shopee-search-item-result__item" )
112
+ for product in product_list:
113
+ # Mencari tag <a> di dalam setiap tag <li>
114
+ a_tag = product.find_all('a', href=True)
115
+
116
+ for product_info in a_tag:
117
+ # Mendapatkan URL dari atribut 'href'
118
+ product_href = product_info['href']
119
+ product_name = product.find('div', class_="ie3A+n bM+7UW Cve6sh").text.strip()
120
+ product_price = product.find('div', class_="vioxXd rVLWG6").text.strip()
121
+ product_terjual = product.find('div', class_="r6HknA uEPGHT").text.strip()
122
+ product_asal = product.find('div', class_="zGGwiV").text.strip()
123
+
124
+ # Cek apakah ada harga sebelum diskon dan persentase diskon
125
+ #discount_element = product.find('span', class_='strikeout disc-price')
126
+ #discount_percentage = ""
127
+ #original_price = ""
128
+ #if discount_element:
129
+ # discount_percentage = discount_element.find('span', class_='discount').text.strip()
130
+ # original_price = discount_element.text.replace(discount_percentage, '').strip()
131
+ #else:
132
+ # # Jika tidak ada diskon, set discount_percentage ke "0%" dan original_price ke product_price
133
+ # discount_percentage = "0%"
134
+ # original_price = product_price
135
+ #
136
+ product_link = f"https://shopee.co.id/{product_href}"
137
+ products.append({
138
+ 'product': product_name,
139
+ #'original_price': original_price,
140
+ #'discount_percentage': discount_percentage,
141
+ 'price': product_price,
142
+ 'terjual' : product_terjual,
143
+ 'asal' : product_asal,
144
+ 'link': product_link
145
+ })
146
+
147
+ prop = min(len(products)/num_items, 1)
148
+ my_bar.progress(prop, text=progress_text)
149
+
150
+
151
+ page += 1
152
 
153
+ except WebDriverException as e:
154
+ return products
155
+ finally:
156
+ if wd:
157
+ wd.quit()
158
+
159
  time.sleep(1)
160
  my_bar.empty()
161
  return products