naufalnashif committed on
Commit
46125ea
·
1 Parent(s): 74d0899

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -54
app.py CHANGED
@@ -121,62 +121,60 @@ def scrape_shopee(nama_barang, num_items):
121
 
122
  @st.cache_data
123
  def scrape_tokped(nama_barang, num_items):
124
- products = []
125
- page = 1
126
- query = quote(nama_barang)
127
-
128
- while len(products) < num_items :
129
- url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
130
-
131
- headers = {
132
- 'User-Agent' : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
133
- 'Accept-Language' : 'en-US, en;q-0.5',
134
- 'Accept-Encoding' : 'grip, deflate, bt',
135
- 'Connection': 'keep-alive'
136
- }
137
- timeout = 10
138
- try :
 
139
 
140
- response = requests.get(url, headers = headers, timeout = timeout)
141
- response.raise_for_status()
142
-
143
- soup = BeautifulSoup(response.text, 'html.parser')
144
- product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
145
-
146
- for product_info in product_container_list:
147
- link = product_info['href']
148
- title = product_info.find('div', class_="prd_link-product-name css-3um8ox" ).text.strip()
149
- harga = product_info.find('div', class_="prd_link-product-price css-h66vau").text.strip()
150
- #Dapatkan terjual
151
- terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h").text.strip()
152
- terjual = terjual_element if terjual_element else None
153
- # Dapatkan rating
154
- rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
155
- rating = rating_element.text if rating_element else None
156
-
157
- toko = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip").text.strip()
158
- asal_product = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip" ).text.strip()
159
-
160
- products.append({
161
- 'link': link,
162
- 'produk' : title,
163
- 'harga' : harga,
164
- 'terjual' : terjual,
165
- 'rating' : rating,
166
- 'toko' : toko,
167
- 'asal_product' : asal_product,
168
- })
169
- if len(products) >= num_items:
170
- products = products[:num_items]
 
171
  break
172
-
173
- except requests.exceptions.RequestException as e:
174
- st.error("Terjadi kesalahan")
175
- break
176
- except requests.exceptions.HTTPError as e:
177
- st.error("HTTP Error :", str(e))
178
- break
179
- page += 1
180
  return products
181
  #---------------------------------------------------User Interface----------------------------------------------------------------------
182
 
 
121
 
122
  @st.cache_data
123
  def scrape_tokped(nama_barang, num_items):
124
+ products = []
125
+ page = 1
126
+ query = quote(nama_barang)
127
+ while len(products) < num_items :
128
+ url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
129
+
130
+ headers = {
131
+ 'User-Agent' : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
132
+ 'Accept-Language' : 'en-US, en;q-0.5',
133
+ 'Accept-Encoding' : 'grip, deflate, bt',
134
+ 'Connection': 'keep-alive'
135
+ }
136
+ timeout = 10
137
+ try :
138
+ response = requests.get(url, headers = headers, timeout = timeout)
139
+ response.raise_for_status()
140
 
141
+ soup = BeautifulSoup(response.text, 'html.parser')
142
+ product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
143
+
144
+ for product_info in product_container_list:
145
+ link = product_info['href']
146
+ st.write(link)
147
+ title = product_info.find('div', class_="prd_link-product-name css-3um8ox" ).text.strip()
148
+ harga = product_info.find('div', class_="prd_link-product-price css-h66vau").text.strip()
149
+ #Dapatkan terjual
150
+ terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h").text.strip()
151
+ terjual = terjual_element if terjual_element else None
152
+ # Dapatkan rating
153
+ rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
154
+ rating = rating_element.text if rating_element else None
155
+
156
+ toko = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip").text.strip()
157
+ asal_product = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip" ).text.strip()
158
+
159
+ products.append({
160
+ 'link': link,
161
+ 'produk' : title,
162
+ 'harga' : harga,
163
+ 'terjual' : terjual,
164
+ 'rating' : rating,
165
+ 'toko' : toko,
166
+ 'asal_product' : asal_product,
167
+ })
168
+ if len(products) >= num_items:
169
+ products = products[:num_items]
170
+ break
171
+ except requests.exceptions.RequestException as e:
172
+ st.error("Terjadi kesalahan")
173
  break
174
+ except requests.exceptions.HTTPError as e:
175
+ st.error("HTTP Error :", str(e))
176
+ break
177
+ page += 1
 
 
 
 
178
  return products
179
  #---------------------------------------------------User Interface----------------------------------------------------------------------
180