naufalnashif committed on
Commit
7602f66
·
1 Parent(s): 402be75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -6
app.py CHANGED
@@ -122,14 +122,88 @@ def scrape_tokped(nama_barang, num_items):
122
  products = products[:num_items]
123
  break
124
 
125
- except requests.exceptions.RequestException as e:
126
- st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
127
- break
128
- except requests.exceptions.HTTPError as e:
129
- st.error(f"HTTP Error: {e}")
130
- break
131
  except Exception as e:
132
  st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  break
134
  page += 1
135
  return products
 
122
  products = products[:num_items]
123
  break
124
 
 
 
 
 
 
 
125
  except Exception as e:
126
  st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
127
+ st.write("Jalankan script ini di IDE/colab.research.google.com Anda :")
128
+ code = '''!pip install beautifulsoup4
129
+ !pip install requests
130
+ !pip install streamlit
131
+ from bs4 import BeautifulSoup
132
+ import requests
133
+ from urllib.parse import quote
134
+ import pandas as pd
135
+ import streamlit as st
136
+ def scrape_tokped(nama_barang, num_items):
137
+ products = []
138
+ page = 1
139
+ query = quote(nama_barang)
140
+ while len(products) < num_items :
141
+ url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
142
+
143
+ headers = {
144
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
145
+ }
146
+ timeout = 10
147
+ try :
148
+ response = requests.get(url, headers = headers, timeout = timeout)
149
+ response.raise_for_status()
150
+
151
+ soup = BeautifulSoup(response.text, 'html.parser')
152
+
153
+ product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
154
+
155
+ for product_info in product_container_list:
156
+ link = product_info['href']
157
+ title_element = product_info.find('div', class_="prd_link-product-name css-3um8ox")
158
+ title = title_element.text.strip() if title_element else None
159
+
160
+ harga_element = product_info.find('div', class_="prd_link-product-price css-h66vau")
161
+ harga = harga_element.text.strip() if harga_element else None
162
+
163
+ terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h")
164
+ terjual = terjual_element.text if terjual_element else None
165
+
166
+ rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
167
+ rating = rating_element.text if rating_element else None
168
+
169
+ toko_element = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip")
170
+ toko = toko_element.text.strip() if toko_element else None
171
+
172
+ asal_product_element = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip")
173
+ asal_product = asal_product_element.text.strip() if asal_product_element else None
174
+
175
+ products.append({
176
+ 'link': link,
177
+ 'produk' : title,
178
+ 'harga' : harga,
179
+ 'terjual' : terjual,
180
+ 'rating' : rating,
181
+ 'toko' : toko,
182
+ 'asal_product' : asal_product,
183
+ })
184
+ if len(products) >= num_items:
185
+ products = products[:num_items]
186
+ break
187
+
188
+ except requests.exceptions.RequestException as e:
189
+ logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
190
+ break
191
+ except requests.exceptions.HTTPError as e:
192
+ logging.error(f"HTTP Error: {e}")
193
+ break
194
+ except Exception as e:
195
+ logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
196
+ break
197
+ page += 1
198
+ return products)
199
+
200
+ nama_barang = input("Masukkan nama barang: ")
201
+ num_items = int(input("Masukkan jumlah barang yang ingin diambil: "))
202
+
203
+ # Melakukan scraping menggunakan fungsi scrape_tokped
204
+ hasil = scrape_tokped(nama_barang, num_items)
205
+ pd.DataFrame(hasil)'''
206
+ st.code(code, language='python')
207
  break
208
  page += 1
209
  return products