Spaces:
Sleeping
Sleeping
move browse_folder to scrape_3gpp.py
Browse files- scrape_3gpp.py +14 -0
scrape_3gpp.py
CHANGED
@@ -8,6 +8,20 @@ import zipfile
|
|
8 |
import textract
|
9 |
import gradio as gr
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def scrape(url, excel_file, folder_name,progress=gr.Progress()):
|
12 |
filenames = []
|
13 |
# Check if the excel_file argument is provided and if the file exists.
|
|
|
8 |
import textract
|
9 |
import gradio as gr
|
10 |
|
11 |
+
|
12 |
+
def browse_folder(url):
    """Build the dropdown choices for the sub-links found under *url*.

    Fetches the page at ``url``, collects every anchor whose ``href``
    starts with ``url``, and returns a Gradio update that sets those links
    (each normalised to end in exactly one ``/``) as the component's choices.

    Args:
        url: Address of the listing page to inspect.

    Returns:
        The value of ``gr.update(choices=...)``. The choices list is empty
        when ``url`` ends in ``docs`` or ``docs/`` (case-insensitive); no
        request is made in that case.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an error status.
    """
    # NOTE(review): presumably a ".../Docs" URL is already the final folder
    # and has nothing further to browse into -- confirm with the UI flow.
    if url.lower().endswith(('docs', 'docs/')):
        return gr.update(choices=[])

    # A timeout keeps the UI callback from hanging forever when the server
    # never answers; requests applies no timeout by default.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Surface HTTP error statuses as exceptions.

    soup = BeautifulSoup(response.text, 'html.parser')

    # Strip any trailing slash before appending one, so hrefs that already
    # end in "/" do not come out with a doubled separator.
    folder_links = [
        anchor['href'].rstrip('/') + '/'
        for anchor in soup.find_all('a', href=True)
        if anchor['href'].startswith(url)
    ]

    return gr.update(choices=folder_links)
|
23 |
+
|
24 |
+
|
25 |
def scrape(url, excel_file, folder_name,progress=gr.Progress()):
|
26 |
filenames = []
|
27 |
# Check if the excel_file argument is provided and if the file exists.
|