MaksG committed on
Commit
ed31679
1 Parent(s): 81011d1

Update excel_chat.py

Browse files
Files changed (1) hide show
  1. excel_chat.py +0 -43
excel_chat.py CHANGED
@@ -5,49 +5,6 @@ import os
5
  import pandas as pd
6
  import numpy as np
7
  from groq import Groq
8
- import requests
9
- from bs4 import BeautifulSoup
10
-
11
-
12
-
13
-
14
- def extract_statuses(url):
15
- # Send a GET request to the webpage
16
- response = requests.get(url)
17
-
18
- # Parse the webpage content
19
- soup = BeautifulSoup(response.content, 'html.parser')
20
-
21
- # Find all links in the webpage
22
- links = soup.find_all('a')
23
-
24
- # Identify and download the Excel file
25
- for link in links:
26
- href = link.get('href')
27
- if href and (href.endswith('.xls') or href.endswith('.xlsx')):
28
- excel_url = href if href.startswith('http') else url + href
29
- excel_response = requests.get(excel_url)
30
- file_name = 'guide_status.xlsx' #excel_url.split('/')[-1]
31
-
32
- # Save the file
33
- with open(file_name, 'wb') as f:
34
- f.write(excel_response.content)
35
-
36
- # Read the Excel file
37
- df = pd.read_excel(file_name)
38
-
39
- # Check if 'TDoc Status' column exists and extract unique statuses
40
- if 'TDoc Status' in df.columns:
41
- unique_statuses = df['TDoc Status'].unique().tolist()
42
- print(f'Downloaded {file_name} and extracted statuses: {unique_statuses}')
43
-
44
-
45
- if 'withdrawn' in unique_statuses:
46
- unique_statuses.remove('withdrawn')
47
- return unique_statuses
48
- else:
49
- print(f"'TDoc Status' column not found in {file_name}")
50
- return []
51
 
52
 
53
 
 
5
  import pandas as pd
6
  import numpy as np
7
  from groq import Groq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10