MaksG committed on
Commit
92d0a3c
1 Parent(s): 7cb2e91

Update excel_chat.py

Browse files
Files changed (1) hide show
  1. excel_chat.py +45 -0
excel_chat.py CHANGED
@@ -5,6 +5,51 @@ import os
5
  import pandas as pd
6
  import numpy as np
7
  from groq import Groq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def ask_llm(query, input, client_index):
10
  messages = [
 
5
  import pandas as pd
6
  import numpy as np
7
  from groq import Groq
8
+ import requests
9
+ from bs4 import BeautifulSoup
10
+
11
+
12
+
13
+
14
def extract_statuses(url):
    """Download the first Excel file linked from ``url`` and list its TDoc statuses.

    The page at ``url`` is fetched and scanned for ``<a>`` tags whose ``href``
    ends in ``.xls`` or ``.xlsx`` (case-insensitive). The first match is
    downloaded, saved to ``guide_status.xlsx`` in the current working
    directory (overwriting any existing file), and loaded with pandas.

    Args:
        url: Address of the web page that links to the Excel file.

    Returns:
        The unique values of the ``'TDoc Status'`` column, with the value
        ``'withdrawn'`` removed if present. An empty list is returned when
        the column is missing or when the page links to no Excel file.

    Raises:
        requests.RequestException: If either HTTP request fails, times out,
            or returns an error status code.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    # Seconds to wait for each HTTP request; without a timeout requests can
    # block forever on an unresponsive server.
    request_timeout = 30

    # Fetch and parse the page that is expected to link to the spreadsheet.
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    for link in soup.find_all('a'):
        href = link.get('href')
        if not href or not href.lower().endswith(('.xls', '.xlsx')):
            continue

        # urljoin resolves absolute, root-relative and relative hrefs
        # correctly, unlike plain string concatenation of url and href.
        excel_url = urljoin(url, href)
        excel_response = requests.get(excel_url, timeout=request_timeout)
        # Fail here rather than saving an HTML error page as a spreadsheet.
        excel_response.raise_for_status()

        # Fixed local name, independent of the remote file name.
        file_name = 'guide_status.xlsx'
        with open(file_name, 'wb') as f:
            f.write(excel_response.content)

        df = pd.read_excel(file_name)

        if 'TDoc Status' not in df.columns:
            print(f"'TDoc Status' column not found in {file_name}")
            return []

        unique_statuses = df['TDoc Status'].unique().tolist()
        print(f'Downloaded {file_name} and extracted statuses: {unique_statuses}')

        if 'withdrawn' in unique_statuses:
            unique_statuses.remove('withdrawn')
        return unique_statuses

    # No Excel link on the page: return the same empty result as the
    # missing-column case instead of an implicit None.
    return []
51
+
52
+
53
 
54
  def ask_llm(query, input, client_index):
55
  messages = [