Spaces:
Sleeping
Sleeping
import gradio as gr | |
from mistralai.client import MistralClient | |
from mistralai.models.chat_completion import ChatMessage | |
import os | |
import pandas as pd | |
import numpy as np | |
from groq import Groq | |
import requests | |
from bs4 import BeautifulSoup | |
def extract_statuses(url): | |
# Send a GET request to the webpage | |
response = requests.get(url) | |
# Parse the webpage content | |
soup = BeautifulSoup(response.content, 'html.parser') | |
# Find all links in the webpage | |
links = soup.find_all('a') | |
# Identify and download the Excel file | |
for link in links: | |
href = link.get('href') | |
if href and (href.endswith('.xls') or href.endswith('.xlsx')): | |
excel_url = href if href.startswith('http') else url + href | |
excel_response = requests.get(excel_url) | |
file_name = 'guide_status.xlsx' #excel_url.split('/')[-1] | |
# Save the file | |
with open(file_name, 'wb') as f: | |
f.write(excel_response.content) | |
# Read the Excel file | |
df = pd.read_excel(file_name) | |
# Check if 'TDoc Status' column exists and extract unique statuses | |
if 'TDoc Status' in df.columns: | |
unique_statuses = df['TDoc Status'].unique().tolist() | |
print(f'Downloaded {file_name} and extracted statuses: {unique_statuses}') | |
if 'withdrawn' in unique_statuses: | |
unique_statuses.remove('withdrawn') | |
return unique_statuses | |
else: | |
print(f"'TDoc Status' column not found in {file_name}") | |
return [] | |
def ask_llm(query, input, client_index): | |
messages = [ | |
{ | |
"role": "system", | |
"content": f"You are a helpful assistant. Only show your final response to the **User Query**! Do not provide any explanations or details: \n# User Query:\n{query}." | |
}, | |
{ | |
"role": "user", | |
"content": f"{input}", | |
} | |
] | |
if client_index == 0: | |
client = Groq(api_key=os.environ["GROQ_API_KEY"]) | |
chat_completion = client.chat.completions.create( | |
messages=messages, | |
model='mixtral-8x7b-32768', | |
) | |
else: | |
client = MistralClient(api_key=os.environ['MISTRAL_API_KEY']) | |
chat_completion = client.chat( | |
messages=messages, | |
model='mistral-small-latest', | |
) | |
return chat_completion.choices[0].message.content | |
def filter_df(df, column_name, keywords): | |
if len(keywords)>0: | |
if column_name in df.columns: | |
contains_keyword = lambda x: any(keyword.lower() in (x.lower() if type(x)==str else '') for keyword in keywords) | |
filtered_df = df[df[column_name].apply(contains_keyword)] | |
else: | |
contains_keyword = lambda row: any(keyword.lower() in (str(cell).lower() if isinstance(cell, str) else '') for keyword in keywords for cell in row) | |
filtered_df = df[df.apply(contains_keyword, axis=1)] | |
else: | |
filtered_df = df | |
return filtered_df | |
def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client): | |
print(f'xlsxfile = {excel_file}') | |
df = pd.read_excel(excel_file) | |
df[dest_col] = "" | |
try: | |
file_name = url.split("/")[-2] + ".xlsx" | |
except: | |
file_name = excel_file | |
print(f"Keywords: {keywords}") | |
filtred_df = filter_df(df, search_col, keywords) | |
for index, row in filtred_df.iterrows(): | |
concatenated_content = "\n\n".join(f"{column_name}: {str(row[column_name])}" for column_name in source_cols) | |
llm_answer = ask_llm(prompt, concatenated_content, client) | |
print(f"QUERY:\n{prompt}\nCONTENT:\n{concatenated_content[:200]}...\n\nANSWER:\n{llm_answer}") | |
df.at[index, dest_col] = llm_answer | |
df.to_excel(file_name, index=False) | |
return file_name, df.head(5) | |
def get_columns(file): | |
if file is not None: | |
df = pd.read_excel(file) | |
columns = list(df.columns) | |
return gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns + [""]), gr.update(choices=columns + ['[ALL]']), df.head(5) | |
else: | |
return gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), pd.DataFrame() |