Standard_Intelligence_Dev

Sleeping

App Files Files Community

Standard_Intelligence_Dev / excel_chat.py

MaksG

Update excel_chat.py

92d0a3c verified 11 months ago

raw

history blame

4.43 kB

	import gradio as gr
	from mistralai.client import MistralClient
	from mistralai.models.chat_completion import ChatMessage
	import os
	import pandas as pd
	import numpy as np
	from groq import Groq
	import requests
	from bs4 import BeautifulSoup




	def extract_statuses(url):
	# Send a GET request to the webpage
	response = requests.get(url)

	# Parse the webpage content
	soup = BeautifulSoup(response.content, 'html.parser')

	# Find all links in the webpage
	links = soup.find_all('a')

	# Identify and download the Excel file
	for link in links:
	href = link.get('href')
	if href and (href.endswith('.xls') or href.endswith('.xlsx')):
	excel_url = href if href.startswith('http') else url + href
	excel_response = requests.get(excel_url)
	file_name = 'guide_status.xlsx' #excel_url.split('/')[-1]

	# Save the file
	with open(file_name, 'wb') as f:
	f.write(excel_response.content)

	# Read the Excel file
	df = pd.read_excel(file_name)

	# Check if 'TDoc Status' column exists and extract unique statuses
	if 'TDoc Status' in df.columns:
	unique_statuses = df['TDoc Status'].unique().tolist()
	print(f'Downloaded {file_name} and extracted statuses: {unique_statuses}')


	if 'withdrawn' in unique_statuses:
	unique_statuses.remove('withdrawn')
	return unique_statuses
	else:
	print(f"'TDoc Status' column not found in {file_name}")
	return []



	def ask_llm(query, input, client_index):
	messages = [
	{
	"role": "system",
	"content": f"You are a helpful assistant. Only show your final response to the User Query! Do not provide any explanations or details: \n# User Query:\n{query}."
	},
	{
	"role": "user",
	"content": f"{input}",
	}
	]

	if client_index == 0:
	client = Groq(api_key=os.environ["GROQ_API_KEY"])
	chat_completion = client.chat.completions.create(
	messages=messages,
	model='mixtral-8x7b-32768',
	)
	else:
	client = MistralClient(api_key=os.environ['MISTRAL_API_KEY'])
	chat_completion = client.chat(
	messages=messages,
	model='mistral-small-latest',
	)

	return chat_completion.choices[0].message.content

	def filter_df(df, column_name, keywords):
	if len(keywords)>0:
	if column_name in df.columns:
	contains_keyword = lambda x: any(keyword.lower() in (x.lower() if type(x)==str else '') for keyword in keywords)
	filtered_df = df[df[column_name].apply(contains_keyword)]
	else:
	contains_keyword = lambda row: any(keyword.lower() in (str(cell).lower() if isinstance(cell, str) else '') for keyword in keywords for cell in row)
	filtered_df = df[df.apply(contains_keyword, axis=1)]
	else:
	filtered_df = df
	return filtered_df

	def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client):
	print(f'xlsxfile = {excel_file}')
	df = pd.read_excel(excel_file)
	df[dest_col] = ""
	try:
	file_name = url.split("/")[-2] + ".xlsx"
	except:
	file_name = excel_file
	print(f"Keywords: {keywords}")

	filtred_df = filter_df(df, search_col, keywords)

	for index, row in filtred_df.iterrows():
	concatenated_content = "\n\n".join(f"{column_name}: {str(row[column_name])}" for column_name in source_cols)
	llm_answer = ask_llm(prompt, concatenated_content, client)
	print(f"QUERY:\n{prompt}\nCONTENT:\n{concatenated_content[:200]}...\n\nANSWER:\n{llm_answer}")
	df.at[index, dest_col] = llm_answer

	df.to_excel(file_name, index=False)
	return file_name, df.head(5)

	def get_columns(file):
	if file is not None:
	df = pd.read_excel(file)
	columns = list(df.columns)
	return gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns + [""]), gr.update(choices=columns + ['[ALL]']), df.head(5)
	else:
	return gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), pd.DataFrame()