import os
import pandas as pd
from pandasai import Agent, SmartDataframe
from typing import Tuple
from PIL import Image
from pandasai.llm import HuggingFaceTextGen
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
from langchain_google_genai import GoogleGenerativeAI
# Load variables from a .env file (python-dotenv) before any key is read.
load_dotenv()

# Indexing os.environ raises KeyError at import time if GROQ_API_KEY is unset.
Groq_Token = os.environ["GROQ_API_KEY"]

# Short alias -> model id handed to the LLM client constructors in this file.
models = {
    "mixtral": "mixtral-8x7b-32768",
    "llama": "llama2-70b-4096",
    "gemma": "gemma-7b-it",
    "gemini-pro": "gemini-pro",
}

# os.getenv yields None when the variable is not set.
hf_token = os.getenv("HF_READ")
gemini_token = os.getenv("GEMINI_TOKEN")
def preprocess_and_load_df(path: str) -> pd.DataFrame:
    """Read the CSV at ``path`` and convert its ``Timestamp`` column to datetimes.

    Args:
        path: Location of the CSV file; it must contain a ``Timestamp`` column.

    Returns:
        The loaded frame with ``Timestamp`` replaced by its ``pd.to_datetime`` result.
    """
    frame = pd.read_csv(path)
    parsed_timestamps = pd.to_datetime(frame["Timestamp"])
    frame["Timestamp"] = parsed_timestamps
    return frame
def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mixtral") -> Agent:
    """Build a pandasai ``Agent`` over ``df`` backed by the selected LLM.

    Args:
        df: Data the agent will be asked about.
        context: Message added to the agent with ``add_message`` before it is
            returned.
        inference_server: Unused. Kept so existing callers keep working; it was
            only consumed by a HuggingFace text-generation backend that is no
            longer wired in.
        name: Key of the module-level ``models`` mapping. ``"gemini-pro"``
            selects ``GoogleGenerativeAI``; any other key selects ``ChatGroq``.

    Returns:
        The configured ``Agent`` with caching disabled.

    Raises:
        KeyError: If ``name`` is not a key of ``models``.
    """
    model_id = models[name]
    if name == "gemini-pro":
        # The model id must come from `models`; this branch used to pass an
        # undefined name `model` and raised NameError.
        llm = GoogleGenerativeAI(model=model_id, google_api_key=gemini_token, temperature=0.1)
    else:
        llm = ChatGroq(model=model_id, api_key=os.getenv("GROQ_API"), temperature=0.1)

    agent = Agent(df, config={"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}})
    agent.add_message(context)
    return agent
def load_smart_df(df: pd.DataFrame, inference_server: str, name="mixtral") -> SmartDataframe:
    """Wrap ``df`` in a pandasai ``SmartDataframe`` backed by the selected LLM.

    Args:
        df: Data to wrap.
        inference_server: Unused. Kept so existing callers keep working; it was
            only consumed by a HuggingFace text-generation backend that is no
            longer wired in.
        name: Key of the module-level ``models`` mapping. ``"gemini-pro"``
            selects ``GoogleGenerativeAI``; any other key selects ``ChatGroq``.

    Returns:
        A ``SmartDataframe`` configured with up to 5 retries and caching disabled.

    Raises:
        KeyError: If ``name`` is not a key of ``models``.
    """
    model_id = models[name]
    if name == "gemini-pro":
        # `models` lists gemini-pro, so route it to the Google client the same
        # way the other loaders in this file do instead of sending it to Groq.
        llm = GoogleGenerativeAI(model=model_id, google_api_key=gemini_token, temperature=0.1)
    else:
        llm = ChatGroq(model=model_id, api_key=os.getenv("GROQ_API"), temperature=0.1)

    # Separate name so the `df` parameter is not shadowed by the wrapper.
    smart_df = SmartDataframe(df, config={"llm": llm, "max_retries": 5, "enable_cache": False})
    return smart_df
def get_from_user(prompt):
    """Wrap ``prompt`` as a chat message dict carrying the ``"user"`` role."""
    message = dict(role="user", content=prompt)
    return message
def ask_agent(agent: Agent, prompt: str) -> dict:
    """Send ``prompt`` to ``agent`` and package the reply as a chat message.

    Args:
        agent: Agent whose ``chat`` method is called with ``prompt``.
        prompt: The user's question.

    Returns:
        A dict with the keys ``"role"`` (always ``"assistant"``), ``"content"``
        (the value returned by ``agent.chat``), ``"gen_code"``, ``"ex_code"``
        and ``"last_prompt"`` (read from the agent after the chat call).
    """
    # The return annotation used to claim a 3-tuple of strings; the function
    # has always returned this dict.
    response = agent.chat(prompt)
    return {
        "role": "assistant",
        "content": response,
        "gen_code": agent.last_code_generated,
        "ex_code": agent.last_code_executed,
        "last_prompt": agent.last_prompt,
    }
def decorate_with_code(response: dict) -> str:
    """Format the generated code and prompt of ``response`` as a markdown string.

    Args:
        response: Mapping providing the ``"gen_code"`` and ``"last_prompt"`` keys.

    Returns:
        Markdown text showing the code in a fenced python block, followed by
        the prompt.

    Raises:
        KeyError: If either key is missing from ``response``.
    """
    generated = response["gen_code"]
    prompt_text = response["last_prompt"]
    # Template lines stay at column 0 so no indentation leaks into the output.
    return f"""
Generated Code
```python
{generated}
```
Prompt
{prompt_text}
"""
def show_response(st, response):
    """Render ``response`` inside a chat message, as an image when possible.

    ``response["content"]`` is first opened with ``Image.open``. If that and
    the following render calls succeed, the image is shown. If any of them
    raises, the content is rendered as markdown instead. In both paths the
    output of ``decorate_with_code`` is included when ``response`` has a
    ``"gen_code"`` key.

    Args:
        st: Object exposing ``chat_message``, ``markdown`` and ``image``
            (presumably the streamlit module; verify against the caller).
        response: Chat message dict with at least ``"role"`` and ``"content"``.

    Returns:
        ``{"is_image": True}`` when the image path succeeded, otherwise
        ``{"is_image": False}``.
    """
    with st.chat_message(response["role"]):
        try:
            picture = Image.open(response["content"])
            if "gen_code" in response:
                st.markdown(decorate_with_code(response), unsafe_allow_html=True)
            st.image(picture)
            return {"is_image": True}
        except Exception:
            # Any failure above (not an image, or a render error) falls back to text.
            display_content = (
                decorate_with_code(response) + f"\n{response['content']}"
                if "gen_code" in response
                else response["content"]
            )
            st.markdown(display_content, unsafe_allow_html=True)
            return {"is_image": False}
def _extract_python_block(reply):
    """Return the code inside the first ```python fence of ``reply``.

    When ``reply`` has no such fence the whole reply is returned, so an
    unfenced completion is still usable instead of raising IndexError.
    """
    _, fence, rest = reply.partition("```python")
    if not fence:
        return reply
    return rest.partition("```")[0]


def ask_question(model_name, question):
    """Ask an LLM to complete a pandas snippet answering ``question``, then run it.

    The prompt contains a code template that loads ``Data.csv`` (relative to
    the current working directory) and lists its column dtypes. The model's
    completion is appended to that template and executed; the value the
    executed code stores in ``answer`` becomes the message content.

    Args:
        model_name: Key of the module-level ``models`` mapping.
            ``"gemini-pro"`` selects ``GoogleGenerativeAI``; any other key
            selects ``ChatGroq``.
        question: The user's question, embedded in the template as a comment.

    Returns:
        A chat message dict with the keys ``"role"``, ``"content"``,
        ``"gen_code"``, ``"ex_code"`` and ``"last_prompt"``. ``"content"`` is
        the ``answer`` set by the executed code, or the raw model reply when
        the code did not define ``answer``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
        Exception: Whatever the generated code raises while being executed.
    """
    model_id = models[model_name]
    if model_name == "gemini-pro":
        # This branch used to pass an undefined name `model` (NameError).
        llm = GoogleGenerativeAI(model=model_id, google_api_key=os.environ.get("GOOGLE_API_KEY"), temperature=0)
    else:
        llm = ChatGroq(model=model_id, api_key=os.getenv("GROQ_API"), temperature=0.1)

    # Only the dtypes are needed, to show the model the schema of the data.
    df_check = pd.read_csv("Data.csv")
    df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
    df_check = df_check.head(5)
    dtype_comment = "\n".join(f"# {line}" for line in str(df_check.dtypes).split("\n"))

    # Prompt text below is kept at column 0 so the model sees unindented code.
    template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
# df.dtypes
{dtype_comment}
# {question.strip()}
#
```
"""
    query = f"""I have a pandas dataframe data of PM2.5 and PM10.
* Frequency of data is daily.
* `pollution` generally means `PM2.5`.
* Save result in a variable `answer` and make it global.
* If result is a plot, save it and save path in `answer`. Example: `answer='plot.png'`
* If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
Complete the following code.
{template}
"""
    reply = llm.invoke(query)
    if model_name != "gemini-pro":
        # The Groq chat client returns a message object; its text is `.content`.
        reply = reply.content

    code = f"""
{_extract_python_block(template)}
{_extract_python_block(reply)}
"""
    # SECURITY: this runs model-generated code with no sandboxing.
    # exec() cannot rebind a local of the calling function, so the code runs in
    # an explicit namespace and `answer` is read back from it. A `global answer`
    # statement in the generated code also lands in this namespace.
    namespace = {}
    exec(code, namespace)
    result = namespace.get("answer", reply)
    return {"role": "assistant", "content": result, "gen_code": code, "ex_code": code, "last_prompt": question}