Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- funcs/googlesheet.py +34 -0
- funcs/llm.py +100 -0
funcs/googlesheet.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from google.oauth2 import service_account
|
3 |
+
from googleapiclient.discovery import build
|
4 |
+
import streamlit as st
|
5 |
+
import gspread
|
6 |
+
from google.oauth2.service_account import Credentials
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
def get_google_sheet_data(sheet_id, range_name):
    """Fetch a range from a Google Sheet and return it as a DataFrame.

    Args:
        sheet_id: Spreadsheet ID (the long token in the sheet's URL).
        range_name: A1-notation range, e.g. "Sheet1!A1:D100".

    Returns:
        pd.DataFrame: the first row of the range is used as the column
        header and the remaining rows become the data. An empty
        DataFrame is returned when the range holds no values.
    """
    # Read-only scope is sufficient here; credentials come from the
    # Streamlit secrets store.
    creds = service_account.Credentials.from_service_account_info(
        st.secrets["gcp_service_account"],
        scopes=["https://www.googleapis.com/auth/spreadsheets.readonly"],
    )
    service = build("sheets", "v4", credentials=creds)
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=sheet_id, range=range_name).execute()
    values = result.get("values", [])
    # Guard: an empty range would otherwise raise IndexError on values[0].
    if not values:
        return pd.DataFrame()
    return pd.DataFrame(values[1:], columns=values[0])
|
19 |
+
|
20 |
+
def update_google_sheet(sheet_id, range_name, data):
    """Overwrite a worksheet with the contents of a DataFrame.

    The worksheet is selected from the sheet-name prefix of
    ``range_name`` (the part before "!"), cleared, and rewritten with
    the DataFrame's header row followed by its values.

    Args:
        sheet_id: Spreadsheet ID (the long token in the sheet's URL).
        range_name: A1-notation range, e.g. "Sheet1!A1"; the portion
            before "!" names the worksheet to update.
        data: pd.DataFrame whose columns and rows are written out.

    Side effects:
        Renders a Streamlit success or error message.
    """
    try:
        # BUG FIX: writing requires the full read/write scope. The
        # previous readonly scope cannot authorize clear()/update() and
        # made every call fail with a permission error.
        creds = service_account.Credentials.from_service_account_info(
            st.secrets["gcp_service_account"],
            scopes=["https://www.googleapis.com/auth/spreadsheets"],
        )
        client = gspread.authorize(creds)
        sheet = client.open_by_key(sheet_id).worksheet(range_name.split("!")[0])
        # First row is the header, then the cell values.
        data_to_update = [data.columns.tolist()] + data.values.tolist()
        sheet.clear()
        sheet.update(range_name, data_to_update)

        st.success("Data successfully updated in the Google Sheet!")
    except Exception as e:
        st.error(f"Error updating Google Sheet: {e}")
|
funcs/llm.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import time
|
3 |
+
from langchain.agents import initialize_agent, Tool
|
4 |
+
from langchain.agents import AgentType
|
5 |
+
from langchain.memory import ConversationBufferWindowMemory
|
6 |
+
from langchain.prompts import PromptTemplate
|
7 |
+
import requests
|
8 |
+
|
9 |
+
class LLM:
    """LangChain agent wrapper that extracts a single attribute of an
    entity from web-search results.

    Requires a tool named "Web Search" to be present in ``tools``; the
    ``search`` object must expose a ``run(query, timeout=...)`` method.
    """

    def __init__(self, tools, model, search):
        """Validate the tool list and build the underlying agent.

        Args:
            tools: list of langchain ``Tool`` objects; must include one
                named "Web Search".
            model: the LLM passed to ``initialize_agent``.
            search: search backend with a ``run`` method.

        Raises:
            ValueError: if ``tools`` is not a list of Tool objects, or
                no "Web Search" tool is present.
        """
        if not isinstance(tools, list) or not all(isinstance(t, Tool) for t in tools):
            raise ValueError("Tools must be a list of Tool objects")

        web_search_available = any(
            tool.name == "Web Search" for tool in tools
        )
        if not web_search_available:
            raise ValueError("Web Search tool must be included in tools list")

        self.tools = tools
        self.model = model
        self.search = search

        self.agent = initialize_agent(
            self.tools,
            self.model,
            # BUG FIX: initialize_agent selects the agent via the `agent`
            # keyword; the previous `agent_type=` keyword is not a
            # recognized parameter, so the requested agent was never used.
            agent=AgentType.SELF_ASK_WITH_SEARCH,
            verbose=True,
            max_iterations=5,
            handle_parsing_errors=True,
            early_stopping_method="generate",
            # Keep only the last exchange in memory.
            memory=ConversationBufferWindowMemory(k=1)
        )

    def perform_web_search(self, query, max_retries=5, delay=1, timeout=8):
        """Run a web search with retries; return up to 1500 chars.

        Args:
            query: search string.
            max_retries: attempts before giving up.
            delay: seconds slept between attempts.
            timeout: per-request timeout passed to the search backend.

        Returns:
            str: truncated search results, or "NaN" on failure.
        """
        retries = 0
        while retries < max_retries:
            try:
                search_results = self.search.run(query, timeout=timeout)
                if search_results:
                    # Truncate to keep the downstream prompt small.
                    return search_results[:1500]
                # BUG FIX: an empty (falsy) result previously looped
                # forever because `retries` was never incremented on
                # this path.
                retries += 1
                time.sleep(delay)
            except requests.exceptions.Timeout:
                retries += 1
                st.warning(f"Web search timed out. Retrying ({retries}/{max_retries})...")
                time.sleep(delay)
            except Exception as e:
                retries += 1
                st.warning(f"Web search failed. Retrying ({retries}/{max_retries})... Error: {e}")
                time.sleep(delay)
        return "NaN"

    def get_llm_response(self, entity, query_type, web_results):
        """Ask the agent to extract ``query_type`` for ``entity`` from
        the given web results.

        Returns:
            str: the extracted value, or "NaN" when nothing relevant is
            found or the agent call fails.
        """
        prompt = PromptTemplate(
            template="""
            You are a highly skilled information extractor. Your job is to extract the most relevant {query_type} from the following Web Search Results.
            Provide the exact value requested and return only that value—no explanations, context, or irrelevant information.

            Entity: {entity}
            Information to Extract: {query_type}

            Web Search Results:
            {web_results}

            If you cannot find relevant information, return "NaN". Do not return anything else.

            Your extracted response:
            """,
            input_variables=["entity", "query_type", "web_results"]
        )

        try:
            response = self.agent.invoke({
                "input": prompt.format(
                    query_type=query_type,
                    entity=entity,
                    web_results=web_results,
                )
            })

            # Agents return a dict; "output" holds the final answer.
            raw_response = response.get("output", "").strip()
            if raw_response:
                return raw_response
            else:
                return "NaN"

        except Exception as e:
            st.error(f"Error processing response: {str(e)}")
            return "NaN"

    def refine_answer_with_searches(self, entity, query_type, max_retries=2):
        """Search, extract, and retry once with a broader query if the
        first extraction yields "NaN".

        Returns:
            tuple[str, str]: (extracted answer, last search results).
        """
        search_query = f"{entity} current {query_type}"
        search_results = self.perform_web_search(search_query)
        extracted_answer = self.get_llm_response(entity, query_type, search_results)

        # One fallback pass with a rephrased query when the first
        # attempt found nothing usable.
        if extracted_answer == "NaN" and max_retries > 0:
            alternative_query = f"{entity} {query_type} detailed information"
            search_results = self.perform_web_search(alternative_query)
            extracted_answer = self.get_llm_response(entity, query_type, search_results)

        return extracted_answer, search_results
|