Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -21,27 +21,11 @@ from langchain.llms.base import LLM
|
|
21 |
from typing import Optional, List, Mapping, Any
|
22 |
|
23 |
import ast
|
24 |
-
from utils import ClaudeLLM
|
25 |
|
26 |
embeddings = HuggingFaceEmbeddings()
|
27 |
db = FAISS.load_local('db_full', embeddings)
|
28 |
-
|
29 |
mp_docs = {}
|
30 |
-
# llm = ClaudeLLM()
|
31 |
-
# ChatOpenAI(
|
32 |
-
# temperature=0,
|
33 |
-
# model='gpt-3.5-turbo-16k'
|
34 |
-
# )
|
35 |
-
|
36 |
-
|
37 |
-
def add_text(history, text):
|
38 |
-
|
39 |
-
print(history)
|
40 |
-
history = history + [(text, None)]
|
41 |
-
|
42 |
-
return history, ""
|
43 |
-
|
44 |
-
# pipeline = {'claude': (ClaudeLLM(), 0), 'gpt-3.5': (ChatOpenAI(temperature=0,model='gpt-3.5-turbo-16k'), 65), 'gpt-4': (ChatOpenAI(temperature=0, model='gpt-4'), 30)}
|
45 |
|
46 |
def retrieve_thoughts(query, n):
|
47 |
|
@@ -50,40 +34,29 @@ def retrieve_thoughts(query, n):
|
|
50 |
df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score], )
|
51 |
df = pd.concat((df, pd.DataFrame([dict(doc[0])['page_content'] for doc in docs_with_score], columns = ['page_content'])), axis = 1)
|
52 |
df = pd.concat((df, pd.DataFrame([doc[1] for doc in docs_with_score], columns = ['score'])), axis = 1)
|
|
|
53 |
df.sort_values("score", inplace = True)
|
54 |
|
55 |
# TO-DO: What if user query doesn't match what we provide as documents
|
56 |
|
57 |
tier_1 = df[df['score'] < 1]
|
58 |
|
59 |
-
# tier_2 = df[(df['score'] < 0.95) * (df["score"] > 0.7)]
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
tier_1_adjusted = tier_1.groupby(['title', 'url']).first().reset_index()[['title', 'url', 'score']]
|
64 |
tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1 )
|
65 |
tier_1_adjusted['chunks'] = chunks_1
|
66 |
-
score = tier_1.groupby(['
|
67 |
tier_1_adjusted['score'] = score
|
68 |
tier_1_adjusted.sort_values("score", inplace = True)
|
69 |
|
70 |
-
# chunks_2 = tier_2.groupby(['title', 'url', '_id']).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
|
71 |
-
# tier_2_adjusted = tier_2.groupby(['title', 'url', '_id']).first().reset_index()[['_id', 'title', 'url']]
|
72 |
-
# tier_2_adjusted['content'] = chunks_2
|
73 |
-
|
74 |
if n:
|
75 |
tier_1_adjusted = tier_1_adjusted[:min(len(tier_1_adjusted), n)]
|
76 |
|
77 |
-
print(len(tier_1_adjusted))
|
78 |
-
# tier_1 = [doc[0] for doc in docs if ((doc[1] < 1))][:5]
|
79 |
-
# tier_2 = [doc[0] for doc in docs if ((doc[1] > 0.7)*(doc[1] < 1.5))][10:15]
|
80 |
-
|
81 |
return {'tier 1':tier_1_adjusted, }
|
82 |
|
83 |
def qa_retrieve(query, llm):
|
84 |
|
85 |
-
# llm = pipeline["claude"][0]
|
86 |
-
|
87 |
docs = ""
|
88 |
|
89 |
global db
|
@@ -99,13 +72,8 @@ def qa_retrieve(query, llm):
|
|
99 |
mp_docs = thoughts
|
100 |
|
101 |
tier_1 = thoughts['tier 1']
|
102 |
-
# tier_2 = thoughts['tier 2']
|
103 |
|
104 |
-
reference = tier_1[['
|
105 |
-
|
106 |
-
# tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
|
107 |
-
# print(len(tier_1))
|
108 |
-
# tier_2 = list(tier_2.apply(lambda x: f"title: {x['title']}\n Content: {x.content}", axis = 1).values)
|
109 |
|
110 |
return {'Reference': reference}
|
111 |
|
@@ -123,5 +91,4 @@ demo = gr.Interface(fn=qa_retrieve, title="cicero-qa-api",
|
|
123 |
gr.components.JSON( label="Reference")],examples=examples)
|
124 |
|
125 |
demo.queue(concurrency_count = 4)
|
126 |
-
demo.launch()
|
127 |
-
|
|
|
21 |
from typing import Optional, List, Mapping, Any
|
22 |
|
23 |
import ast
|
24 |
+
from utils import ClaudeLLM
|
25 |
|
26 |
embeddings = HuggingFaceEmbeddings()
|
27 |
db = FAISS.load_local('db_full', embeddings)
|
|
|
28 |
mp_docs = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def retrieve_thoughts(query, n):
|
31 |
|
|
|
34 |
df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score], )
|
35 |
df = pd.concat((df, pd.DataFrame([dict(doc[0])['page_content'] for doc in docs_with_score], columns = ['page_content'])), axis = 1)
|
36 |
df = pd.concat((df, pd.DataFrame([doc[1] for doc in docs_with_score], columns = ['score'])), axis = 1)
|
37 |
+
df['_id'] = df['_id'].apply(lambda x: str(x))
|
38 |
df.sort_values("score", inplace = True)
|
39 |
|
40 |
# TO-DO: What if user query doesn't match what we provide as documents
|
41 |
|
42 |
tier_1 = df[df['score'] < 1]
|
43 |
|
|
|
44 |
|
45 |
+
chunks_1 = tier_1.groupby(['_id' ]).apply(lambda x: {f"chunk_{i}": row for i, row in enumerate(x.sort_values('id')[['id', 'score','page_content']].to_dict('records'))}).values
|
46 |
+
tier_1_adjusted = tier_1.groupby(['_id']).first().reset_index()[['_id', 'title', 'url', 'score']]
|
|
|
47 |
tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1 )
|
48 |
tier_1_adjusted['chunks'] = chunks_1
|
49 |
+
score = tier_1.groupby(['_id' ]).apply(lambda x: x['score'].mean()).values
|
50 |
tier_1_adjusted['score'] = score
|
51 |
tier_1_adjusted.sort_values("score", inplace = True)
|
52 |
|
|
|
|
|
|
|
|
|
53 |
if n:
|
54 |
tier_1_adjusted = tier_1_adjusted[:min(len(tier_1_adjusted), n)]
|
55 |
|
|
|
|
|
|
|
|
|
56 |
return {'tier 1':tier_1_adjusted, }
|
57 |
|
58 |
def qa_retrieve(query, llm):
|
59 |
|
|
|
|
|
60 |
docs = ""
|
61 |
|
62 |
global db
|
|
|
72 |
mp_docs = thoughts
|
73 |
|
74 |
tier_1 = thoughts['tier 1']
|
|
|
75 |
|
76 |
+
reference = tier_1[['_id', 'url', 'title', 'chunks', 'score']].to_dict('records')
|
|
|
|
|
|
|
|
|
77 |
|
78 |
return {'Reference': reference}
|
79 |
|
|
|
91 |
gr.components.JSON( label="Reference")],examples=examples)
|
92 |
|
93 |
demo.queue(concurrency_count = 4)
|
94 |
+
demo.launch()
|
|