# Spaces:
# Runtime error
# Runtime error
import gradio as gr | |
import numpy as np | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains import LLMChain | |
from langchain import PromptTemplate | |
import re | |
import pandas as pd | |
from langchain.vectorstores import FAISS | |
import requests | |
from typing import List | |
from langchain.schema import ( | |
SystemMessage, | |
HumanMessage, | |
AIMessage | |
) | |
import os | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.chat_models import ChatOpenAI | |
from langchain.llms.base import LLM | |
from typing import Optional, List, Mapping, Any | |
import ast | |
from utils import ClaudeLLM | |
# Embedding model built with the library defaults; passed to the index loader
# below.
embeddings = HuggingFaceEmbeddings()

# Vector store loaded once at import time from the local 'db_full' directory.
db = FAISS.load_local('db_full', embeddings)

# Most recent retrieval result stored by qa_retrieve between calls.
mp_docs = {}
def retrieve_thoughts(query, n):
    """Search the vector store and group the matching chunks by document.

    Every chunk in the module-level ``db`` index is scored against ``query``.
    Chunks whose score is below 1 are kept and grouped by their ``_id``
    metadata value, giving one row per source document.

    Each chunk's metadata must provide the keys ``_id``, ``id``, ``title``,
    ``author`` and ``url``.

    Args:
        query: Text to search for.
        n: Maximum number of documents to return. A falsy value (``0`` or
            ``None``) returns every matching document.

    Returns:
        A dict with the single key ``'tier 1'`` holding a DataFrame with the
        columns ``_id``, ``title``, ``author``, ``url``, ``score`` (mean
        score of the document's kept chunks), ``ref`` and ``chunks`` (a dict
        ``{"chunk_<i>": {"id", "score", "page_content"}}`` ordered by chunk
        ``id``). Rows are sorted by ascending ``score``. The frame is empty
        when nothing matches.
    """
    result_columns = ['_id', 'title', 'author', 'url', 'score', 'ref', 'chunks']

    # Request every indexed chunk so the score threshold below, not k,
    # decides what is kept.
    total = len(db.index_to_docstore_id)
    docs_with_score = db.similarity_search_with_score(
        query=query, k=total, fetch_k=total
    )

    records = [
        {**doc.metadata, 'page_content': doc.page_content, 'score': score}
        for doc, score in docs_with_score
    ]
    if not records:
        return {'tier 1': pd.DataFrame(columns=result_columns)}

    df = pd.DataFrame(records)
    df['_id'] = df['_id'].map(str)
    # Ascending order, so the first row of each group below is that
    # document's lowest-scoring chunk.
    # NOTE(review): scores are presumably distances (lower is closer) --
    # confirm against how the index was built.
    df = df.sort_values('score')

    tier_1 = df[df['score'] < 1]
    # TODO: decide what to return when the query matches nothing we hold;
    # for now the caller receives an empty frame with the usual columns.
    if tier_1.empty:
        return {'tier 1': pd.DataFrame(columns=result_columns)}

    # Iteration and aggregations both follow the sorted group keys, so the
    # per-document lists built here line up row for row with the summary.
    grouped = tier_1.groupby('_id')
    chunks = [
        {
            f"chunk_{i}": row
            for i, row in enumerate(
                group.sort_values('id')[['id', 'score', 'page_content']]
                .to_dict('records')
            )
        }
        for _, group in grouped
    ]

    summary_columns = ['_id', 'title', 'author', 'url', 'score']
    tier_1_adjusted = grouped.first().reset_index()[summary_columns].copy()
    # ref is numbered in _id order, before the rows are re-sorted by score.
    tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1)
    tier_1_adjusted['chunks'] = chunks
    # Replace the first chunk's score with the mean over the kept chunks.
    tier_1_adjusted['score'] = grouped['score'].mean().values
    tier_1_adjusted = tier_1_adjusted.sort_values('score')

    if n:
        tier_1_adjusted = tier_1_adjusted.iloc[:n]
    return {'tier 1': tier_1_adjusted}
def qa_retrieve(query):
    """Return the reference documents retrieved for ``query``.

    Calls ``retrieve_thoughts`` with no limit on the number of documents and
    converts its ``'tier 1'`` frame into a list of plain records.

    Args:
        query: Text to search for.

    Returns:
        ``{'Reference': [...]}`` where each entry is a dict with the keys
        ``_id``, ``url``, ``author``, ``title``, ``chunks`` and ``score``.
    """
    global mp_docs

    thoughts = retrieve_thoughts(query, 0)
    # NOTE(review): retrieve_thoughts always returns a one-key dict, which is
    # truthy even when its frame is empty, so the cached fallback below is
    # never taken. Confirm whether an empty 'tier 1' frame was meant to
    # trigger it before changing this condition.
    if thoughts:
        mp_docs = thoughts
    elif mp_docs:
        thoughts = mp_docs

    tier_1 = thoughts['tier 1']
    reference = tier_1[
        ['_id', 'url', 'author', 'title', 'chunks', 'score']
    ].to_dict('records')
    return {'Reference': reference}
def flush():
    """Return ``None``; takes no arguments and does nothing else."""
    return None
# Example queries offered in the interface.
examples = [
    ["Will Russia win the war in Ukraine?"],
]

# gr.Textbox is used instead of the deprecated gr.inputs.Textbox, which was
# removed in Gradio 4.
demo = gr.Interface(
    fn=qa_retrieve,
    title="cicero-qa-api",
    inputs=gr.Textbox(lines=5, label="what would you like to learn about?"),
    outputs="json",
    examples=examples,
)
demo.launch()