Spaces:
Sleeping
Sleeping
from typing import Annotated | |
from fastapi import APIRouter, UploadFile, File, Body | |
from fastapi.responses import JSONResponse | |
import openai | |
import io | |
import os | |
from pypdf import PdfReader | |
from langchain.schema import Document | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.llms import OpenAI | |
from langchain.text_splitter import SentenceTransformersTokenTextSplitter | |
from db.vector_store import Store | |
# Router object for this module (route registration is not visible in this
# excerpt).
router = APIRouter()

# Question-answering chain shared by every request; built once at import time.
# chain_type="stuff" places all retrieved documents into a single prompt.
_chain = load_qa_chain(
    OpenAI(temperature=0),
    chain_type="stuff",
    verbose=True,
)
async def answer(name: str, query: str):
    """Answer a question from the doc.

    Looks up the chunks most similar to `query` in the collection called
    `name`, passes them to the module-level QA chain, and returns the
    generated answer together with the origin of every chunk that was used.

    Parameters:
    - `name` of the doc (the collection to search).
    - `query` to be answered.

    Return:
    a `JSONResponse` (status 200) whose body holds the string `answer` and a
    `metadata` list with the `file`, `page` and `score` of each retrieved
    chunk.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)

    collection = Store.get_instance().get_collection(name)
    log.debug("query: %s", query)

    # Each hit is a (document, score) pair.
    hits = collection.similarity_search_with_score(query=query)
    log.debug("retrieved: %s", hits)

    # Named `reply` so the local does not shadow this function's own name.
    reply = _chain.run(
        input_documents=[doc for doc, _ in hits],
        question=query,
    )

    sources = [
        {
            "file": doc.metadata["file"],
            "page": doc.metadata["page"],
            # Some vector-store backends may return numpy floats, which the
            # JSON encoder rejects; cast to a builtin float.
            "score": float(score),
        }
        for doc, score in hits
    ]
    return JSONResponse(
        status_code=200,
        content={"answer": reply, "metadata": sources},
    )
async def list() -> list[dict]:
    """List all the datasets available to query.

    :return:
    list of datasets, exactly as reported by the store's
    `list_collections()`
    """
    # NOTE(review): this function's name shadows the builtin `list` for any
    # code that follows it in the module; kept for interface compatibility.
    # TODO: surface more metadata for individual datasets
    store = Store.get_instance()
    return store.list_collections()