from fastapi import APIRouter
from fastapi.responses import JSONResponse
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

from db.vector_store import Store

router = APIRouter()

# Shared "stuff" QA chain; temperature=0 keeps the answers deterministic.
_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff", verbose=True)
@router.get("/v1/datasets/{name}/answer")
async def answer(name: str, query: str):
    """Answer a question using the documents in a dataset.

    Parameters:
    - `name`: the dataset to search.
    - `query`: the question to be answered.

    Returns:
    the answer string, plus the source file, page, and similarity score
    of each document used to produce it.
    """
    _db = Store.get_instance().get_collection(name)
    # Retrieve (document, score) pairs most similar to the query.
    docs = _db.similarity_search_with_score(query=query)
    # Feed only the documents (not the scores) into the QA chain.
    answer = _chain.run(input_documents=[doc for doc, _ in docs], question=query)
    return JSONResponse(
        status_code=200,
        content={
            "answer": answer,
            "metadata": [
                {"file": doc.metadata["file"], "page": doc.metadata["page"], "score": score}
                for doc, score in docs
            ],
        },
    )
@router.get("/v1/datasets")
async def list_datasets() -> list[dict]:
    """List all the datasets available to query.

    Returns:
    list of datasets
    """
    # TODO: surface more metadata for individual datasets
    return Store.get_instance().list_collections()
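# A similar sketch for listing datasets (again illustrative only), under the
# same assumption of a local server on port 8000:
#
#   import requests
#
#   for dataset in requests.get("http://localhost:8000/v1/datasets").json():
#       print(dataset)  # one dict of collection metadata per dataset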