File size: 1,663 Bytes
51a7f02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0a3c87
51a7f02
 
 
 
 
 
 
 
 
 
 
 
a0a3c87
51a7f02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import io
import logging
import os

import openai
from fastapi import APIRouter, UploadFile, File
from fastapi.responses import JSONResponse
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.schema import Document
from langchain.vectorstores import Qdrant
from pypdf import PdfReader

from db import vector_store

# Router that the endpoints in this module are registered on.
router = APIRouter()
# Vector store handle shared by all requests, obtained once at import time.
# NOTE(review): presumably a process-wide singleton — confirm in db.vector_store.
_db = vector_store.get_instance()
# Question-answering chain, built once at import time from an OpenAI LLM with
# temperature=0 and the "map_reduce" chain type.
_chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce")

@router.post("/v1/docs")
async def index_doc(file: UploadFile = File(...)):
    """Index an uploaded document into the vector store.

    Every document produced by ``generate_documents`` is collected first and
    then handed to the store in a single ``add_documents`` call, instead of
    one store call per page.

    Args:
        file: The uploaded file (required multipart form field ``file``).

    Returns:
        A 200 ``JSONResponse`` with an empty JSON object, whether or not any
        documents were indexed.
    """
    docs = [doc async for doc in generate_documents(file)]
    # When no documents were generated the original per-page loop never
    # touched the store; keep that by skipping the call for an empty batch.
    if docs:
        _db.add_documents(docs)
    # TODO: return something sensible (e.g. how many pages were indexed).
    return JSONResponse(status_code=200, content={})

@router.get("/v1/docs")
async def search(query: str):
    """Answer a question using the documents in the vector store.

    Runs a similarity search for ``query`` against the store, then passes the
    matching documents together with the question to the QA chain.

    Args:
        query: The question to answer (query-string parameter ``query``).

    Returns:
        A 200 ``JSONResponse`` whose body is ``{"answer": <chain output>}``.
    """
    log = logging.getLogger(__name__)
    # Debug-level, lazily formatted logging replaces the former print() calls
    # so queries and document contents are not written to stdout per request.
    log.debug("search query: %s", query)
    docs = _db.similarity_search(query=query)
    log.debug("search matches: %s", docs)
    answer = _chain.run(input_documents=docs, question=query)
    return JSONResponse(status_code=200, content={"answer": answer})

async def generate_documents(file: UploadFile):
    """Yield one ``Document`` per text chunk extracted from *file*.

    The chunks come from ``convert_documents``. Each resulting document
    carries a 1-based ``page`` number in its metadata, assigned in the order
    the chunks arrive.
    """
    page_no = 1
    async for text in convert_documents(file):
        yield Document(page_content=text, metadata={"page": page_no})
        # Advance only after the consumer has taken the current document.
        page_no += 1
 
async def convert_documents(file: UploadFile):
    """Yield the extracted text of each page of an uploaded PDF.

    Only uploads whose content type is exactly ``application/pdf`` are
    parsed; any other upload yields nothing.

    Extraction is best-effort: if iterating the pages or extracting text
    raises, the error is logged with its traceback and iteration stops, so
    the pages already yielded are kept. Errors raised while reading the
    upload or constructing the ``PdfReader`` are not caught here.

    Args:
        file: The uploaded file to parse.

    Yields:
        The result of ``extract_text()`` for each page, in reader order.
    """
    # Guard clause: unsupported content types produce an empty stream.
    if file.content_type != 'application/pdf':
        return
    content = await file.read()
    pdf_reader = PdfReader(io.BytesIO(content))
    try:
        for page in pdf_reader.pages:
            yield page.extract_text()
    except Exception:
        # Keep the stop-at-first-failure behaviour, but record the full
        # traceback instead of printing only the exception message.
        logging.getLogger(__name__).exception(
            "PDF text extraction failed; stopping at the failing page"
        )