Janar Ramalingam committed
Commit 92ef095 · Parent: a41cfd7

Changes to make search work better


1. Move back to the LLM QA "stuff" chain type, since map_reduce gives poor answers.
2. Create more metadata around documents:
   * Allow creating separate collection spaces in the db:
     PUT /admin/v1/db -d '{"name": "{collection}"}'
   * Within a collection, record file names so that answers can be annotated:
     {"answer": "I don't know", "files": ["a1", "a2"]}
     would mean OpenAI doesn't know the answer to the question even though
     the embedding search returned two files.

A lot more work to do.

Files changed (2):
  1. api/routes/admin.py +4 -2
  2. api/routes/search.py +17 -11
api/routes/admin.py CHANGED
@@ -1,13 +1,15 @@
 #This is to init the vector store
 
+from typing import Annotated
+
 from qdrant_client.models import VectorParams, Distance
-from fastapi import APIRouter
+from fastapi import APIRouter, Body
 from db import vector_store
 
 router = APIRouter()
 
 @router.put("/admin/v1/db")
-async def recreate_collection(name: str = "test"):
+async def recreate_collection(name: Annotated[str, Body(embed=True)]):
     print(f"creating collection {name} in db")
     return vector_store.client.recreate_collection(collection_name=name,
                                                    vectors_config=VectorParams(size=1536, distance=Distance.COSINE))
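Because recreate_collection now takes its name via Body(embed=True), FastAPI expects a JSON object of the form {"name": "..."} rather than a bare string in the request body. A minimal sketch of calling the new endpoint with the requests library; the local base URL and collection name are assumptions for illustration, not part of this commit:

    import requests

    # Create (or recreate) a collection; Body(embed=True) means the
    # name must be wrapped in a JSON object, not sent as a raw string.
    resp = requests.put(
        "http://localhost:8000/admin/v1/db",  # assumed local base URL
        json={"name": "test"},                # assumed collection name
    )
    print(resp.status_code, resp.json())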
api/routes/search.py CHANGED
@@ -1,4 +1,6 @@
-from fastapi import APIRouter, UploadFile, File
+from typing import Annotated
+
+from fastapi import APIRouter, UploadFile, File, Body
 from fastapi.responses import JSONResponse
 import openai
 import io
@@ -12,29 +14,33 @@ from langchain.llms import OpenAI
 from db import vector_store
 
 router = APIRouter()
-_db = vector_store.get_instance()
-_chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce")
+_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
 
 @router.post("/v1/docs")
-async def index_doc(file: UploadFile = File(...)):
-    async for doc in generate_documents(file):
+async def index_doc(name: Annotated[str, Body()], fileName: Annotated[str, Body()], file: UploadFile = File(...)):
+    _db = vector_store.get_instance(name)
+    if not _db:
+        return JSONResponse(status_code=404, content={})
+    async for doc in generate_documents(file, fileName):
+        print(doc)
         _db.add_documents([doc])
     #todo return something sensible
-    return JSONResponse(status_code=200, content={})
+    return JSONResponse(status_code=200, content={"name": name})
 
-@router.get("/v1/docs")
-async def search(query: str):
+@router.get("/v1/answers/{name}")
+async def search(name: str, query: str):
+    _db = vector_store.get_instance(name)
     print(query)
     docs = _db.similarity_search(query=query)
    print(docs)
     answer = _chain.run(input_documents=docs, question=query)
-    return JSONResponse(status_code=200, content={"answer": answer})
+    return JSONResponse(status_code=200, content={"answer": answer, "files": [d.metadata["file"] for d in docs]})
 
-async def generate_documents(file: UploadFile):
+async def generate_documents(file: UploadFile, fileName: str):
     num=0
     async for txt in convert_documents(file):
         num += 1
-        document = Document(page_content=txt,metadata={"page": num})
+        document = Document(page_content=txt,metadata={"file": fileName, "page": num})
         yield document
 
 async def convert_documents(file: UploadFile):
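With the route moved to /v1/answers/{name} and file names stored in document metadata, a query now returns both the answer and the source files that the embedding search matched. A minimal sketch of the read side, again assuming a local server and an already-populated "test" collection (both illustrative assumptions):

    import requests

    # Ask a question against one collection; the response now carries
    # the file names attached to the matched documents' metadata.
    resp = requests.get(
        "http://localhost:8000/v1/answers/test",  # assumed local base URL
        params={"query": "what does the refund policy say?"},
    )
    data = resp.json()
    print(data["answer"])  # may be "I don't know" if the LLM can't answer
    print(data["files"])   # files returned by the embedding search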