Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -42,6 +42,48 @@ mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
|
|
42 |
# Per-page embeddings of the indexed documents: filled by the /index handler
# and scored against query embeddings by the /search handler.
ds = []
|
43 |
# Page images produced from the uploaded files by the /index handler.
images = []
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
# Rediriger la racine vers /docs
|
46 |
@app.get("/")
|
47 |
def read_root():
|
|
|
42 |
# Per-page embeddings of the indexed documents: filled by the /index handler
# and scored against query embeddings by the /search handler.
ds = []
|
43 |
# Page images produced from the uploaded files by the /index handler.
images = []
|
44 |
|
45 |
+
@app.post("/index")
def index(files: List[UploadFile] = File(...)):
    """Rebuild the in-memory index from the uploaded files.

    Each upload is rendered to page images, the pages are embedded in batches
    of four with ``model``, and the module-level ``images`` and ``ds`` lists
    are then replaced with the new pages and their embeddings.

    Args:
        files: Uploaded documents (expected to be PDFs, since they are handed
            to pdf2image for conversion).

    Returns:
        A dict whose ``"message"`` entry reports how many pages were converted.
    """
    global ds, images

    # Function-scope import so this block stands on its own.
    # NOTE(review): assumes pdf2image is the package that already provides
    # ``convert_from_path`` elsewhere in this file -- confirm.
    from pdf2image import convert_from_bytes

    new_images = []
    for file in files:
        # ``UploadFile.read()`` is a coroutine and this handler is synchronous,
        # so read from the underlying file object instead.
        content = file.file.read()
        # ``convert_from_path`` expects a filesystem path; raw bytes must go
        # through ``convert_from_bytes``.
        new_images.extend(convert_from_bytes(content))

    dataloader = DataLoader(
        new_images,
        batch_size=4,
        shuffle=False,
        collate_fn=lambda x: process_images(processor, x),
    )

    new_ds = []
    for batch_doc in dataloader:
        with torch.no_grad():
            inputs = {name: tensor.to(device) for name, tensor in batch_doc.items()}
            embeddings_doc = model(**inputs)
        # One embedding per page, moved back to the CPU.
        new_ds.extend(torch.unbind(embeddings_doc.to("cpu")))

    # Swap the globals only after everything succeeded, so a failed upload
    # does not leave an empty or half-built index behind.
    images = new_images
    ds = new_ds

    return {"message": f"Uploaded and converted {len(images)} pages"}
|
68 |
+
|
69 |
+
@app.post("/search")
def search(query: str, k: int):
    """Return the ``k`` indexed pages that score highest for ``query``.

    The query is embedded with ``model`` and scored against the module-level
    ``ds`` embeddings with ``CustomEvaluator``.

    Args:
        query: Free-text search query.
        k: Maximum number of pages to return. Values below 1 give no results.

    Returns:
        A dict whose ``"results"`` entry lists the matches from best to worst,
        each with the page index and a placeholder for the image.
    """
    # Nothing to rank: ``k`` asks for no pages, or nothing has been indexed.
    # Without this guard ``[-k:]`` with k == 0 would return every page.
    if k <= 0 or not ds:
        return {"results": []}

    with torch.no_grad():
        batch_query = process_queries(processor, [query], mock_image)
        # Distinct loop names so the ``k`` parameter is not shadowed.
        batch_query = {name: tensor.to(device) for name, tensor in batch_query.items()}
        embeddings_query = model(**batch_query)
    qs = list(torch.unbind(embeddings_query.to("cpu")))

    retriever_evaluator = CustomEvaluator(is_multi_vector=True)
    scores = retriever_evaluator.evaluate(qs, ds)

    # Row 0 holds the scores for the single query. argsort is ascending, so
    # take the last k entries and reverse them to get best-first order.
    # NOTE(review): the reversed slice assumes ``scores`` is a numpy array -- confirm.
    ranked = scores.argsort(axis=1)[0]
    top_k_indices = ranked[-k:][::-1]

    # Cast to built-in int so the response can be JSON-encoded.
    results = [
        {"page": int(idx), "image": "image_placeholder"} for idx in top_k_indices
    ]

    return {"results": results}
|
86 |
+
|
87 |
# Rediriger la racine vers /docs
|
88 |
@app.get("/")
|
89 |
def read_root():
|