HUANG-Stephanie committed on
Commit
b1bf444
·
verified ·
1 Parent(s): 9b4d509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py CHANGED
@@ -42,6 +42,48 @@ mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
42
  ds = []
43
  images = []
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Rediriger la racine vers /docs
46
  @app.get("/")
47
  def read_root():
 
42
  ds = []
43
  images = []
44
 
45
@app.post("/index")
def index(files: List[UploadFile] = File(...)):
    """Convert the uploaded PDF files to page images and embed every page.

    On success the module-level ``images`` and ``ds`` lists are replaced
    with the pages and page embeddings of this upload; any previously
    indexed content is discarded.

    Args:
        files: Uploaded PDF documents (multipart form field ``files``).

    Returns:
        A dict with a ``message`` key reporting how many pages were converted.
    """
    import os
    import tempfile

    global ds, images

    # Build into locals first so a failure part-way through does not leave
    # the module-level index half-populated.
    new_images = []
    with tempfile.TemporaryDirectory() as tmp_dir:
        for position, file in enumerate(files):
            # UploadFile.read() is a coroutine and cannot be called from this
            # synchronous endpoint; read the underlying file object instead.
            content = file.file.read()

            # convert_from_path expects a filesystem path, not a file-like
            # object, so spool the upload to disk before converting it.
            pdf_path = os.path.join(tmp_dir, f"upload_{position}.pdf")
            with open(pdf_path, "wb") as handle:
                handle.write(content)
            new_images.extend(convert_from_path(pdf_path))

    dataloader = DataLoader(
        new_images,
        batch_size=4,
        shuffle=False,
        collate_fn=lambda x: process_images(processor, x),
    )

    new_ds = []
    for batch_doc in dataloader:
        with torch.no_grad():
            batch_doc = {name: tensor.to(device) for name, tensor in batch_doc.items()}
            embeddings_doc = model(**batch_doc)
        # Keep one CPU tensor per page so the list index is the page index.
        new_ds.extend(torch.unbind(embeddings_doc.to("cpu")))

    images = new_images
    ds = new_ds

    return {"message": f"Uploaded and converted {len(images)} pages"}
68
+
69
@app.post("/search")
def search(query: str, k: int):
    """Return the indices of the ``k`` best-scoring indexed pages for a query.

    Args:
        query: Free-text query to embed and score against the indexed pages.
        k: Maximum number of pages to return.

    Returns:
        A dict with a ``results`` list, best match first. Each entry holds
        the page index (position in the module-level ``ds`` list) and a
        placeholder for the page image. The list is empty when ``k`` is not
        positive or when nothing has been indexed yet.
    """
    # ``[-k:]`` with k == 0 would select every page, and scoring against an
    # empty index has nothing to rank, so answer both cases up front.
    if k <= 0 or not ds:
        return {"results": []}

    with torch.no_grad():
        batch_query = process_queries(processor, [query], mock_image)
        # Named ``name``/``tensor`` to avoid shadowing the ``k`` parameter.
        batch_query = {name: tensor.to(device) for name, tensor in batch_query.items()}
        embeddings_query = model(**batch_query)
    qs = list(torch.unbind(embeddings_query.to("cpu")))

    retriever_evaluator = CustomEvaluator(is_multi_vector=True)
    scores = retriever_evaluator.evaluate(qs, ds)

    # Convert the ascending ranking of the single query row to plain Python
    # ints before slicing: this keeps the response JSON-serialisable and
    # avoids relying on negative-step slicing of the array/tensor type.
    ranked = scores.argsort(axis=1)[0].tolist()
    top_k_indices = ranked[-k:][::-1]

    results = [{"page": idx, "image": "image_placeholder"} for idx in top_k_indices]

    return {"results": results}
86
+
87
  # Rediriger la racine vers /docs
88
  @app.get("/")
89
  def read_root():