lukiod committed on
Commit
aa85bcf
·
verified ·
1 Parent(s): 54d353f

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -99
app.py DELETED
@@ -1,99 +0,0 @@
1
- import os
2
- from dotenv import load_dotenv
3
- from fastapi import FastAPI, File, UploadFile, HTTPException, Header
4
- from pydantic import BaseModel
5
- from typing import List, Optional
6
- import torch
7
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
8
- from qwen_vl_utils import process_vision_info
9
- from byaldi import RAGMultiModalModel
10
- from PIL import Image
11
- import io
12
-
13
# Pull settings from a .env file into the process environment.
load_dotenv()

# Credentials and model identifiers. Each model id falls back to the default
# given here when the corresponding environment variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
RAG_MODEL = os.environ.get("RAG_MODEL", "vidore/colpali")
QWN_MODEL = os.environ.get("QWN_MODEL", "Qwen/Qwen2-VL-7B-Instruct")
QWN_PROCESSOR = os.environ.get("QWN_PROCESSOR", "Qwen/Qwen2-VL-2B-Instruct")

# Fail at import time when the token is missing or empty.
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in .env file")
24
-
25
# Initialize FastAPI app
app = FastAPI()

# Retrieval model. It is loaded at import time but is not referenced by any
# other code visible in this file.
# NOTE(review): confirm that RAGMultiModalModel.from_pretrained accepts
# `use_auth_token`; the call is kept exactly as it was.
RAG = RAGMultiModalModel.from_pretrained(RAG_MODEL, use_auth_token=HF_TOKEN)

# Vision-language model used to answer questions about an image.
# device_map="auto" already places the weights on the available devices, so
# no explicit .cuda() follows: moving a dispatched model afterwards is
# redundant on a single GPU and conflicts with the placement when the
# weights are spread over several devices.
# `token` replaces the deprecated `use_auth_token` keyword.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    QWN_MODEL,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    trust_remote_code=True,
    token=HF_TOKEN,
).eval()

# NOTE(review): the default processor checkpoint (2B) differs from the default
# model checkpoint (7B) -- confirm this pairing is intentional.
processor = AutoProcessor.from_pretrained(
    QWN_PROCESSOR,
    trust_remote_code=True,
    token=HF_TOKEN,
)
41
-
42
# Request payload schema for the /process_document endpoint.
class DocumentRequest(BaseModel):
    # Question forwarded to the model together with the uploaded image.
    text_query: str
45
-
46
# Define processing function
def document_rag(text_query, image, max_new_tokens=50):
    """Answer a text question about a single image with the loaded model.

    Args:
        text_query: Question or instruction sent alongside the image.
        image: Image placed in the chat message (the endpoint in this file
            passes a PIL image).
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 50, the value that used to be hard-coded.

    Returns:
        The decoded reply for the single prompt, with the prompt tokens
        removed and special tokens skipped.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": text_query},
            ],
        }
    ]

    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[prompt],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    # Follow the model's own placement rather than assuming a device name.
    inputs = inputs.to(model.device)

    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Drop the leading prompt tokens from each sequence so that only the
    # newly generated part is decoded.
    continuation_ids = [
        output_ids[len(prompt_ids):]
        for prompt_ids, output_ids in zip(inputs.input_ids, generated_ids)
    ]
    decoded = processor.batch_decode(
        continuation_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]
80
-
81
# Define API endpoints
@app.post("/process_document")
async def process_document(request: DocumentRequest, file: UploadFile = File(...), x_api_key: Optional[str] = Header(None)):
    """Run the model on an uploaded image and return its answer.

    Args:
        request: Payload carrying ``text_query``.
        file: Uploaded image file.
        x_api_key: Value of the ``X-API-Key`` header; must equal ``HF_TOKEN``.

    Returns:
        A dict of the form ``{"result": <model answer>}``.

    Raises:
        HTTPException: 403 when the API key is missing or wrong; 400 when the
            upload cannot be read as an image.

    NOTE(review): FastAPI's documentation says File/Form parameters cannot be
    combined with a body field expected as JSON. Confirm that clients can
    actually supply ``request`` next to the multipart upload; switching
    ``text_query`` to a form field would change the HTTP contract, so it is
    not done here.
    """
    import secrets  # local import keeps this block self-contained

    # Constant-time comparison so the check does not leak how much of the key
    # matched; a missing header is rejected before comparing.
    key_ok = x_api_key is not None and secrets.compare_digest(
        x_api_key.encode("utf-8"), HF_TOKEN.encode("utf-8")
    )
    if not key_ok:
        raise HTTPException(status_code=403, detail="Invalid API key")

    # Read and decode the uploaded file. Image.open is lazy, so load() forces
    # decoding here and corrupt uploads are reported as a client error.
    contents = await file.read()
    try:
        image = Image.open(io.BytesIO(contents))
        image.load()
    except OSError as exc:
        raise HTTPException(
            status_code=400, detail="Uploaded file is not a valid image"
        ) from exc

    # Process the document
    result = document_rag(request.text_query, image)

    return {"result": result}
96
-
97
if __name__ == "__main__":
    # Start the server only when this file is executed as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)