sitammeur committed on
Commit
839e452
1 Parent(s): 524cfb3

Upload model.py

Browse files
Files changed (1) hide show
  1. model.py +47 -0
model.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Importing the requirements
import torch
from transformers import AutoModel, AutoTokenizer
import spaces

# Device for the model.
# Prefer CUDA, but fall back to CPU so the module can still be imported
# on machines without a GPU (the original hard-coded "cuda" and crashed there).
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repository — acceptable only because the checkpoint id is pinned here.
model = AutoModel.from_pretrained(
    "openbmb/MiniCPM-Llama3-V-2_5", trust_remote_code=True, torch_dtype=torch.float16
)
model = model.to(device=device)
tokenizer = AutoTokenizer.from_pretrained(
    "openbmb/MiniCPM-Llama3-V-2_5", trust_remote_code=True
)

# Inference only: put the model in eval mode (disables dropout, etc.).
model.eval()
19
+
20
+
21
@spaces.GPU(duration=120)
def answer_question(image, question):
    """
    Answer a question about the supplied image with the MiniCPM-Llama3-V model.

    Args:
        image: The input image the question refers to (presumably a PIL image
            or path accepted by ``model.chat`` — confirm against the caller).
        question (str): The question text.

    Returns:
        str: The generated answer to the question.
    """
    # Single-turn conversation in the chat format the model expects.
    conversation = [{"role": "user", "content": question}]

    # Stream the generation; model.chat yields text chunks when stream=True.
    chunks = model.chat(
        image=image,
        msgs=conversation,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.7,
        stream=True,
        system_prompt="You are an AI assistant specialized in visual content analysis. Given an image and a related question, analyze the image thoroughly and provide a precise and informative answer based on the visible content. Ensure your response is clear, accurate, and directly addresses the question.",
    )

    # Concatenate the streamed pieces into the final answer string.
    return "".join(chunks)