Tonic committed on
Commit
f316dc9
·
unverified ·
1 Parent(s): 743c7a4
Files changed (1) hide show
  1. app.py +53 -43
app.py CHANGED
@@ -53,6 +53,7 @@ def initialize_llm():
53
  print("LLM initialization failed:", e)
54
  return None
55
 
 
56
  llm = initialize_llm()
57
 
58
  def encode_image(image: Image.Image, image_format="PNG") -> str:
@@ -67,68 +68,77 @@ def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
67
  if llm is None:
68
  return "Error: LLM initialization failed. Please try again later."
69
 
70
- image = Image.open(BytesIO(requests.get(image_url).content))
71
- image = image.resize((3844, 2408))
72
- new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
 
73
 
74
- messages = [
75
- {
76
- "role": "user",
77
- "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": new_image_url}}]
78
- },
79
- ]
80
 
81
- outputs = llm.chat(messages, sampling_params=sampling_params)
82
 
83
- return outputs[0].outputs[0].text
 
 
84
 
85
  @spaces.GPU()
86
  def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
87
  if llm is None:
88
  return "Error: LLM initialization failed. Please try again later."
89
 
90
- image1 = Image.open(BytesIO(requests.get(image1_url).content))
91
- image2 = Image.open(BytesIO(requests.get(image2_url).content))
92
- image1 = image1.resize((3844, 2408))
93
- image2 = image2.resize((3844, 2408))
94
- new_image1_url = f"data:image/png;base64,{encode_image(image1, image_format='PNG')}"
95
- new_image2_url = f"data:image/png;base64,{encode_image(image2, image_format='PNG')}"
96
-
97
- messages = [
98
- {
99
- "role": "user",
100
- "content": [
101
- {"type": "text", "text": prompt},
102
- {"type": "image_url", "image_url": {"url": new_image1_url}},
103
- {"type": "image_url", "image_url": {"url": new_image2_url}}
104
- ]
105
- },
106
- ]
107
-
108
- outputs = llm.chat(messages, sampling_params=sampling_params)
109
-
110
- return outputs[0].outputs[0].text
 
 
 
111
 
112
  @spaces.GPU()
113
  def calculate_image_similarity(image1_url, image2_url):
114
  if llm is None:
115
  return "Error: LLM initialization failed. Please try again later."
116
 
117
- image1 = Image.open(BytesIO(requests.get(image1_url).content)).convert('RGB')
118
- image2 = Image.open(BytesIO(requests.get(image2_url).content)).convert('RGB')
119
- image1 = image1.resize((224, 224)) # Resize to match model input size
120
- image2 = image2.resize((224, 224))
 
121
 
122
- image1_tensor = torch.tensor(list(image1.getdata())).view(1, 3, 224, 224).float() / 255.0
123
- image2_tensor = torch.tensor(list(image2.getdata())).view(1, 3, 224, 224).float() / 255.0
124
 
125
- with torch.no_grad():
126
- embedding1 = llm.model.vision_encoder([image1_tensor])
127
- embedding2 = llm.model.vision_encoder([image2_tensor])
128
 
129
- similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
130
 
131
- return similarity
 
 
132
 
133
  with gr.Blocks() as demo:
134
  gr.Markdown(title)
 
53
  print("LLM initialization failed:", e)
54
  return None
55
 
56
+ sampling_params = SamplingParams(max_tokens=8192)
57
  llm = initialize_llm()
58
 
59
  def encode_image(image: Image.Image, image_format="PNG") -> str:
 
68
  if llm is None:
69
  return "Error: LLM initialization failed. Please try again later."
70
 
71
+ try:
72
+ image = Image.open(BytesIO(requests.get(image_url).content))
73
+ image = image.resize((3844, 2408))
74
+ new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
75
 
76
+ messages = [
77
+ {
78
+ "role": "user",
79
+ "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": new_image_url}}]
80
+ },
81
+ ]
82
 
83
+ outputs = llm.chat(messages, sampling_params=sampling_params)
84
 
85
+ return outputs[0].outputs[0].text
86
+ except Exception as e:
87
+ return f"Error during inference: {e}"
88
 
89
  @spaces.GPU()
90
  def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
91
  if llm is None:
92
  return "Error: LLM initialization failed. Please try again later."
93
 
94
+ try:
95
+ image1 = Image.open(BytesIO(requests.get(image1_url).content))
96
+ image2 = Image.open(BytesIO(requests.get(image2_url).content))
97
+ image1 = image1.resize((3844, 2408))
98
+ image2 = image2.resize((3844, 2408))
99
+ new_image1_url = f"data:image/png;base64,{encode_image(image1, image_format='PNG')}"
100
+ new_image2_url = f"data:image/png;base64,{encode_image(image2, image_format='PNG')}"
101
+
102
+ messages = [
103
+ {
104
+ "role": "user",
105
+ "content": [
106
+ {"type": "text", "text": prompt},
107
+ {"type": "image_url", "image_url": {"url": new_image1_url}},
108
+ {"type": "image_url", "image_url": {"url": new_image2_url}}
109
+ ]
110
+ },
111
+ ]
112
+
113
+ outputs = llm.chat(messages, sampling_params=sampling_params)
114
+
115
+ return outputs[0].outputs[0].text
116
+ except Exception as e:
117
+ return f"Error during image comparison: {e}"
118
 
119
  @spaces.GPU()
120
  def calculate_image_similarity(image1_url, image2_url):
121
  if llm is None:
122
  return "Error: LLM initialization failed. Please try again later."
123
 
124
+ try:
125
+ image1 = Image.open(BytesIO(requests.get(image1_url).content)).convert('RGB')
126
+ image2 = Image.open(BytesIO(requests.get(image2_url).content)).convert('RGB')
127
+ image1 = image1.resize((224, 224)) # Resize to match model input size
128
+ image2 = image2.resize((224, 224))
129
 
130
+ image1_tensor = torch.tensor(list(image1.getdata())).view(1, 3, 224, 224).float() / 255.0
131
+ image2_tensor = torch.tensor(list(image2.getdata())).view(1, 3, 224, 224).float() / 255.0
132
 
133
+ with torch.no_grad():
134
+ embedding1 = llm.model.vision_encoder([image1_tensor])
135
+ embedding2 = llm.model.vision_encoder([image2_tensor])
136
 
137
+ similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
138
 
139
+ return similarity
140
+ except Exception as e:
141
+ return f"Error during image similarity calculation: {e}"
142
 
143
  with gr.Blocks() as demo:
144
  gr.Markdown(title)