Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
|
|
3 |
from PIL import Image
|
4 |
import torch
|
5 |
import spaces
|
6 |
-
import
|
7 |
|
8 |
# Load the processor and model
|
9 |
processor = AutoProcessor.from_pretrained(
|
@@ -20,6 +20,54 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
20 |
device_map='auto'
|
21 |
)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
@spaces.GPU()
|
25 |
def process_image_and_text(image, text):
|
@@ -42,15 +90,15 @@ def process_image_and_text(image, text):
|
|
42 |
# Only get generated tokens; decode them to text
|
43 |
generated_tokens = output[0, inputs['input_ids'].size(1):]
|
44 |
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
|
45 |
-
|
46 |
-
|
|
|
47 |
|
48 |
def chatbot(image, text, history):
|
49 |
if image is None:
|
50 |
return history + [("Please upload an image first.", None)]
|
51 |
|
52 |
response = process_image_and_text(image, text)
|
53 |
-
# pretty_response = pprint.pp(response)
|
54 |
|
55 |
history.append({"role": "user", "content": text})
|
56 |
history.append({"role": "assistant", "content": response})
|
|
|
3 |
from PIL import Image
|
4 |
import torch
|
5 |
import spaces
|
6 |
+
import json
|
7 |
|
8 |
# Load the processor and model
|
9 |
processor = AutoProcessor.from_pretrained(
|
|
|
20 |
device_map='auto'
|
21 |
)
|
22 |
|
23 |
+
import json
|
24 |
+
|
25 |
+
def wrap_json_in_markdown(text):
    """Wrap each JSON object or array embedded in *text* in a Markdown fence.

    The text is scanned left to right. At every ``{`` or ``[`` the standard
    JSON decoder is asked to parse a value starting at that position. When it
    succeeds, the parsed value is re-serialized with 4-space indentation and
    emitted between ``\\n```json\\n`` and ``\\n```\\n``; scanning then resumes
    right after the parsed value. When it fails, the bracket is treated as
    ordinary text and scanning continues with the next character.

    Using the real decoder (instead of counting brackets by hand) means
    brackets that appear inside JSON string values do not confuse the scan,
    and a stray or mismatched bracket earlier in the text cannot prevent
    later valid JSON from being recognized.

    Args:
        text: The string to scan (may be empty).

    Returns:
        A new string in which every recognized JSON object/array is replaced
        by its pretty-printed form inside a ```` ```json ```` code fence. All
        other text is preserved unchanged.
    """
    decoder = json.JSONDecoder()
    pieces = []
    plain_start = 0  # start of the plain-text run not yet copied to `pieces`
    pos = 0
    length = len(text)

    while pos < length:
        if text[pos] not in '{[':
            pos += 1
            continue
        try:
            # raw_decode parses one JSON value starting exactly at `pos` and
            # returns the index just past it, ignoring whatever follows.
            parsed, end = decoder.raw_decode(text, pos)
        except (json.JSONDecodeError, RecursionError):
            # Not JSON here (or absurdly deep nesting): keep it as plain text.
            pos += 1
            continue
        pieces.append(text[plain_start:pos])
        pieces.append(f"\n```json\n{json.dumps(parsed, indent=4)}\n```\n")
        pos = plain_start = end

    pieces.append(text[plain_start:])
    return ''.join(pieces)
|
71 |
|
72 |
@spaces.GPU()
|
73 |
def process_image_and_text(image, text):
|
|
|
90 |
# Only get generated tokens; decode them to text
|
91 |
generated_tokens = output[0, inputs['input_ids'].size(1):]
|
92 |
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
|
93 |
+
generated_text_w_json_wrapper = wrap_json_in_markdown(generated_text)
|
94 |
+
|
95 |
+
return generated_text_w_json_wrapper
|
96 |
|
97 |
def chatbot(image, text, history):
|
98 |
if image is None:
|
99 |
return history + [("Please upload an image first.", None)]
|
100 |
|
101 |
response = process_image_and_text(image, text)
|
|
|
102 |
|
103 |
history.append({"role": "user", "content": text})
|
104 |
history.append({"role": "assistant", "content": response})
|