Molmo-7B-D-0924-extended-tokens

Running on Zero

App Files Files Community

sflindrs committed on Jan 30

Commit

4bf5dae

verified ·

1 Parent(s): 5ee2847

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -4

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
 from PIL import Image
 import torch
 import spaces
-import pprint
 # Load the processor and model
 processor = AutoProcessor.from_pretrained(
@@ -20,6 +20,54 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map='auto'
 )
 @spaces.GPU()
 def process_image_and_text(image, text):
@@ -42,15 +90,15 @@ def process_image_and_text(image, text):
     # Only get generated tokens; decode them to text
     generated_tokens = output[0, inputs['input_ids'].size(1):]
     generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-    return generated_text
 def chatbot(image, text, history):
     if image is None:
         return history + [("Please upload an image first.", None)]
     response = process_image_and_text(image, text)
-    # pretty_response = pprint.pp(response)
     history.append({"role": "user", "content": text})
     history.append({"role": "assistant", "content": response})

 from PIL import Image
 import torch
 import spaces
+import json
 # Load the processor and model
 processor = AutoProcessor.from_pretrained(
     device_map='auto'
 )
+import json
def _balanced_span_end(text, start):
    """Return the index just past the bracket that balances ``text[start]``.

    Only bracket characters are counted; quotes are deliberately not
    interpreted, because this is used solely to step over text that has
    already failed to parse as JSON.

    Returns ``None`` when a closer of the wrong kind is met or the text
    ends before the opener is balanced.
    """
    closer_for = {'{': '}', '[': ']'}
    expected = []  # closers still owed, innermost last
    for pos in range(start, len(text)):
        char = text[pos]
        if char in closer_for:
            expected.append(closer_for[char])
        elif char in '}]':
            # ``expected`` is never empty here: text[start] is an opener and
            # we return as soon as the last owed closer has been consumed.
            if expected.pop() != char:
                return None
            if not expected:
                return pos + 1
    return None


def wrap_json_in_markdown(text):
    """Wrap every JSON object or array embedded in ``text`` in a fenced block.

    The text is scanned left to right. At each ``{`` or ``[`` the JSON
    decoder is asked to parse a value starting there; letting the decoder
    find the end of the value (instead of counting brackets by hand) means
    brackets inside JSON string literals no longer cut a candidate short.
    Each value that parses is replaced by a Markdown ``json`` code fence
    containing the value re-serialised with a 4-space indent. All other
    text is passed through unchanged.

    When parsing fails, a bracket-balanced span is skipped as a whole so
    that non-JSON bracketed text is left untouched. If the opener has no
    well-formed match (wrong closer, or the text ends first), scanning
    resumes right after the opener, so one stray bracket cannot suppress
    wrapping for the rest of the text.

    Args:
        text: The string to scan, e.g. decoded model output.

    Returns:
        A new string with each parsed JSON object/array wrapped as
        ``"\\n```json\\n<pretty JSON>\\n```\\n"``.
    """
    decoder = json.JSONDecoder()
    result = []
    copied_upto = 0  # everything before this index is already in ``result``
    i = 0
    while i < len(text):
        if text[i] not in '{[':
            i += 1
            continue
        try:
            parsed, end = decoder.raw_decode(text, i)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError guards
            # against pathologically deep nesting in untrusted model output.
            span_end = _balanced_span_end(text, i)
            i = i + 1 if span_end is None else span_end
            continue
        result.append(text[copied_upto:i])  # plain text before the JSON
        result.append(f"\n```json\n{json.dumps(parsed, indent=4)}\n```\n")
        i = copied_upto = end
    result.append(text[copied_upto:])  # trailing plain text
    return ''.join(result)
 @spaces.GPU()
 def process_image_and_text(image, text):
     # Only get generated tokens; decode them to text
     generated_tokens = output[0, inputs['input_ids'].size(1):]
     generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+    generated_text_w_json_wrapper = wrap_json_in_markdown(generated_text)
+    return generated_text_w_json_wrapper
 def chatbot(image, text, history):
     if image is None:
         return history + [("Please upload an image first.", None)]
     response = process_image_and_text(image, text)
     history.append({"role": "user", "content": text})
     history.append({"role": "assistant", "content": response})