Spaces:

yasserrmd
/

DailySnap

Running

App Files Files Community

yasserrmd committed on Oct 2, 2024

Commit

b7b43b2

verified ·

1 Parent(s): e89f20e

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py CHANGED Viewed

@@ -5,11 +5,85 @@ import cv2
 import torch
 import os
 import spaces
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)
 # Define activity categories based on detected objects
 activity_categories = {
     "Working": ["laptop", "computer", "keyboard", "office chair"],

 import torch
 import os
 import spaces
+import markdown
+import requests
+import torch
+from PIL import Image
+from transformers import MllamaForConditionalGeneration, AutoProcessor
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)
+# NOTE(review): the assignment to `model` a few lines below rebinds the name,
+# so the YOLOv10 instance created above is no longer reachable as `model`
+# after this point. Any later code that expects `model` to be the detector
+# will receive the Llama vision model instead -- confirm this is intended.
+model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+# Vision-language model loaded in bfloat16. Placement is delegated to
+# `device_map="auto"` rather than the `device` string computed above.
+model = MllamaForConditionalGeneration.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+# Processor paired with `model_id`; generate__image_desc uses it to build the
+# chat prompt, prepare the model inputs, and decode the generated ids.
+processor = AutoProcessor.from_pretrained(model_id)
+# Instruction text sent with every image. Despite the name, generate__image_desc
+# places it in the *user* turn next to the image, not in a system-role message.
+SYSTEM_INSTRUCTION="You are DailySnap, your job is to anlyse the given image and provide daily journal about the image and use some random time"
+def extract_assistant_reply(input_string):
+    """Return the assistant's turn from a decoded Llama-style chat transcript.
+
+    The transcript is expected to contain the assistant header
+    ``<|start_header_id|>assistant<|end_header_id|>``. Everything after the
+    first such header, up to (but not including) the next ``<|eot_id|>``
+    end-of-turn marker, is returned with surrounding whitespace stripped.
+    If generation stopped before an end-of-turn marker was emitted, the text
+    up to the end of the string is returned.
+
+    Args:
+        input_string: Decoded model output that still contains special tokens.
+
+    Returns:
+        The assistant's reply text, or the literal string
+        ``"Assistant's reply not found."`` when no assistant header is present.
+    """
+    start_tag = "<|start_header_id|>assistant<|end_header_id|>"
+    end_tag = "<|eot_id|>"
+
+    _, header, reply = input_string.partition(start_tag)
+    if not header:
+        return "Assistant's reply not found."
+
+    # Cut at the end-of-turn marker so the special token does not leak into
+    # the text that callers go on to render.
+    reply = reply.partition(end_tag)[0]
+    return reply.strip()
+def extract_json_from_markdown(markdown_text):
+    """Parse the JSON payload held in the first fenced code block of a string.
+
+    The first triple-backtick fence is located and its content is parsed as
+    JSON. A leading ``json`` or ``html`` language tag directly after the
+    opening fence is skipped. If the closing fence is missing, the content
+    runs to the end of the text. If the text has no fence at all, the whole
+    text is tried as JSON.
+
+    Args:
+        markdown_text: Text that may contain a fenced code block.
+
+    Returns:
+        The decoded JSON value, or ``None`` when the input is not a string or
+        the candidate text is not valid JSON (a message is printed in both
+        cases).
+    """
+    # Imported locally so the function does not depend on a module-level
+    # `import json` being present elsewhere in the file.
+    import json
+
+    if not isinstance(markdown_text, str):
+        print(f"Error extracting JSON: expected str, got {type(markdown_text).__name__}")
+        return None
+
+    fence = '```'
+    start_idx = markdown_text.find(fence)
+    if start_idx == -1:
+        # No fence: fall back to treating the whole text as the payload.
+        json_str = markdown_text
+    else:
+        start_idx += len(fence)
+        end_idx = markdown_text.find(fence, start_idx)
+        if end_idx == -1:
+            # Unterminated fence (e.g. truncated output): keep everything.
+            end_idx = len(markdown_text)
+        json_str = markdown_text[start_idx:end_idx]
+        # Drop the language tag. Valid JSON can never start with these
+        # words, so removing them cannot damage a real payload.
+        for tag in ("json", "html"):
+            if json_str.startswith(tag):
+                json_str = json_str[len(tag):]
+                break
+
+    try:
+        return json.loads(json_str.strip())
+    except json.JSONDecodeError as e:
+        print(f"Error extracting JSON: {e}")
+        return None
+@spaces.GPU
+def generate__image_desc(image):
+    """Generate a journal-style description of *image* and return it as HTML.
+
+    A single user turn (image placeholder plus ``SYSTEM_INSTRUCTION``) is
+    rendered with the processor's chat template, run through
+    ``model.generate`` for at most 300 new tokens, decoded, reduced to the
+    assistant's reply via ``extract_assistant_reply`` and converted with
+    ``markdown.markdown``.
+
+    Args:
+        image: The image handed to ``processor`` together with the prompt.
+
+    Returns:
+        The value produced by ``markdown.markdown`` for the assistant's reply.
+    """
+    # One user turn: the image slot first, then the instruction text.
+    user_turn = {
+        "role": "user",
+        "content": [
+            {"type": "image"},
+            {"type": "text", "text": SYSTEM_INSTRUCTION},
+        ],
+    }
+    prompt = processor.apply_chat_template([user_turn], add_generation_prompt=True)
+    model_inputs = processor(image, prompt, return_tensors="pt").to(model.device)
+
+    generated = model.generate(**model_inputs, max_new_tokens=300)
+    print(generated)
+
+    # Only the first sequence of the generated batch is decoded.
+    decoded = processor.decode(generated[0])
+    print(decoded)
+
+    reply = extract_assistant_reply(decoded)
+    return markdown.markdown(reply)
 # Define activity categories based on detected objects
 activity_categories = {
     "Working": ["laptop", "computer", "keyboard", "office chair"],