Spaces:

paralym
/

MAmmoTH-VL-8B

Runtime error

App Files Files Community

paralym committed on Dec 8, 2024

Commit

274c497

verified ·

1 Parent(s): a35f45a

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -11

app.py CHANGED Viewed

@@ -198,7 +198,7 @@ def is_valid_image_filename(name):
         return False
-def sample_frames(video_file, num_frames):
     video = cv2.VideoCapture(video_file)
     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
     interval = total_frames // num_frames
@@ -213,6 +213,36 @@ def sample_frames(video_file, num_frames):
     video.release()
     return frames
 def load_image(image_file):
     if image_file.startswith("http") or image_file.startswith("https"):
@@ -303,7 +333,8 @@ def bot(history, temperature, top_p, max_output_tokens):
             images_this_term.append(message[0][0])
             if is_valid_video_filename(message[0][0]):
                 # raise ValueError("Video is not supported")
-                num_new_images += our_chatbot.num_frames
             elif is_valid_image_filename(message[0][0]):
                 print("#### Load image from local file",message[0][0])
                 num_new_images += 1
@@ -314,6 +345,15 @@ def bot(history, temperature, top_p, max_output_tokens):
             num_new_images = 0
             # previous_image = False
     all_image_hash = []
     all_image_path = []
     for file_path in images_this_term:
@@ -350,14 +390,6 @@ def bot(history, temperature, top_p, max_output_tokens):
                     with open(file_path, "rb") as src, open(filename, "wb") as dst:
                         dst.write(src.read())
-    image_list = []
-    for f in images_this_term:
-        if is_valid_video_filename(f):
-            image_list += sample_frames(f, our_chatbot.num_frames)
-        elif is_valid_image_filename(f):
-            image_list.append(load_image(f))
-        else:
-            raise ValueError("Invalid image file")
     image_tensor = [
         our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
@@ -601,7 +633,7 @@ with gr.Blocks(
                         "text": "Please describe the video in detail.",
                     },
                 ]
-            ]
             inputs=[chat_input],
             label="Real World Video Case"
         )

         return False
+def sample_frames_old(video_file, num_frames):
     video = cv2.VideoCapture(video_file)
     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
     interval = total_frames // num_frames
     video.release()
     return frames
+def sample_frames_frames(video_path, frame_count=32):
+    """Sample evenly spaced frames from a video and return them as base64 JPEGs.
+
+    The video is opened with ``VideoReader(video_path, ctx=cpu(0))``. At most
+    ``frame_count`` frames are taken, spread across the whole clip, and each
+    frame is returned exactly once.
+
+    Args:
+        video_path: Path of the video, passed straight to ``VideoReader``.
+        frame_count: Maximum number of frames to return.
+
+    Returns:
+        A list of base64-encoded (UTF-8 decoded) JPEG strings in frame order.
+        The list is shorter than ``frame_count`` when the video has fewer
+        frames, and empty when the video has no frames or ``frame_count`` is
+        not positive.
+    """
+    vr = VideoReader(video_path, ctx=cpu(0))
+    total_frames = len(vr)
+
+    # Never ask for more frames than exist. This also covers empty videos and
+    # a non-positive frame_count without dividing by zero.
+    sample_count = min(frame_count, total_frames)
+    if sample_count <= 0:
+        return []
+
+    video_frames = []
+    for k in range(sample_count):
+        # Because sample_count <= total_frames, these indices are strictly
+        # increasing: no frame repeats and the picks span the full clip rather
+        # than only its beginning.
+        index = k * total_frames // sample_count
+        # NOTE(review): assumes Image is PIL.Image and frames are HxWx3 uint8
+        # arrays, as required for JPEG encoding; confirm against the imports.
+        frame_image = Image.fromarray(vr[index].asnumpy())
+        buffered = io.BytesIO()
+        frame_image.save(buffered, format="JPEG")
+        video_frames.append(base64.b64encode(buffered.getvalue()).decode("utf-8"))
+    return video_frames
 def load_image(image_file):
     if image_file.startswith("http") or image_file.startswith("https"):
             images_this_term.append(message[0][0])
             if is_valid_video_filename(message[0][0]):
                 # raise ValueError("Video is not supported")
+                # num_new_images += our_chatbot.num_frames
+                num_new_images += len(sample_frames(message[0][0], our_chatbot.num_frames))
             elif is_valid_image_filename(message[0][0]):
                 print("#### Load image from local file",message[0][0])
                 num_new_images += 1
             num_new_images = 0
             # previous_image = False
+    image_list = []
+    for f in images_this_term:
+        if is_valid_video_filename(f):
+            image_list += sample_frames(f, our_chatbot.num_frames)
+        elif is_valid_image_filename(f):
+            image_list.append(load_image(f))
+        else:
+            raise ValueError("Invalid image file")
     all_image_hash = []
     all_image_path = []
     for file_path in images_this_term:
                     with open(file_path, "rb") as src, open(filename, "wb") as dst:
                         dst.write(src.read())
     image_tensor = [
         our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
                         "text": "Please describe the video in detail.",
                     },
                 ]
+            ],
             inputs=[chat_input],
             label="Real World Video Case"
         )