Spaces:

metek7
/

instagram-short-summarizing

Runtime error

App Files Files Community

metek7 committed on Oct 8, 2024

Commit

3b421e3

verified ·

1 Parent(s): f0272e1

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -68

app.py CHANGED Viewed

@@ -1,82 +1,65 @@
 import gradio as gr
 import torch
-from llava.model.builder import load_pretrained_model
-from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
-from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
-from llava.conversation import conv_templates
-import copy
 from decord import VideoReader, cpu
 import numpy as np
-title = "# 🎥 Instagram Short Video Analyzer with LLaVA-Video"
-description = """
-This application uses the LLaVA-Video-7B-Qwen2 model to analyze Instagram short videos.
-Upload your Instagram short video and ask questions about its content!
-"""
-def load_video(video_path, max_frames_num=64, fps=1):
     vr = VideoReader(video_path, ctx=cpu(0))
-    total_frame_num = len(vr)
-    video_time = total_frame_num / vr.get_avg_fps()
-    fps = round(vr.get_avg_fps()/fps)
-    frame_idx = list(range(0, len(vr), fps))
-    if len(frame_idx) > max_frames_num:
-        frame_idx = np.linspace(0, total_frame_num - 1, max_frames_num, dtype=int).tolist()
-    frame_time = [i/vr.get_avg_fps() for i in frame_idx]
-    frame_time = ",".join([f"{i:.2f}s" for i in frame_time])
-    spare_frames = vr.get_batch(frame_idx).asnumpy()
-    return spare_frames, frame_time, video_time
-# Load the model
-pretrained = "lmms-lab/LLaVA-Video-7B-Qwen2"
-model_name = "llava_qwen"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-device_map = "auto"
-print("Loading model...")
-tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map=device_map)
-model.eval()
-print("Model loaded successfully!")
-def process_instagram_short(video_path, question):
-    max_frames_num = 64
-    video, frame_time, video_time = load_video(video_path, max_frames_num)
-    video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].to(device).bfloat16()
-    video = [video]
-    time_instruction = f"This is an Instagram short video lasting {video_time:.2f} seconds. {len(video[0])} frames were sampled at {frame_time}. Analyze this short video and answer the following question:"
-    full_question = DEFAULT_IMAGE_TOKEN + f"{time_instruction}\n{question}"
-    conv = copy.deepcopy(conv_templates["qwen_1_5"])
-    conv.append_message(conv.roles[0], full_question)
-    conv.append_message(conv.roles[1], None)
-    prompt_question = conv.get_prompt()
-    input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
-    with torch.no_grad():
-        output = model.generate(
-            input_ids,
-            images=video,
-            modalities=["video"],
-            do_sample=False,
-            temperature=0,
-            max_new_tokens=4096,
-        )
-    response = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()
-    return response
-def gradio_interface(video_file, question):
     if video_file is None:
         return "Please upload an Instagram short video."
-    response = process_instagram_short(video_file, question)
     return response
 with gr.Blocks() as demo:
-    gr.Markdown(title)
-    gr.Markdown(description)
     with gr.Row():
         with gr.Column():
@@ -86,10 +69,10 @@ with gr.Blocks() as demo:
         output = gr.Textbox(label="Analysis Result")
     submit_button.click(
-        fn=gradio_interface,
         inputs=[video_input, question_input],
         outputs=output
     )
 if __name__ == "__main__":
-    demo.launch(show_error=True)

+import sys
+import subprocess
+import pkg_resources
+# Maps each distribution key looked up in pkg_resources.working_set to the
+# name handed to pip when that distribution is not installed.
+required_packages = {
+    'torch': 'torch',
+    'gradio': 'gradio',
+    'transformers': 'transformers',
+    'decord': 'decord',
+    'numpy': 'numpy'
+}
+def install_packages(packages):
+    """Install the given distributions with pip, using the running interpreter.
+
+    Args:
+        packages: Iterable of pip requirement strings (e.g. ``"numpy"``).
+
+    Raises:
+        subprocess.CalledProcessError: If pip exits with a non-zero status.
+    """
+    requirements = list(packages)
+    if not requirements:
+        # Nothing to install; a bare "pip install" would exit with an error.
+        return
+    # A single pip invocation lets the resolver consider all requirements
+    # together instead of installing each one in isolation, and avoids
+    # paying pip's start-up cost once per package.
+    subprocess.check_call([sys.executable, "-m", "pip", "install", *requirements])
+def check_and_install_packages():
+    """Install every entry of ``required_packages`` that is not yet present.
+
+    The keys of ``required_packages`` are compared with the distribution keys
+    reported by ``pkg_resources.working_set``; the pip names of the missing
+    ones are passed to ``install_packages``. Progress is printed to stdout.
+    """
+    present = {dist.key for dist in pkg_resources.working_set}
+    to_install = []
+    for key, pip_name in required_packages.items():
+        if key not in present:
+            to_install.append(pip_name)
+    if not to_install:
+        print("All required packages are already installed.")
+        return
+    print("Installing missing packages...")
+    install_packages(to_install)
+    print("Packages installed successfully.")
+# Check and install required packages
+check_and_install_packages()
+# Now import the required modules
 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from decord import VideoReader, cpu
 import numpy as np
+# Define a simple video processing function (placeholder for LLaVA-Video)
+def process_video(video_path, max_frames=64):
+    """Decode up to ``max_frames`` evenly spaced frames from a video file.
+
+    Args:
+        video_path: Path of the video file, passed to decord's ``VideoReader``.
+        max_frames: Upper bound on the number of frames to sample.
+
+    Returns:
+        The sampled frames as a NumPy array (``get_batch(...).asnumpy()``).
+
+    Raises:
+        ValueError: If the video contains no frames.
+    """
     vr = VideoReader(video_path, ctx=cpu(0))
+    total_frames = len(vr)
+    if total_frames == 0:
+        raise ValueError(f"Video has no frames: {video_path!r}")
+    # Never request more samples than there are frames; otherwise linspace
+    # repeats indices and short clips are decoded with duplicate frames.
+    num_samples = min(max_frames, total_frames)
+    frame_indices = np.linspace(0, total_frames - 1, num_samples, dtype=int)
+    frames = vr.get_batch(frame_indices).asnumpy()
+    return frames
+# Define a simple text generation function (placeholder for actual model)
+def generate_response(video_frames, question):
+    """Return a placeholder answer echoing the frame count and the question.
+
+    No model is invoked here; this stands in for LLaVA-Video inference.
+    """
+    frame_count = len(video_frames)
+    return f"Analyzed {frame_count} frames. Your question was: {question}"
+def analyze_instagram_short(video_file, question):
+    """Gradio callback: sample frames from the upload and answer the question.
+
+    Returns a prompt to upload a video when ``video_file`` is ``None``.
+    """
+    if video_file is not None:
+        return generate_response(process_video(video_file), question)
+    return "Please upload an Instagram short video."
+# Create Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎥 Instagram Short Video Analyzer")
+    gr.Markdown("Upload your Instagram short video and ask questions about its content!")
     with gr.Row():
         with gr.Column():
         output = gr.Textbox(label="Analysis Result")
     submit_button.click(
+        fn=analyze_instagram_short,
         inputs=[video_input, question_input],
         outputs=output
     )
 if __name__ == "__main__":
+    demo.launch()