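"""Exercise Video Analyzer: a Gradio app that asks a Qwen2-VL vision-language
model to identify the exercise in an uploaded video, count repetitions, and
estimate the calories burned."""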
import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from qwen_vl_utils import process_vision_info  # pip install qwen-vl-utils

# Load the model and processor (on CPU). transformers has no QwenProcessor or
# QwenForVisionAndLanguageGeneration class; the video chat format used below
# is the Qwen2-VL one, so we load a Qwen2-VL instruct checkpoint instead.
MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
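# This Space assumes CPU-only hardware. If a GPU is available, moving the model
# (and, before generate(), the inputs) onto it speeds things up considerably:
# if torch.cuda.is_available():
#     model = model.to("cuda")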
# Analyze the uploaded exercise video and return the model's text answer
def analyze_exercise(video_path):
    # Build the chat-style message that pairs the uploaded video with the question
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "video",
                    "video": video_path,  # gr.Video hands us a local file path
                },
                {
                    "type": "text",
                    "text": (
                        "Analyze the exercise shown in the video. "
                        "Please provide details about the exercise type, the number of repetitions, "
                        "and an estimate of calories burned during the video."
                    ),
                },
            ],
        }
    ]

    # Render the chat template into a single text prompt
    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Sample frames from the video file (qwen-vl-utils reads the path in the
    # "video" entry above and returns frame tensors the processor understands)
    image_inputs, video_inputs = process_vision_info(messages)

    # Prepare inputs for the model: text prompt plus sampled video frames
    inputs = processor(
        text=[text_prompt],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    # Generate model output
    output_ids = model.generate(**inputs, max_new_tokens=1024)

    # Trim the echoed prompt tokens so only the newly generated answer remains
    trimmed_ids = [
        out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)
    ]
    output_text = processor.batch_decode(
        trimmed_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )
    return output_text[0]
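# Note: CPU-only generation over sampled video frames is slow; expect
# noticeable per-request latency.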
# Set up the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## Exercise Video Analyzer")
    gr.Markdown("Upload a video to analyze the exercise, count repetitions, and estimate calories burned.")
    video_input = gr.Video(label="Upload Exercise Video")
    text_output = gr.Textbox(label="Exercise Analysis")
    analyze_button = gr.Button("Analyze Exercise")

    # When the analyze button is clicked, run analyze_exercise on the upload
    analyze_button.click(analyze_exercise, inputs=video_input, outputs=text_output)

# Launch the app
app.launch()
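# On Hugging Face Spaces, launch() is all that's needed; when running locally,
# app.launch(share=True) would also create a temporary public link.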