|
import streamlit as st |
|
from phi.agent import Agent |
|
from phi.model.google import Gemini |
|
from phi.tools.duckduckgo import DuckDuckGo |
|
from google.generativeai import upload_file, get_file |
|
import time |
|
import google.generativeai as genai |
|
from pathlib import Path |
|
import tempfile |
|
import os |
|
import yt_dlp |
|
|
|
from dotenv import load_dotenv |
|
load_dotenv() |
|
|
|
# Look up the Gemini credential in the process environment and, when one is
# present, hand it to the google.generativeai client.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)
|
|
|
|
|
# Configure the browser tab title and the wide layout, then render the two
# headings shown at the top of the page.
st.set_page_config(page_title="Agentic Video Captioning Platform::", layout="wide")

st.title("Generate the story of videos:")
st.header("Agentic Video Captioning Platform:")
|
|
|
@st.cache_resource
def initialize_agent():
    """Build the agent that turns an uploaded video into a written story.

    The agent is named "Video AI summarizer", runs on the
    ``gemini-2.0-flash-exp`` Gemini model, is given the DuckDuckGo tool and
    is asked for markdown output. The function is wrapped in
    ``st.cache_resource`` (presumably so the agent is built once and reused
    across script reruns).

    Returns:
        The configured ``Agent`` instance.
    """
    gemini_model = Gemini(id="gemini-2.0-flash-exp")
    search_tools = [DuckDuckGo()]
    return Agent(
        name="Video AI summarizer",
        model=gemini_model,
        tools=search_tools,
        markdown=True,
    )


# Module-level agent handle used by the analysis step of the script.
multimodal_Agent = initialize_agent()
|
|
|
def download_tiktok_video(url: str, output_path: str):
    """Download a TikTok video with yt-dlp and report the outcome.

    Args:
        url: Address of the video to fetch. Surrounding whitespace is
            ignored; an empty or blank value is rejected without contacting
            yt-dlp.
        output_path: Value passed to yt-dlp as its ``outtmpl`` option, i.e.
            where the downloaded file is written.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``True`` when yt-dlp
        finished without raising; ``message`` is then a confirmation that
        includes ``output_path``. On failure ``success`` is ``False`` and
        ``message`` carries the error text. This function never raises for
        download errors, so callers can branch on the flag alone.
    """
    cleaned_url = url.strip() if isinstance(url, str) else ""
    if not cleaned_url:
        return False, "No video URL provided."

    ydl_opts = {
        'outtmpl': output_path,
        'format': 'best',
        # Replace any file already sitting at output_path; by default yt-dlp
        # skips an existing video file, which would report a stale video
        # from an earlier run as a successful download.
        'overwrites': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([cleaned_url])
    except Exception as e:
        # Deliberately broad: every failure is converted into the
        # (False, message) result the caller displays to the user.
        return False, str(e)

    return True, f"Video downloaded successfully to {output_path}"
|
|
|
|
|
def _wait_for_video_processing(uploaded_video, poll_seconds=1, timeout_seconds=600):
    """Poll the uploaded file until it is no longer in the PROCESSING state.

    Args:
        uploaded_video: File handle returned by ``upload_file``.
        poll_seconds: Pause between two status checks.
        timeout_seconds: Upper bound on the total wait, so a stuck upload
            cannot block the script forever.

    Returns:
        The refreshed file handle once processing has finished.

    Raises:
        TimeoutError: The file was still PROCESSING when the deadline passed.
        RuntimeError: The service reported the FAILED state.
    """
    deadline = time.monotonic() + timeout_seconds
    while uploaded_video.state.name == "PROCESSING":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Video was still processing after {timeout_seconds} seconds."
            )
        time.sleep(poll_seconds)
        uploaded_video = get_file(uploaded_video.name)

    if uploaded_video.state.name == "FAILED":
        raise RuntimeError("The uploaded video could not be processed.")
    return uploaded_video


# Instructions sent to the agent together with the uploaded video. The aspect
# list is numbered 1-6 consecutively.
analysis_prompt = "\n".join([
    "Analyze the video thoroughly and provide a comprehensive storyline. The total generation should not be more than 500 words. As a whole from the video, detail the following aspects:",
    "",
    "1. **Story:** How the set scene introduced and tone is set. What is happening in the scene? Describe key visuals and actions.",
    "2. **Characters**: Identify each character, noting their expressions, attire, actions, and interactions. Highlight emotional nuances and gestures.",
    "3. **Narration or Voiceover**: Describe what types of narrations or voiceovers are used in the video.",
    "4. **Transitions and Pacing**: Describe scene changes, including cuts, fades, zooms, or shifts in focus, and how they affect the flow.",
    "5. **Mood and Tone**: Capture the overall mood and tone of each scene, mentioning any music or sound effects that enhance these elements.",
    "6. **Visible Texts or Brandings**: Capture all the visible texts from the videos and brandings too only if some brandings of product or service is done.",
    "",
    "Conclude with a cohesive summary that ties the scenes together, highlighting all promotional themes, ensuring each brand and key text is accurately referenced in the storyline.",
])

video_url = st.text_input("Enter TikTok video URL")

if st.button('Generate the story'):
    if not video_url:
        st.warning('PLEASE ENTER A VALID URL')
    else:
        # Each run downloads into its own private directory so concurrent
        # sessions (or a leftover file from an earlier run) can never hand
        # this run somebody else's video.
        download_dir = tempfile.TemporaryDirectory(prefix="tiktok_video_")
        temp_video_path = os.path.join(download_dir.name, 'tiktok_video.mp4')
        success = False

        try:
            with st.spinner("Downloading video..."):
                success, message = download_tiktok_video(video_url, temp_video_path)

            if not success:
                st.error(f"Error downloading video: {message}")
            else:
                st.success(message)
                st.video(temp_video_path, format="video/mp4", start_time=0)

                try:
                    with st.spinner("Generating the story of the video"):
                        # Upload the file, then wait until it is ready
                        # before asking the agent to analyse it.
                        processed_video = _wait_for_video_processing(
                            upload_file(temp_video_path)
                        )
                        response = multimodal_Agent.run(
                            analysis_prompt, videos=[processed_video]
                        )

                    st.subheader('Analysis Result')
                    st.markdown(response.content)
                except Exception as error:
                    # Top-level boundary of the script: surface any upload,
                    # processing or model error to the user instead of a
                    # traceback.
                    st.error(f"An error occurred: {error}")
        finally:
            # Always remove the working directory, including partial files
            # left behind by a failed download.
            try:
                download_dir.cleanup()
            except OSError as e:
                st.warning(f"Failed to delete video: {e}")
            else:
                if success:
                    st.info("Downloaded TikTok video deleted.")
|
|