Spaces:

kikopubisher
/

egchat

Runtime error

App Files Files Community

egchat / app.py

kikopubisher

Update app.py

96698ba verified 6 months ago

raw

history blame

2.3 kB

	import streamlit as st
	from PIL import Image
	import torch
	from transformers import pipeline
	import numpy as np
	import io

	# Page setup: browser-tab title and icon. Kept as the first Streamlit call
	# in the script.
	st.set_page_config(
	    page_icon="🎥",
	    page_title="Image to Video with Editing",
	)

	# Heading shown at the top of the page.
	st.title(
	    "Stable Video Diffusion - Image to Video"
	)

	# Short usage instructions for the visitor.
	st.write(
	    "Upload an image to generate a video. You can also adjust settings for video generation."
	)

	# Load the model
	@st.cache_resource
	def load_model():
	    """Build the image-to-video pipeline for the SVD img2vid-xt checkpoint.

	    The function is decorated with ``st.cache_resource`` so that Streamlit
	    can reuse the returned object instead of rebuilding it on each rerun of
	    the script.

	    Returns:
	        The object produced by ``transformers.pipeline`` for the
	        ``"image-to-video"`` task, placed on the GPU in float16 when CUDA
	        is available and on the CPU in float32 otherwise.
	    """
	    model_id = "stabilityai/stable-video-diffusion-img2vid-xt"

	    # Fall back to CPU/float32 instead of failing outright on hosts without
	    # a GPU; half precision is only requested when CUDA is present.
	    has_cuda = torch.cuda.is_available()

	    # Make sure the correct class from the transformers library is used, if
	    # one is available.
	    # NOTE(review): "image-to-video" does not appear to be a task that
	    # transformers.pipeline registers, and this checkpoint is published for
	    # diffusers' StableVideoDiffusionPipeline -- confirm, and switch
	    # loaders (plus add the dependency) if this call raises.
	    return pipeline(
	        "image-to-video",
	        model=model_id,
	        torch_dtype=torch.float16 if has_cuda else torch.float32,
	        # Device placement goes through the documented `device` argument
	        # rather than calling `.to("cuda")` on the returned pipeline.
	        device="cuda" if has_cuda else "cpu",
	    )

	# Obtain the generation pipeline from load_model().
	pipe = load_model()

	# User input
	uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

	# Options for adjusting the generated video
	frame_count = st.slider("Number of frames", min_value=10, max_value=50, value=25, step=5)

	# Nothing below runs until a file has been uploaded.
	if uploaded_image is not None:
	    image = Image.open(uploaded_image)
	    st.image(image, caption='Uploaded Image', use_column_width=True)

	    if st.button('Generate Video'):
	        with st.spinner("Generating video..."):
	            # Convert the image to video. PNG uploads may be RGBA or
	            # palette images, so the input is normalised to RGB first
	            # (presumably the model expects 3-channel input -- confirm).
	            video_frames = pipe(image.convert("RGB"), num_frames=frame_count)

	            # Encode once and reuse the same bytes for both the player and
	            # the download button, so the two can never disagree.
	            # NOTE(review): this assumes the first pipeline output exposes
	            # save(fp, format="mp4"). Pillow images have no MP4 writer, so
	            # confirm what pipe() actually returns; a real video encoder
	            # may be needed here.
	            video_buffer = io.BytesIO()
	            video_frames[0].save(video_buffer, format="mp4")
	            video_bytes = video_buffer.getvalue()

	        # Report success only after generation and encoding have finished.
	        st.success("Video generated successfully!")

	        # Display the video
	        st.video(video_bytes, format="video/mp4")

	        # Download the video
	        st.download_button(
	            label="Download Video",
	            data=video_bytes,
	            file_name="generated_video.mp4",
	            mime="video/mp4",
	        )

	# Background information about the model, rendered at the bottom of the page.
	about_model_text = """
	### About the Model:
	SVD Image-to-Video is a latent diffusion model trained to generate short video clips from an image conditioning.
	This model generates frames at a resolution of 576x1024 given a context frame of the same size, fine-tuned from the SVD Image-to-Video [14 frames] model.
	The widely used f8-decoder is also fine-tuned for temporal consistency, making the output videos more stable and coherent.
	"""
	st.write(about_model_text)