Space status: Runtime error
Initial Commit

Files changed:
- .gitignore +1 -0
- Videobook/Videobook.py +73 -0
- Videobook/__init__.py +1 -0
- app.py +20 -0
- requirements.txt +8 -0
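
Taken together, the commit adds a small story-to-video pipeline: Videobook/Videobook.py splits the story into sentences, narrates it with gTTS, measures each sentence's audio duration with mutagen, generates one image per sentence through the Segmind API, holds each image on screen for the length of its sentence, and muxes frames and narration with ffmpeg; app.py wraps the pipeline in a Gradio UI.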
.gitignore
ADDED
@@ -0,0 +1 @@
__pycache__
Videobook/Videobook.py
ADDED
@@ -0,0 +1,73 @@
import os
import cv2
import numpy as np
from gtts import gTTS
from mutagen.mp3 import MP3
import nltk
import ffmpeg

nltk.download('punkt')
from nltk.tokenize import sent_tokenize
from math import ceil
from segmindapi import SD2_1, Kadinsky

class Videobook:

    def get_sentences(self, story):
        # Split the story into sentences with NLTK's Punkt tokenizer
        return sent_tokenize(story)

    def generate_voice(self, story, sentences, path='tts.mp3'):
        # Synthesize each sentence separately only to measure its duration,
        # then synthesize the whole story as the actual narration track
        lengths = []
        for i, sentence in enumerate(sentences):
            tts = gTTS(sentence, lang='en')
            tts.save('tts' + str(i) + '.mp3')
        for i in range(len(sentences)):
            lengths.append(MP3('tts' + str(i) + '.mp3').info.length)
            os.remove(os.path.join(os.getcwd(), 'tts' + str(i) + '.mp3'))
        tts = gTTS(story, lang='en')
        tts.save(path)
        return lengths

    def generate_imgs(self, sentences, steps):
        # One image per sentence; the prompt is "<style> of <sentence>, <tags>"
        imgs = []
        for sentence in sentences:
            prompt = self.style + ' of ' + sentence + ', ' + self.tags
            imgs.append(self.pipe.generate(prompt, num_inference_steps=steps))
        return imgs

    def addBuffer(self, imgs, lengths):
        # Repeat each image for as many frames as its sentence's narration
        # lasts, so the picture stays on screen while the sentence is read
        imgs_buff = []
        for i, img in enumerate(imgs):
            for _ in range(ceil(lengths[i] * self.fps)):
                imgs_buff.append(img)
        return imgs_buff

    def imgs_to_video(self, imgs, video_name='video.mp4'):
        video_dims = (imgs[0].width, imgs[0].height)
        # 'mp4v' matches the .mp4 container; the original 'DIVX' fourcc is
        # unreliable with .mp4 output on many platforms
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video = cv2.VideoWriter(video_name, fourcc, self.fps, video_dims)
        for img in imgs:
            # PIL images are RGB; OpenCV expects BGR
            video.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
        video.release()

    def make_video(self, imgs, lengths, video_name="finished_video.mp4"):
        self.imgs_to_video(self.addBuffer(imgs, lengths), 'test_video.mp4')
        input_audio = ffmpeg.input(os.path.join(os.getcwd(), 'tts.mp3'))
        # Relative path: the original hardcoded Colab path
        # '/content/test_video.mp4' does not exist on a Space
        input_video = ffmpeg.input('test_video.mp4')
        ffmpeg.concat(input_video, input_audio, v=1, a=1).output(video_name).run(overwrite_output=True)

    def generate(self, story, api_key, fps, style, tags, model, steps):
        self.fps = fps
        self.style = style
        self.tags = tags
        if model == "Stable Diffusion v2.1":
            self.pipe = SD2_1(api_key)
        else:
            self.pipe = Kadinsky(api_key)
        sentences = self.get_sentences(story)
        lengths = self.generate_voice(story, sentences)
        images = self.generate_imgs(sentences, steps)
        self.make_video(images, lengths)
        return "finished_video.mp4"
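
For reference, a minimal sketch of driving the pipeline without the Gradio UI; the story text and API-key placeholder are illustrative, not from the commit. Note the frame math in addBuffer: at fps = 10, a sentence narrated in 3.2 s contributes ceil(3.2 * 10) = 32 copies of its image.

# Hypothetical smoke test for the class above; requires segmindapi,
# a valid Segmind API key, and the other requirements installed.
from Videobook import Videobook

vb = Videobook()
out_path = vb.generate(
    story="The sun rose over the hills. A small fox set out to explore.",
    api_key="YOUR_SEGMIND_API_KEY",  # placeholder
    fps=10,
    style="Cartoon",
    tags="high quality, 3d render",
    model="Stable Diffusion v2.1",
    steps=25,
)
print(out_path)  # "finished_video.mp4", written to the working directory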
Videobook/__init__.py
ADDED
@@ -0,0 +1 @@
from .Videobook import Videobook
app.py
ADDED
@@ -0,0 +1,20 @@
from Videobook import Videobook
import gradio as gr

gen = Videobook()
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            story = gr.Textbox(lines=5, label="Story")
            api_key = gr.Textbox(label="Segmind API Key")
            tags = gr.Textbox(value="high quality, 3d render", label="Tags")
            style = gr.Dropdown(["Cartoon", "Anime Style", "Realistic Image"], value="Cartoon", label="Style")
            model = gr.Dropdown(["Stable Diffusion v2.1", "Kadinsky"], value="Stable Diffusion v2.1", label="Model")
            with gr.Row():
                steps = gr.Radio([25, 50], value=50, label="Steps")
                fps = gr.Radio([10, 24, 60], value=10, label="FPS")
        output = gr.Video(label="Generated Video")
    run = gr.Button("Generate Video")
    run.click(gen.generate, inputs=[story, api_key, fps, style, tags, model, steps], outputs=output)

demo.launch()
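
A note on the UI code: the committed version used gr.inputs.Textbox and gr.outputs.Video, namespaces that were deprecated in Gradio 3 and removed in Gradio 4; with the unpinned gradio requirement below, that removal is a plausible cause of the Space's "Runtime error" status. The rewrite above maps the old calls to their current equivalents:

# Old API (removed in Gradio 4)               Current equivalent
# gr.inputs.Textbox(default="x", label="L")   gr.Textbox(value="x", label="L")
# gr.outputs.Video()                          gr.Video()
# gr.Button(label="Generate Video")           gr.Button("Generate Video")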
requirements.txt
ADDED
@@ -0,0 +1,8 @@
segmindapi
gTTS
mutagen
nltk
ffmpeg-python
opencv-python
numpy
gradio
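
Since nothing here is version-pinned, the Space installs whatever versions are current at build time; the Gradio API break noted above is one consequence. A hedged alternative, assuming app.py is updated for Gradio 4 as sketched earlier:

segmindapi
gTTS
mutagen
nltk
ffmpeg-python
opencv-python-headless  # assumption: headless build avoids libGL issues on a display-less server
numpy
gradio>=4  # the rewritten app.py uses the post-3.x component API

Pinning the exact tested versions (e.g. gradio==<tested version>) would make the build reproducible.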