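Move audio tensor to the configured device before captioning; read captions and image from Streamlit session state when generating image and video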
Browse files
app.py
CHANGED
@@ -48,8 +48,8 @@ def get_audio(audio_path, duration=10, target_sr=16000):
|
|
48 |
|
49 |
def captioning(model,audio_path):
|
50 |
audio_tensor = get_audio(audio_path = audio_path)
|
51 |
-
|
52 |
-
|
53 |
with torch.no_grad():
|
54 |
output = model.generate(
|
55 |
samples=audio_tensor,
|
@@ -102,11 +102,11 @@ if st.session_state.audio_input:
|
|
102 |
st.text(captions)
|
103 |
if st.session_state.captions:
|
104 |
if st.button("Generate Image and video from text prompt"):
|
105 |
-
st.session_state.image = image_service(captions).images[0]
|
106 |
image = st.session_state.image
|
107 |
video = video_model(
|
108 |
-
prompt = captions,
|
109 |
-
image=image,
|
110 |
num_inference_steps=50
|
111 |
).frames[0]
|
112 |
st.session_state.video = video
|
|
|
48 |
|
49 |
def captioning(model,audio_path):
|
50 |
audio_tensor = get_audio(audio_path = audio_path)
|
51 |
+
if device is not None:
|
52 |
+
audio_tensor = audio_tensor.to(device)
|
53 |
with torch.no_grad():
|
54 |
output = model.generate(
|
55 |
samples=audio_tensor,
|
|
|
102 |
st.text(captions)
|
103 |
if st.session_state.captions:
|
104 |
if st.button("Generate Image and video from text prompt"):
|
105 |
+
st.session_state.image = image_service(st.session_state.captions).images[0]
|
106 |
image = st.session_state.image
|
107 |
video = video_model(
|
108 |
+
prompt = st.session_state.captions,
|
109 |
+
image=st.session_state.image,
|
110 |
num_inference_steps=50
|
111 |
).frames[0]
|
112 |
st.session_state.video = video
|