Spaces:
Sleeping
Sleeping
File size: 2,162 Bytes
590f41d a480cb3 590f41d 927ba9d 590f41d a480cb3 f96230b 83af184 f96230b 83af184 f96230b 83af184 f96230b a480cb3 590f41d a480cb3 f96230b a480cb3 590f41d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from tempfile import NamedTemporaryFile
from typing import Any
import streamlit as st
from conette import CoNeTTEModel, conette
@st.cache_resource
def load_conette(*args, **kwargs) -> CoNeTTEModel:
    """Build a CoNeTTE model, forwarding every argument to ``conette``.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned object and hands it back on later calls made with the same
    arguments instead of building the model again.

    Args:
        *args: Positional arguments passed unchanged to ``conette``.
        **kwargs: Keyword arguments passed unchanged to ``conette``.

    Returns:
        The model object produced by ``conette``.
    """
    model = conette(*args, **kwargs)
    return model
def _caption_uploaded_audio(model, audio, kwargs):
    """Run ``model`` on one uploaded file and return its first candidate."""
    with NamedTemporaryFile() as temp:
        temp.write(audio.getvalue())
        # The model opens the file by path, so the buffered bytes must reach
        # the disk first; otherwise it may see an empty or truncated file.
        temp.flush()
        outputs = model(temp.name, **kwargs)
    return outputs["cands"][0]


def main() -> None:
    """Render the Streamlit page that describes uploaded audio with CoNeTTE.

    The page exposes the task, the beam size and the minimal/maximal number
    of words as widgets, lets the user upload one or more audio files, and
    writes the first candidate returned by the model for each file.

    Results are stored in ``st.session_state`` under a key made of the file
    name and the decoding options, so a rerun with the same file name and
    options reuses the stored text instead of calling the model again.
    """
    st.header("Describe audio content with CoNeTTE")

    model = load_conette(model_kwds=dict(device="cpu"))

    task = st.selectbox("Task embedding input", model.tasks, 0)
    beam_size: int = st.select_slider(  # type: ignore
        "Beam size",
        list(range(1, 20)),
        model.config.beam_size,
    )
    min_pred_size: int = st.select_slider(  # type: ignore
        "Minimal number of words",
        list(range(1, 31)),
        model.config.min_pred_size,
    )
    max_pred_size: int = st.select_slider(  # type: ignore
        "Maximal number of words",
        list(range(1, 31)),
        model.config.max_pred_size,
    )

    st.write("Recommanded audio: lasting from 1s to 30s, sampled at 32 kHz.")
    audios = st.file_uploader(
        "Upload an audio file",
        type=["wav", "flac", "mp3", "ogg", "avi"],
        accept_multiple_files=True,
    )

    # The decoding options do not depend on the file, so build them once.
    kwargs: dict[str, Any] = dict(
        task=task,
        beam_size=beam_size,
        min_pred_size=min_pred_size,
        max_pred_size=max_pred_size,
    )

    # ``audios or []`` covers both ``None`` and an empty upload list.
    for audio in audios or []:
        # NOTE(review): the key uses the file name only, so two different
        # uploads sharing a name and options share one cached result.
        cand_key = f"{audio.name}-{kwargs}"

        # Only touch the disk and the model when no result is stored yet.
        if cand_key not in st.session_state:
            st.session_state[cand_key] = _caption_uploaded_audio(
                model, audio, kwargs
            )
        cand = st.session_state[cand_key]

        st.write(f"Output for {audio.name}:")
        st.write(" - ", cand)
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|