skriller18 committed on
Commit
ae68901
•
1 Parent(s): 1a0027d

Used API backend

Browse files
Files changed (2) hide show
  1. app.py +32 -13
  2. output.wav +0 -0
app.py CHANGED
@@ -1,21 +1,18 @@
1
  import torch
2
  import torchaudio
3
- from transformers import pipeline
4
  import streamlit as st
 
 
 
 
5
 
6
  model_id = '11mlabs/indri-0.1-124m-tts'
7
  task = 'indri-tts'
8
 
9
- pipe = pipeline(
10
- task,
11
- model=model_id,
12
- #device=torch.device('cuda:0'), # Update this based on your hardware,
13
- trust_remote_code=True
14
- )
15
-
16
  st.title("Indri")
17
  st.subheader("Ultrafast multi-modal AI")
18
 
 
19
 
20
  speakers = {
21
  "[spkr_63]" : "๐Ÿ‡ฌ๐Ÿ‡ง ๐Ÿ‘จ book reader",
@@ -33,7 +30,6 @@ speakers = {
33
  "[spkr_66]" : "๐Ÿ‡ฎ๐Ÿ‡ณ ๐Ÿ‘จ politician"
34
  }
35
 
36
- # Create a container for the speaker selection and text input
37
  with st.container():
38
  st.markdown("### Speaker Selection")
39
  speaker_id = st.selectbox("Select a speaker:", options=list(speakers.keys()), format_func=lambda x: speakers[x])
@@ -41,10 +37,33 @@ with st.container():
41
  st.markdown("### Text Input")
42
  text_input = st.text_area("Enter text for TTS (max 200 characters):", max_chars=200)
43
 
44
- if st.button("Generate Audio", key="generate_audio"):
45
  if text_input:
46
- output = pipe([text_input], speaker=speaker_id)
47
- torchaudio.save('output.wav', output[0]['audio'][0], sample_rate=24000)
48
- st.audio('output.wav')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  else:
50
  st.warning("Please enter text to generate audio.")
 
 
 
 
1
  import torch
2
  import torchaudio
 
3
  import streamlit as st
4
+ import requests
5
+ import streamlit.components.v1 as components
6
+ import asyncio
7
+ import io
8
 
9
  model_id = '11mlabs/indri-0.1-124m-tts'
10
  task = 'indri-tts'
11
 
 
 
 
 
 
 
 
12
  st.title("Indri")
13
  st.subheader("Ultrafast multi-modal AI")
14
 
15
+ baseUrl = "https://a8a9e62422722.notebooks.jarvislabs.net"
16
 
17
  speakers = {
18
  "[spkr_63]" : "๐Ÿ‡ฌ๐Ÿ‡ง ๐Ÿ‘จ book reader",
 
30
  "[spkr_66]" : "๐Ÿ‡ฎ๐Ÿ‡ณ ๐Ÿ‘จ politician"
31
  }
32
 
 
33
  with st.container():
34
  st.markdown("### Speaker Selection")
35
  speaker_id = st.selectbox("Select a speaker:", options=list(speakers.keys()), format_func=lambda x: speakers[x])
 
37
  st.markdown("### Text Input")
38
  text_input = st.text_area("Enter text for TTS (max 200 characters):", max_chars=200)
39
 
40
async def generate_audio():
    """Request speech for the entered text from the TTS backend and play it.

    Reads the module-level ``text_input``, ``speaker_id``, ``speakers`` and
    ``baseUrl`` values, POSTs the text and speaker to ``{baseUrl}/tts`` and
    renders the returned audio with ``st.audio``. Every failure (empty text,
    network error, non-2xx status, undecodable audio) is reported in the UI
    instead of being raised.

    NOTE(review): this stays ``async`` only so the existing
    ``asyncio.run(generate_audio())`` call keeps working; ``requests.post``
    is a blocking call and nothing here is awaited.
    """
    # Guard clause: nothing to synthesize for empty or whitespace-only input.
    if not text_input or not text_input.strip():
        st.warning("Please enter text to generate audio.")
        return

    # NOTE(review): this sends the human-readable label from `speakers`, not
    # the "[spkr_NN]" key -- confirm which one the backend expects.
    speaker_name = speakers[speaker_id]

    try:
        response = requests.post(
            f"{baseUrl}/tts",
            json={
                "text": text_input,
                "speaker": speaker_name,
            },
            headers={
                "accept": "application/json",
                "Content-Type": "application/json",
            },
            # Without a timeout a stalled backend would hang this session
            # forever; 60 s leaves room for slow synthesis.
            timeout=60,
        )
    except requests.RequestException as exc:
        st.error(f"Could not reach the TTS backend: {exc}")
        return

    if not response.ok:
        st.warning(
            f"TTS request failed. Status Code: {response.status_code}, "
            f"Response: {response.text}"
        )
        return

    # Decode the payload so a non-audio body is caught here rather than
    # handed to the audio player.
    try:
        audio_tensor, sample_rate = torchaudio.load(io.BytesIO(response.content))
    except (RuntimeError, ValueError, OSError) as exc:
        st.warning(f"Received invalid response format. Could not decode audio: {exc}")
        return

    # Re-encode to WAV in memory: a fixed 'output.wav' on disk is shared by
    # every concurrent session and could play another user's audio.
    wav_buffer = io.BytesIO()
    torchaudio.save(wav_buffer, audio_tensor, sample_rate, format="wav")
    st.audio(wav_buffer.getvalue(), format="audio/wav")


if st.button("Generate Audio", key="generate_audio"):
    asyncio.run(generate_audio())
output.wav ADDED
Binary file (119 kB). View file