poemsforaphrodite committed on
Commit
d88c00f
·
verified ·
1 Parent(s): 2ab9666

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -0
app.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from io import BytesIO
4
+ from transformers import AutoTokenizer
5
+ import numpy as np
6
+ from pydub import AudioSegment
7
+ import tempfile
8
+ import os
9
+
10
# Page-level Streamlit settings, applied once before any other UI element
# is rendered.
_PAGE_SETTINGS = {
    "page_title": "Voice Cloning App",
    "layout": "centered",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_PAGE_SETTINGS)
16
+
17
@st.cache_resource
def load_tokenizer():
    """Load the tokenizer used to measure text length for chunking.

    Wrapped in ``st.cache_resource`` so the tokenizer object is reused
    rather than rebuilt each time the script runs.

    Returns:
        The tokenizer produced by ``AutoTokenizer.from_pretrained`` for
        the CAMeLBERT model id below.
    """
    model_id = "CAMeL-Lab/bert-base-arabic-camelbert-ca"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tokenizer
20
+
21
def _split_oversized_sentence(sentence, tokenizer, max_tokens):
    """Break one sentence into word-aligned pieces of at most ``max_tokens``.

    A sentence that already fits is returned unchanged as a single-item
    list. Token counts are summed per word, so they are an approximation
    of the count for the joined piece. A single word that alone exceeds
    ``max_tokens`` is emitted on its own because it cannot be split on
    whitespace.
    """
    if len(tokenizer.tokenize(sentence)) <= max_tokens:
        return [sentence]

    pieces = []
    words = []
    count = 0
    for word in sentence.split():
        word_count = len(tokenizer.tokenize(word))
        if words and count + word_count > max_tokens:
            pieces.append(" ".join(words))
            words = []
            count = 0
        words.append(word)
        count += word_count
    if words:
        pieces.append(" ".join(words))
    return pieces


def split_text_into_chunks(text, tokenizer, max_tokens=100):
    """Split ``text`` into chunks that each stay within a token budget.

    Text that already fits is returned untouched as ``[text]``. Otherwise
    the text is cut at periods, and consecutive sentences are packed
    greedily into chunks of at most ``max_tokens`` tokens. A sentence that
    is itself too long is further split on word boundaries.

    Args:
        text: The input text.
        tokenizer: Any object exposing ``tokenize(str) -> list``; only the
            length of the returned list is used.
        max_tokens: Maximum number of tokens allowed per chunk.

    Returns:
        A list of chunk strings in original order. Sentences inside a
        chunk are joined with a single space.
    """
    # Short text: return it as-is, preserving the caller's exact string.
    if len(tokenizer.tokenize(text)) <= max_tokens:
        return [text]

    chunks = []
    current_parts = []
    current_count = 0

    raw_sentences = text.split('.')
    last_index = len(raw_sentences) - 1

    for index, raw_sentence in enumerate(raw_sentences):
        sentence = raw_sentence.strip()
        if not sentence:
            continue

        # Every piece except the last was followed by a '.' in the input.
        # Restoring it only there avoids inventing a trailing period for
        # text that did not end with one.
        if index < last_index:
            sentence += "."

        for unit in _split_oversized_sentence(sentence, tokenizer, max_tokens):
            unit_count = len(tokenizer.tokenize(unit))

            # Close the running chunk when this unit would overflow it.
            if current_parts and current_count + unit_count > max_tokens:
                chunks.append(" ".join(current_parts))
                current_parts = []
                current_count = 0

            current_parts.append(unit)
            current_count += unit_count

    if current_parts:
        chunks.append(" ".join(current_parts))

    return chunks
56
+
57
def merge_audio_segments(audio_contents):
    """Concatenate encoded audio clips into one MP3 byte string.

    Each item is written to a temporary ``.mp3``-suffixed file, loaded
    with ``AudioSegment.from_file``, and appended to the running result
    in iteration order. The combined audio is exported as MP3.

    Args:
        audio_contents: Iterable of ``bytes`` objects, one per clip
            (presumably complete audio files as returned by the API).

    Returns:
        The merged audio encoded as MP3, as ``bytes``.

    Raises:
        ValueError: If ``audio_contents`` yields no clips.
    """
    combined = None

    for audio_content in audio_contents:
        # delete=False so the file can be closed here and reopened by
        # path when loading; it is removed manually below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_file.write(audio_content)
            temp_file_path = temp_file.name

        try:
            segment = AudioSegment.from_file(temp_file_path)
        finally:
            # Always remove the temp file, even when decoding fails.
            os.unlink(temp_file_path)

        combined = segment if combined is None else combined + segment

    if combined is None:
        raise ValueError("No audio segments to merge.")

    # Export the combined audio to an in-memory byte stream.
    output = BytesIO()
    combined.export(output, format='mp3')
    return output.getvalue()
80
+
81
st.title("📢 Voice Cloning Application")
st.write("Enter the details below and upload an audio file to clone the voice.")

# Load the tokenizer used to decide whether the text must be chunked.
tokenizer = load_tokenizer()

# Collect all inputs in a single form so the script only acts on submit.
with st.form("voice_clone_form"):
    # Text input
    text = st.text_input("Text", value="مرحباً بكم في تطبيق استنساخ الصوت. يمكنك استخدام هذا التطبيق لإنشاء نسخة من صوتك باللغة العربية.")

    # Language selection
    language = st.selectbox("Language", options=["ar"], index=0)

    # File uploader for the reference audio
    audio_file = st.file_uploader("Upload Audio File", type=["wav", "mp3", "ogg"])

    # Submit button
    submit_button = st.form_submit_button(label="Clone Voice")

if submit_button:
    if not audio_file:
        st.error("Please upload an audio file.")
    elif not text.strip():
        # Avoid sending an empty synthesis request to the API.
        st.error("Please enter some text to synthesize.")
    else:
        try:
            # API endpoint and per-request timeout (seconds). Without a
            # timeout a stalled connection would block the app forever.
            api_url = "https://tellergen.com/api/clone-voice"
            request_timeout = 300

            # Split text into chunks if necessary
            text_chunks = split_text_into_chunks(text, tokenizer)
            total_chunks = len(text_chunks)

            if total_chunks > 1:
                st.info(f"Text will be processed in {len(text_chunks)} chunks due to length.")

            # Read the upload once; the same bytes are re-sent with every
            # chunk. getvalue() does not depend on the file pointer.
            reference_audio = audio_file.getvalue()

            audio_contents = []

            # Process each chunk
            progress_bar = st.progress(0)
            for i, chunk in enumerate(text_chunks):
                payload = {
                    'text': chunk,
                    'language': language
                }
                files = {
                    'audio_file': (audio_file.name, reference_audio, audio_file.type)
                }

                with st.spinner(f"Processing chunk {i+1}/{len(text_chunks)}..."):
                    response = requests.post(
                        api_url,
                        data=payload,
                        files=files,
                        timeout=request_timeout,
                    )

                if response.status_code == 200:
                    # Default to '' so a missing header is reported as an
                    # unexpected format instead of raising TypeError.
                    content_type = response.headers.get('Content-Type', '')
                    if 'audio' in content_type:
                        audio_contents.append(response.content)
                    else:
                        st.error(f"Unexpected response format for chunk {i+1}")
                        try:
                            st.json(response.json())
                        except ValueError:
                            st.text(response.text)
                        break
                else:
                    st.error(f"API request failed for chunk {i+1} with status code {response.status_code}")
                    try:
                        error_data = response.json()
                        st.error(error_data)
                    except ValueError:
                        st.error(response.text)
                    break

                progress_bar.progress((i + 1) / total_chunks)

            # Only produce output when every chunk succeeded; a partial
            # result would silently drop part of the text.
            if len(audio_contents) == total_chunks:
                st.success("Voice cloning completed successfully!")

                if len(audio_contents) > 1:
                    with st.spinner("Merging audio segments..."):
                        final_audio = merge_audio_segments(audio_contents)
                else:
                    final_audio = audio_contents[0]

                st.audio(final_audio, format='audio/mp3')

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"An error occurred: {e}")