Spaces:

poemsforaphrodite
/

voice-finetune

Sleeping

App Files Files Community

poemsforaphrodite committed on Nov 29, 2024

Commit

d88c00f

verified ·

1 Parent(s): 2ab9666

Create app.py

Browse files

Files changed (1) hide show

app.py +172 -0

app.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import streamlit as st
+import requests
+from io import BytesIO
+from transformers import AutoTokenizer
+import numpy as np
+from pydub import AudioSegment
+import tempfile
+import os
+# Set the page configuration
+st.set_page_config(
+    page_title="Voice Cloning App",
+    layout="centered",
+    initial_sidebar_state="auto",
+)
+@st.cache_resource
+def load_tokenizer():
+    return AutoTokenizer.from_pretrained("CAMeL-Lab/bert-base-arabic-camelbert-ca")
+def split_text_into_chunks(text, tokenizer, max_tokens=100):
+    # Tokenize the entire text
+    tokens = tokenizer.tokenize(text)
+    # If text is short enough, return it as a single chunk
+    if len(tokens) <= max_tokens:
+        return [text]
+    # Split into chunks based on punctuation and token limit
+    chunks = []
+    current_chunk = ""
+    current_tokens = []
+    sentences = text.split('.')
+    for sentence in sentences:
+        if not sentence.strip():
+            continue
+        sentence = sentence.strip() + "."
+        sentence_tokens = tokenizer.tokenize(sentence)
+        if len(current_tokens) + len(sentence_tokens) <= max_tokens:
+            current_chunk += " " + sentence if current_chunk else sentence
+            current_tokens.extend(sentence_tokens)
+        else:
+            if current_chunk:
+                chunks.append(current_chunk.strip())
+            current_chunk = sentence
+            current_tokens = sentence_tokens
+    if current_chunk:
+        chunks.append(current_chunk.strip())
+    return chunks
+def merge_audio_segments(audio_contents):
+    combined = None
+    for audio_content in audio_contents:
+        # Save the audio content to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
+            temp_file.write(audio_content)
+            temp_file_path = temp_file.name
+        # Load the audio segment
+        segment = AudioSegment.from_file(temp_file_path)
+        os.unlink(temp_file_path)  # Delete the temporary file
+        # Combine the segments
+        if combined is None:
+            combined = segment
+        else:
+            combined += segment
+    # Export the combined audio to a byte stream
+    output = BytesIO()
+    combined.export(output, format='mp3')
+    return output.getvalue()
+st.title("📢 Voice Cloning Application")
+st.write("Enter the details below and upload an audio file to clone the voice.")
+# Load the tokenizer
+tokenizer = load_tokenizer()
+# Create a form for input
+with st.form("voice_clone_form"):
+    # Text input
+    text = st.text_input("Text", value="مرحباً بكم في تطبيق استنساخ الصوت. يمكنك استخدام هذا التطبيق لإنشاء نسخة من صوتك باللغة العربية.")
+    # Language selection
+    language = st.selectbox("Language", options=["ar"], index=0)
+    # File uploader for audio file
+    audio_file = st.file_uploader("Upload Audio File", type=["wav", "mp3", "ogg"])
+    # Submit button
+    submit_button = st.form_submit_button(label="Clone Voice")
+if submit_button:
+    if not audio_file:
+        st.error("Please upload an audio file.")
+    else:
+        try:
+            # Split text into chunks if necessary
+            text_chunks = split_text_into_chunks(text, tokenizer)
+            if len(text_chunks) > 1:
+                st.info(f"Text will be processed in {len(text_chunks)} chunks due to length.")
+            audio_contents = []
+            # Process each chunk
+            progress_bar = st.progress(0)
+            for i, chunk in enumerate(text_chunks):
+                # Prepare the payload
+                payload = {
+                    'text': chunk,
+                    'language': language
+                }
+                # Prepare the files
+                files = {
+                    'audio_file': (audio_file.name, audio_file.read(), audio_file.type)
+                }
+                # Reset file pointer for next iteration
+                audio_file.seek(0)
+                # API endpoint
+                api_url = "https://tellergen.com/api/clone-voice"
+                with st.spinner(f"Processing chunk {i+1}/{len(text_chunks)}..."):
+                    response = requests.post(api_url, data=payload, files=files)
+                if response.status_code == 200:
+                    content_type = response.headers.get('Content-Type')
+                    if 'audio' in content_type:
+                        audio_contents.append(response.content)
+                    else:
+                        st.error(f"Unexpected response format for chunk {i+1}")
+                        try:
+                            st.json(response.json())
+                        except ValueError:
+                            st.text(response.text)
+                        break
+                else:
+                    st.error(f"API request failed for chunk {i+1} with status code {response.status_code}")
+                    try:
+                        error_data = response.json()
+                        st.error(error_data)
+                    except ValueError:
+                        st.error(response.text)
+                    break
+                progress_bar.progress((i + 1) / len(text_chunks))
+            # If we have all audio chunks, merge them
+            if len(audio_contents) == len(text_chunks):
+                st.success("Voice cloning completed successfully!")
+                if len(audio_contents) > 1:
+                    with st.spinner("Merging audio segments..."):
+                        final_audio = merge_audio_segments(audio_contents)
+                else:
+                    final_audio = audio_contents[0]
+                st.audio(final_audio, format='audio/mp3')
+        except Exception as e:
+            st.error(f"An error occurred: {e}")