Spaces:

gpt-99
/

real-time-transcriber

Running

App Files Files Community

gpt-99 committed on Nov 28, 2024

Commit

9913174

1 Parent(s): dbbe3fe

static translator for now

Browse files

Files changed (4) hide show

.gitignore +1 -0
.gradio/certificate.pem +31 -0
app.py +303 -0
requirements.txt +73 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv/

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

app.py ADDED Viewed

	@@ -0,0 +1,303 @@

+import sounddevice as sd
+import soundfile as sf
+import torch
+import numpy as np
+import torchaudio
+import time
+import gradio as gr
+from concurrent.futures import ThreadPoolExecutor
+from transformers import AutoProcessor, AutoModel
+from queue import Queue, Empty
+import warnings
+import traceback
+import whisper
+import gc
+warnings.filterwarnings("ignore")
+class OptimizedContinuousTranslator:
+    def __init__(self, target_language="spa", chunk_duration=3, sample_rate=16000):
+        try:
+            self.processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
+            self.model = AutoModel.from_pretrained("facebook/seamless-m4t-v2-large")
+            self.target_language = target_language
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            self.processor = None
+            self.model = None
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    def wav_to_tensor(self, file_path, sampling_rate):
+        """
+        Converts a WAV file into a PyTorch tensor.
+        Args:
+            file_path (str): Path to the WAV file.
+        Returns:
+            torch.Tensor: Audio tensor.
+            int: Sampling rate of the audio.
+        """
+        # Load the WAV file
+        waveform, sample_rate = torchaudio.load(file_path)
+            # Resample if the original sampling rate is not 16000 Hz
+        if sample_rate != sampling_rate:
+            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=sampling_rate)
+            waveform = resampler(waveform)
+        return waveform, sampling_rate
+    def translate_audio(self, audio_file_path):
+        """
+        Enhanced audio translation with improved error handling and memory management
+        Args:
+            audio (torch.Tensor): Audio chunk to translate
+        Returns:
+            str: Translated text or error message
+        """
+        print("REACHED")
+        if audio_file_path is None or self.processor is None or self.model is None:
+            print(f"{audio_file_path} {self.processor} {self.model}")
+            return ""
+        try:
+            # Prepare audio inputs
+            wavform, sample_rate = self.wav_to_tensor(audio_file_path, 16000)
+            audio_inputs = self.processor(audios=wavform.unsqueeze(0), return_tensors="pt", sampling_rate=sample_rate)
+            # Move inputs to the correct device
+            audio_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
+                            for k, v in audio_inputs.items()}
+            # Generate translation
+            output_tokens = self.model.generate(
+                **audio_inputs,
+                tgt_lang=self.target_language,
+                generate_speech=False
+            )
+            # Decode the translated text
+            translated_text = self.processor.decode(
+                output_tokens[0].tolist()[0],
+                skip_special_tokens=True
+            )
+            print(translated_text)
+            return translated_text
+        except Exception as e:
+            error_message = f"Translation error: {str(e)}"
+            stack_trace = traceback.format_exc()
+            print(f"{error_message}\n{stack_trace}")
+            return ""
+        finally:
+            # Aggressive memory cleanup
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            gc.collect()
+# web app
+# simple translator (no real time)
+def create_translator_interface():
+    """Create the optimized Gradio interface for the Continuous Translator"""
+    # Initialize the translator
+    translator = OptimizedContinuousTranslator()
+    with gr.Blocks(title="Continuous Audio Translator") as demo:
+        # Usage Instructions in a Markdown Dropdown
+        gr.Markdown("""
+## 🎙️ Audio Translator: How to Use
+<details>
+<summary>Click to view usage instructions</summary>
+### 🌐 Translation Steps
+1. **Select Target Language**:
+   - Choose the language you want to translate to from the dropdown menu
+2. **Record Audio**:
+   - Click on the microphone icon in the audio input area
+   - Record your audio clearly and concisely
+   - Ensure minimal background noise for best results
+3. **Translate**:
+   - After recording, click the "Translate" button
+   - The translated text will appear in the transcript box below
+### 💡 Tips
+- Speak clearly and at a moderate pace
+- Avoid complex or technical language for more accurate translations
+- The translation works best with shorter, simpler sentences
+- Maximum recommended recording time is around 30 seconds
+### 🌍 Supported Languages
+- Input: Currently supports clear spoken language
+- Output: Any of the languages you choose from
+</details>
+""")
+        languages = {
+            "afr": "Afrikaans",
+            "amh": "Amharic",
+            "arb": "Modern Standard Arabic",
+            "ary": "Moroccan Arabic",
+            "arz": "Egyptian Arabic",
+            "asm": "Assamese",
+            "ast": "Asturian",
+            "azj": "North Azerbaijani",
+            "bel": "Belarusian",
+            "ben": "Bengali",
+            "bos": "Bosnian",
+            "bul": "Bulgarian",
+            "cat": "Catalan",
+            "ceb": "Cebuano",
+            "ces": "Czech",
+            "ckb": "Central Kurdish",
+            "cmn": "Mandarin Chinese",
+            "cmn_Hant": "Mandarin Chinese (Traditional)",
+            "cym": "Welsh",
+            "dan": "Danish",
+            "deu": "German",
+            "ell": "Greek",
+            "eng": "English",
+            "est": "Estonian",
+            "eus": "Basque",
+            "fin": "Finnish",
+            "fra": "French",
+            "fuv": "Nigerian Fulfulde",
+            "gaz": "West Central Oromo",
+            "gle": "Irish",
+            "glg": "Galician",
+            "guj": "Gujarati",
+            "heb": "Hebrew",
+            "hin": "Hindi",
+            "hrv": "Croatian",
+            "hun": "Hungarian",
+            "hye": "Armenian",
+            "ibo": "Igbo",
+            "ind": "Indonesian",
+            "isl": "Icelandic",
+            "ita": "Italian",
+            "jav": "Javanese",
+            "jpn": "Japanese",
+            "kam": "Kamba",
+            "kan": "Kannada",
+            "kat": "Georgian",
+            "kaz": "Kazakh",
+            "kea": "Kabuverdianu",
+            "khk": "Halh Mongolian",
+            "khm": "Khmer",
+            "kir": "Kyrgyz",
+            "kor": "Korean",
+            "lao": "Lao",
+            "lit": "Lithuanian",
+            "ltz": "Luxembourgish",
+            "lug": "Ganda",
+            "luo": "Luo",
+            "lvs": "Standard Latvian",
+            "mai": "Maithili",
+            "mal": "Malayalam",
+            "mar": "Marathi",
+            "mkd": "Macedonian",
+            "mlt": "Maltese",
+            "mni": "Meitei",
+            "mya": "Burmese",
+            "nld": "Dutch",
+            "nno": "Norwegian Nynorsk",
+            "nob": "Norwegian Bokmål",
+            "npi": "Nepali",
+            "nya": "Nyanja",
+            "oci": "Occitan",
+            "ory": "Odia",
+            "pan": "Punjabi",
+            "pbt": "Southern Pashto",
+            "pes": "Western Persian",
+            "pol": "Polish",
+            "por": "Portuguese",
+            "ron": "Romanian",
+            "rus": "Russian",
+            "slk": "Slovak",
+            "slv": "Slovenian",
+            "sna": "Shona",
+            "snd": "Sindhi",
+            "som": "Somali",
+            "spa": "Spanish",
+            "srp": "Serbian",
+            "swe": "Swedish",
+            "swh": "Swahili",
+            "tam": "Tamil",
+            "tel": "Telugu",
+            "tgk": "Tajik",
+            "tgl": "Tagalog",
+            "tha": "Thai",
+            "tur": "Turkish",
+            "ukr": "Ukrainian",
+            "urd": "Urdu",
+            "uzn": "Northern Uzbek",
+            "vie": "Vietnamese",
+            "xho": "Xhosa",
+            "yor": "Yoruba",
+            "yue": "Cantonese",
+            "zlm": "Colloquial Malay",
+            "zsm": "Standard Malay",
+            "zul": "Zulu",
+        }
+        # Language Dropdown
+        with gr.Row():
+            # Generate the choices for the dropdown: display names mapped to their keys
+            language_choices = [(name, code) for code, name in languages.items()]
+            language_dropdown = gr.Dropdown(
+                choices=language_choices,  # Each choice is a (display, value) tuple
+                value="spa",  # Default value corresponds to the key
+                label="Target Language",
+                scale=2
+            )
+        # Audio Input
+        audio_input = gr.Audio(label="Record Audio", sources="microphone", type="filepath")
+        # Display Components
+        transcript_box = gr.Textbox(label="Full Transcript", lines=10, interactive=False)
+        # Control Buttons
+        with gr.Row():
+            start_btn = gr.Button("Translate")
+        # Define the translation action
+        def handle_translation(audio_file, target_language):
+            """Handle the audio file and pass it to the translator for processing."""
+            if not audio_file:
+                return "No audio file provided. Please record and try again."
+            translator.target_language = target_language  # Set the target language in the translator
+            try:
+                translated_text = translator.translate_audio(audio_file)
+                return translated_text if translated_text else "Translation failed."
+            except Exception as e:
+                return f"Error: {str(e)}"
+        # Set the Gradio action
+        start_btn.click(
+            fn=handle_translation,
+            inputs=[audio_input, language_dropdown],
+            outputs=transcript_box
+        )
+    return demo
+def main():
+    """Launch the Gradio app with optimized settings"""
+    interface = create_translator_interface()
+    interface.launch(
+        share=False,
+        show_error=True,
+        debug=True  # Helpful for development
+    )
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,73 @@

+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.6.2.post1
+certifi==2024.8.30
+cffi==1.17.1
+charset-normalizer==3.4.0
+click==8.1.7
+fastapi==0.115.5
+ffmpy==0.4.0
+filelock==3.16.1
+fsspec==2024.10.0
+gradio==5.7.0
+gradio_client==1.5.0
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.27.2
+huggingface-hub==0.26.2
+idna==3.10
+Jinja2==3.1.4
+llvmlite==0.43.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+more-itertools==10.5.0
+mpmath==1.3.0
+networkx==3.4.2
+numba==0.60.0
+numpy==2.0.2
+openai-whisper==20240930
+orjson==3.10.12
+packaging==24.2
+pandas==2.2.3
+pillow==11.0.0
+protobuf==5.29.0
+pycparser==2.22
+pydantic==2.10.2
+pydantic_core==2.27.1
+pydub==0.25.1
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+python-multipart==0.0.12
+pytz==2024.2
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+ruff==0.8.0
+safehttpx==0.1.1
+safetensors==0.4.5
+semantic-version==2.10.0
+sentencepiece==0.2.0
+setuptools==75.6.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+sounddevice==0.5.1
+soundfile==0.12.1
+starlette==0.41.3
+sympy==1.13.1
+tiktoken==0.8.0
+tokenizers==0.20.3
+tomlkit==0.12.0
+torch==2.5.1
+torchaudio==2.5.1
+tqdm==4.67.1
+transformers==4.46.3
+typer==0.13.1
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.2.3
+uvicorn==0.32.1
+websockets==12.0
+whisper==1.1.10