Spaces:

ajchri5
/

164-S2-Assignment_2

Sleeping

ajchri5 committed 8 days ago

Commit

1796509

•

1 Parent(s): d71ba5c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,10 +4,24 @@ import torchaudio
 import warnings
 import fasttext  # Import fastText for language detection
 import pandas as pd
 # Suppress specific warnings related to PySoundFile fallback
 warnings.filterwarnings("ignore", category=UserWarning, message="PySoundFile failed.*")
 # Load models
 whisper_model_name = "openai/whisper-large"
 processor = WhisperProcessor.from_pretrained(whisper_model_name)
@@ -16,9 +30,6 @@ whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_na
 # Load Helsinki-NLP's opus-100 translation model
 translation_model = pipeline("translation", model="Helsinki-NLP/opus-mt-ROMANCE-en")  # A multilingual model from Opus-100
-# Load the fastText language detection model
-lang_model = fasttext.load_model('./lid.176.bin')  # This is the pre-trained model for language detection
 # Initialize history tracking
 history_data = []  # List to track transcription, detected language, translation, and confidence score

 import warnings
 import fasttext  # Import fastText for language detection
 import pandas as pd
+import urllib.request
+import os
 # Suppress specific warnings related to PySoundFile fallback
 warnings.filterwarnings("ignore", category=UserWarning, message="PySoundFile failed.*")
+# Define a temporary path to store the large model file
+temp_model_path = '/tmp/lid.176.bin'
+# Check if the model already exists in the temporary path, and download it if not
+if not os.path.exists(temp_model_path):
+    # Download the file from Hugging Face URL
+    url = "https://huggingface.co/julien-c/fasttext-language-id/resolve/0266da4549434de56667387618bc67dc6d2670ef/lid.176.bin"
+    urllib.request.urlretrieve(url, temp_model_path)
+# Load the model from the temporary path
+lang_model = fasttext.load_model(temp_model_path)
 # Load models
 whisper_model_name = "openai/whisper-large"
 processor = WhisperProcessor.from_pretrained(whisper_model_name)
 # Load Helsinki-NLP's opus-100 translation model
 translation_model = pipeline("translation", model="Helsinki-NLP/opus-mt-ROMANCE-en")  # A multilingual model from Opus-100
 # Initialize history tracking
 history_data = []  # List to track transcription, detected language, translation, and confidence score