ajchri5 committed on
Commit
1796509
1 Parent(s): d71ba5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -4,10 +4,24 @@ import torchaudio
4
  import warnings
5
  import fasttext # Import fastText for language detection
6
  import pandas as pd
 
 
7
 
8
  # Suppress specific warnings related to PySoundFile fallback
9
  warnings.filterwarnings("ignore", category=UserWarning, message="PySoundFile failed.*")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Load models
12
  whisper_model_name = "openai/whisper-large"
13
  processor = WhisperProcessor.from_pretrained(whisper_model_name)
@@ -16,9 +30,6 @@ whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_na
16
  # Load Helsinki-NLP's opus-100 translation model
17
  translation_model = pipeline("translation", model="Helsinki-NLP/opus-mt-ROMANCE-en") # A multilingual model from Opus-100
18
 
19
- # Load the fastText language detection model
20
- lang_model = fasttext.load_model('./lid.176.bin') # This is the pre-trained model for language detection
21
-
22
  # Initialize history tracking
23
  history_data = [] # List to track transcription, detected language, translation, and confidence score
24
 
 
4
  import warnings
5
  import fasttext # Import fastText for language detection
6
  import pandas as pd
7
+ import urllib.request
8
+ import os
9
 
10
  # Suppress specific warnings related to PySoundFile fallback
11
  warnings.filterwarnings("ignore", category=UserWarning, message="PySoundFile failed.*")
12
 
13
+ # Define a temporary path to store the large model file
14
+ temp_model_path = '/tmp/lid.176.bin'
15
+
16
+ # Check if the model already exists in the temporary path, and download it if not
17
+ if not os.path.exists(temp_model_path):
18
+     # Download the file from Hugging Face URL
19
+     url = "https://huggingface.co/julien-c/fasttext-language-id/resolve/0266da4549434de56667387618bc67dc6d2670ef/lid.176.bin"
20
+     urllib.request.urlretrieve(url, temp_model_path)
21
+
22
+ # Load the model from the temporary path
23
+ lang_model = fasttext.load_model(temp_model_path)
24
+
25
  # Load models
26
  whisper_model_name = "openai/whisper-large"
27
  processor = WhisperProcessor.from_pretrained(whisper_model_name)
 
30
  # Load Helsinki-NLP's opus-100 translation model
31
  translation_model = pipeline("translation", model="Helsinki-NLP/opus-mt-ROMANCE-en") # A multilingual model from Opus-100
32
 
 
 
 
33
  # Initialize history tracking
34
  history_data = [] # List to track transcription, detected language, translation, and confidence score
35