Update app.py
Browse files
app.py
CHANGED
@@ -1,27 +1,29 @@
|
|
1 |
import gradio as gr
|
2 |
-
import os
|
3 |
-
import time # To add a short delay
|
4 |
-
|
5 |
-
# Force install torch (though this should ideally be handled in requirements.txt)
|
6 |
-
os.system("pip install torch")
|
7 |
-
time.sleep(10) # Adding a short delay to ensure installation completes
|
8 |
-
|
9 |
import torch
|
10 |
-
from transformers import pipeline
|
11 |
-
|
12 |
-
# Initialize the ASR pipeline for Bulgarian
|
13 |
-
# Load model directly
|
14 |
from transformers import AutoProcessor, AutoModelForCTC
|
|
|
15 |
|
|
|
16 |
processor = AutoProcessor.from_pretrained("infinitejoy/wav2vec2-large-xls-r-300m-bulgarian")
|
17 |
model = AutoModelForCTC.from_pretrained("infinitejoy/wav2vec2-large-xls-r-300m-bulgarian")
|
18 |
|
19 |
|
20 |
# ASR function (speech-to-text conversion)
|
21 |
def asr_generate(audio):
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
return transcription
|
24 |
|
|
|
25 |
# Create the Gradio interface
|
26 |
iface = gr.Interface(
|
27 |
fn=asr_generate,
|
@@ -33,4 +35,4 @@ iface = gr.Interface(
|
|
33 |
|
34 |
# Launch the interface
|
35 |
if __name__ == "__main__":
|
36 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import torch
|
|
|
|
|
|
|
|
|
3 |
from transformers import AutoProcessor, AutoModelForCTC
|
4 |
+
import soundfile as sf # To handle audio input
|
5 |
|
6 |
+
# Load the processor and model directly for Bulgarian ASR
|
7 |
processor = AutoProcessor.from_pretrained("infinitejoy/wav2vec2-large-xls-r-300m-bulgarian")
|
8 |
model = AutoModelForCTC.from_pretrained("infinitejoy/wav2vec2-large-xls-r-300m-bulgarian")
|
9 |
|
10 |
|
11 |
# ASR function (speech-to-text conversion)
|
12 |
def asr_generate(audio):
    """Transcribe a speech recording to text with the module-level CTC model.

    Args:
        audio: Anything ``soundfile.read`` accepts (a file path or file-like
            object). NOTE(review): this presumes the Gradio audio input is
            configured to hand over a file path -- confirm against the
            ``gr.Interface`` definition. ``None`` (nothing recorded or
            uploaded) is tolerated.

    Returns:
        The decoded transcription string, or ``""`` when there is no audio
        to transcribe.
    """
    # The processor is told the audio is 16 kHz, so the samples must really
    # be at that rate before they are handed over.
    target_rate = 16000

    # Gradio passes None when the user submits without providing audio.
    if audio is None:
        return ""

    # Keep the file's real sample rate instead of discarding it.
    speech, sample_rate = sf.read(audio)

    # soundfile returns (frames, channels) for multi-channel files; the model
    # expects a single 1-D waveform, so down-mix to mono.
    if speech.ndim > 1:
        speech = speech.mean(axis=1)

    # A zero-length waveform cannot be run through the model.
    if speech.size == 0:
        return ""

    # Resample when the recording is not already at the expected rate.
    # Without this, e.g. 44.1/48 kHz microphone input would be interpreted
    # as 16 kHz audio and transcribed at the wrong speed.
    if sample_rate != target_rate:
        # Local import: only needed on this path (requires scipy installed).
        from scipy.signal import resample_poly

        # resample_poly reduces up/down by their GCD internally and applies
        # an anti-aliasing filter.
        speech = resample_poly(speech, target_rate, sample_rate)

    inputs = processor(
        speech,
        sampling_rate=target_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    return transcription
|
25 |
|
26 |
+
|
27 |
# Create the Gradio interface
|
28 |
iface = gr.Interface(
|
29 |
fn=asr_generate,
|
|
|
35 |
|
36 |
# Launch the interface
|
37 |
if __name__ == "__main__":
|
38 |
+
iface.launch()
|