Spaces:
Sleeping
Sleeping
Manjot Singh
committed on
Commit
·
cf21473
1
Parent(s):
5865692
asr_transcription
Browse files
- .gitignore +3 -0
- app.py +21 -4
- flagged/audio_file/e6332957799e9944ecd7/audio.wav +0 -0
- flagged/log.csv +2 -0
- requirements.txt +8 -3
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
.venv/
|
3 |
+
*.pyc
|
app.py
CHANGED
@@ -1,7 +1,24 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from audio_processing import process_audio, print_results
|
3 |
+
def transcribe_audio(audio_file):
|
4 |
+
language_segments, text_segments = process_audio(audio_file)
|
5 |
+
|
6 |
+
output = "Detected language changes:\n\n"
|
7 |
+
for lang_segment in language_segments:
|
8 |
+
output += f"Language: {lang_segment['language']}\n"
|
9 |
+
output += f"Time: {lang_segment['start']:.2f}s - {lang_segment['end']:.2f}s\n\n"
|
10 |
|
11 |
+
output += "Transcription:\n\n"
|
12 |
+
for segment in text_segments:
|
13 |
+
output += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}\n"
|
14 |
|
15 |
+
return output
|
16 |
+
|
17 |
+
iface = gr.Interface(
|
18 |
+
fn=transcribe_audio,
|
19 |
+
inputs=gr.Audio(type="filepath"),
|
20 |
+
outputs="text",
|
21 |
+
title="WhisperX Audio Transcription"
|
22 |
+
)
|
23 |
+
|
24 |
+
iface.launch()
|
flagged/audio_file/e6332957799e9944ecd7/audio.wav
ADDED
Binary file (238 kB). View file
|
|
flagged/log.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
audio_file,output,flag,username,timestamp
|
2 |
+
flagged/audio_file/e6332957799e9944ecd7/audio.wav,,,,2024-08-31 11:46:39.944110
|
requirements.txt
CHANGED
@@ -1,9 +1,14 @@
|
|
1 |
-
torch
|
2 |
-
torchaudio
|
3 |
transformers
|
4 |
git+https://github.com/m-bain/whisperx.git
|
5 |
numpy
|
6 |
pandas
|
7 |
pyannote.audio
|
8 |
pyperclip
|
9 |
-
sentencepiece
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
transformers
|
2 |
git+https://github.com/m-bain/whisperx.git
|
3 |
numpy
|
4 |
pandas
|
5 |
pyannote.audio
|
6 |
pyperclip
|
7 |
+
sentencepiece
|
8 |
+
gradio
|
9 |
+
speechbrain
|
10 |
+
torch>=2
|
11 |
+
torchaudio>=2
|
12 |
+
faster-whisper==1.0.0
|
13 |
+
setuptools>=65
|
14 |
+
nltk
|