Spaces:

katospiegel
/

amanu

Runtime error

App Files Files Community

katospiegel committed on Oct 19, 2023

Commit

8bc597c

1 Parent(s): 7cd200e

Testing txt output in WHISPERX

Browse files

Files changed (3) hide show

README.md +11 -0
app.py +26 -25
helpers.py +1 -1

README.md CHANGED Viewed

	@@ -51,3 +51,14 @@ The user will logging using a password and user specified by me. That user and p
51
52	Introduce Tab for analysis including POS. Maybe it would be great to have a visualizer with the timestamps and other features in Streamlit. Quizas correcciones.
53

 Introduce Tab for analysis including POS. Maybe it would be great to have a visualizer with the timestamps and other features in Streamlit. Quizas correcciones.
+## Dev
+I used Hugging Face's Git LFS support for large files:
+```
+git lfs install
+```
+```
+huggingface-cli lfs-enable-largefiles .
+```

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
 from transcription import fast_transcription, speech_to_text, doWhisperX
 from whisperx.utils import get_writer
 from audio import normalizeAudio, separateVoiceInstrumental, mp3_to_wav, stereo_to_mono, cutaudio, compose_audio
-from helpers import guardar_en_archivo, guardar_dataframe_en_csv
 import json
@@ -57,19 +57,8 @@ def transcribeWhisperX(audiofile, model, language, patiente,
     #nombre_archivo = guardar_en_archivo(out)
     ##########################################################################
-    import whisperx
     from pathlib import Path
-    # device = "cuda"
-    # model_a, metadata = whisperx.load_align_model(
-    #     language_code="es", device=device
-    # )
-    # result_aligned = whisperx.align(
-    #     result["segments"],
-    #     model_a,
-    #     metadata,
-    #     vocal_path,
-    #     device=device,
-    # )
     import datetime
     fecha_actual = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
@@ -81,17 +70,19 @@ def transcribeWhisperX(audiofile, model, language, patiente,
     srt_writer = get_writer("srt", Path("."))
     result_aligned["language"] = language
     srt_writer(result_aligned, file_path, writter_args)
-    # with open(
-    #    nombre_archivo,
-    #     "w",
-    #     encoding="utf-8",
-    # ) as srt:
-    #     write_srt(result_aligned["segments"], file=srt)
     ###########################################################################
-    return audio_path, audio_normalized_path, vocal_path, novocal_path, str(file_path), guardar_dataframe_en_csv(diarize_segments), json.dumps(result_speakers)
 transcribeI = gr.Interface(
     fn=transcribe,
@@ -135,10 +126,11 @@ transcribeII = gr.Interface(
              gr.Audio(type="filepath", label="normalized"),
              gr.Audio(type="filepath", label="vocal"),
              gr.Audio(type="filepath", label="no_vocal"),
-             gr.File(label="Archivo SRT generado"),
-             gr.File(label="Archivo CSV generado"),
-             gr.File(label="Tabla con diarización generada"),
-             gr.JSON(label="JSON Output"),
              #gr.JSON(label="JSON Output"),
              #gr.File(label="Archivo generado")
         ],
@@ -148,7 +140,16 @@ transcribeII = gr.Interface(
         "Esta página realiza una transcripción de audio utilizando Whisper. Además añade varias mejoras y utilidades: a) Preprocesamiento del audio y limpieza de ruido ambiental, b) Conversión de los archivos de audio a un formato compatible con Whisper, c) Cálculo de la marca temporal palabra por palabra, d) Cálculo del nivel de seguridad de la transcripción, e) Conversión del resultado a .csv, .srt y ass.\n"
     ),
     allow_flagging="never",
-    #examples=[[None, "COSER-4004-01-00_5m.wav", "large-v2"]]
 )

 from transcription import fast_transcription, speech_to_text, doWhisperX
 from whisperx.utils import get_writer
 from audio import normalizeAudio, separateVoiceInstrumental, mp3_to_wav, stereo_to_mono, cutaudio, compose_audio
+from helpers import guardar_en_archivo, guardar_dataframe_en_csv, generar_transcripcion
 import json
     #nombre_archivo = guardar_en_archivo(out)
     ##########################################################################
     from pathlib import Path
     import datetime
     fecha_actual = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     srt_writer = get_writer("srt", Path("."))
     result_aligned["language"] = language
     srt_writer(result_aligned, file_path, writter_args)
+    ###########################################################################
+    # Creating the txt
+    lineas_txt, nombre_file_txt = generar_transcripcion(diarize_segments)
+    lineas_txt_string = "\n".join(lineas_txt)
     ###########################################################################
+    outputs = (audio_path, audio_normalized_path, vocal_path, novocal_path, lineas_txt_string, nombre_file_txt,
+               str(file_path), guardar_dataframe_en_csv(diarize_segments), json.dumps(result_speakers))
+    return outputs
 transcribeI = gr.Interface(
     fn=transcribe,
              gr.Audio(type="filepath", label="normalized"),
              gr.Audio(type="filepath", label="vocal"),
              gr.Audio(type="filepath", label="no_vocal"),
+             gr.TextArea(label="Transcripción"),
+             gr.File(label="Archivo TXT generado"),
+             gr.File(label="Archivo SRT generado con turno de palabra"),
+             gr.File(label="Archivo CSV generado con turno de palabra"),
+             gr.JSON(label="Resultados estructurados en JSON palabra por palabra"),
              #gr.JSON(label="JSON Output"),
              #gr.File(label="Archivo generado")
         ],
         "Esta página realiza una transcripción de audio utilizando Whisper. Además añade varias mejoras y utilidades: a) Preprocesamiento del audio y limpieza de ruido ambiental, b) Conversión de los archivos de audio a un formato compatible con Whisper, c) Cálculo de la marca temporal palabra por palabra, d) Cálculo del nivel de seguridad de la transcripción, e) Conversión del resultado a .csv, .srt y ass.\n"
     ),
     allow_flagging="never",
+    examples=[["Espana 04 - Video 01 - extracto 2 min.wav",
+               "large-v2",
+               "Cualquiera",
+               0.5,
+               "",
+               "",
+               0.5,
+               0.5,
+               0.5,
+               0.5]]
 )

helpers.py CHANGED Viewed

@@ -77,5 +77,5 @@ def generar_transcripcion(dataframe):
     # Guardamos la transcripción en un archivo de texto
     nombre_archivo = guardar_en_archivo(lineas_transcripcion)
-    return nombre_archivo

     # Guardamos la transcripción en un archivo de texto
     nombre_archivo = guardar_en_archivo(lineas_transcripcion)
+    return lineas_transcripcion, nombre_archivo