File size: 1,727 Bytes
7af4a9f
1eb675b
 
7af4a9f
1eb675b
56b6854
1eb675b
 
 
 
 
 
 
 
 
 
 
 
56b6854
 
1eb675b
 
 
 
 
 
56b6854
 
1eb675b
56b6854
1eb675b
 
56b6854
1eb675b
 
56b6854
1eb675b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio as gr
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

# Load the pretrained Wav2Vec2 processor and CTC model from a single
# checkpoint identifier so the two can never point at different weights.
_WAV2VEC2_CHECKPOINT = "facebook/wav2vec2-large-960h-lv60-self"
processor = Wav2Vec2Processor.from_pretrained(_WAV2VEC2_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_WAV2VEC2_CHECKPOINT)

# Transcribe audio and evaluate the fluency of the resulting text
def evaluate_fluency(audio):
    """Transcribe ``audio`` with the Wav2Vec2 model and score the transcription.

    Args:
        audio: One of

            * ``None`` -- nothing was uploaded;
            * a filesystem path (``str`` / ``os.PathLike``), or an object
              whose ``name`` attribute is a path string, such as the
              temporary file a Gradio ``Audio`` input with ``type="file"``
              hands over;
            * anything else is forwarded to the processor unchanged and is
              assumed to already be a waveform sampled at 16 kHz.

    Returns:
        A ``(transcription, fluency_score)`` tuple. ``None`` input yields
        ``("", 0)``.

    Raises:
        Errors raised while opening or decoding the audio file (decoding is
        delegated to ``ffmpeg_read``, which needs the ``ffmpeg`` binary)
        propagate to the caller.
    """
    import os

    sampling_rate = 16_000

    if audio is None:
        # Nothing to transcribe: report an empty result (zero words) rather
        # than failing inside the processor with an opaque error.
        return "", 0

    # Work out whether we were given a file (path or file-like) instead of
    # samples. PathLike is checked first because ``pathlib.Path.name`` is only
    # the basename, not a usable path.
    if isinstance(audio, (str, os.PathLike)):
        audio_path = os.fspath(audio)
    else:
        name_attr = getattr(audio, "name", None)
        audio_path = name_attr if isinstance(name_attr, str) else None

    if audio_path is not None:
        # The processor expects raw samples, not a file handle: decode the
        # file to a mono float32 waveform at the model's sampling rate.
        from transformers.pipelines.audio_utils import ffmpeg_read

        with open(audio_path, "rb") as audio_file:
            audio = ffmpeg_read(audio_file.read(), sampling_rate)

    inputs = processor(
        audio, return_tensors="pt", sampling_rate=sampling_rate
    ).input_values
    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        logits = model(inputs).logits
    # Greedy CTC decoding: take the most likely token at every frame.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    # Evaluate fluency (custom metric)
    fluency_score = my_custom_fluency_metric(transcription)
    return transcription, fluency_score

# Custom metric used to evaluate fluency
def my_custom_fluency_metric(transcription):
    """Return a placeholder "fluency" score for ``transcription``.

    This is where real fluency logic belongs: an NLP metric such as ROUGE or
    BLEU, or a bespoke measure. For now the score is simply the number of
    whitespace-separated words in the text.
    """
    words = transcription.split()
    return len(words)

# Gradio interface for the application: one uploaded audio file goes in,
# two text boxes (transcription and fluency score) come out.
audio_input = gr.inputs.Audio(type="file", source="upload")
output_text, output_score = (
    gr.outputs.Textbox(label=caption)
    for caption in ("Transcription", "Fluency Score")
)

demo = gr.Interface(
    evaluate_fluency,
    audio_input,
    [output_text, output_score],
    title="Audio Transcription & Fluency Evaluation",
    description="Upload an audio file and evaluate transcription & fluency of the generated text.",
)
# Start the web server as soon as the script is executed.
demo.launch()