File size: 2,941 Bytes
68f92dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
import whisper    # Library for speech recognition
from transformers import pipeline
import pandas as pd


# Checkpoint identifiers, named so they are easy to find and swap.
WHISPER_CHECKPOINT = "base"
SUMMARIZER_CHECKPOINT = "google/pegasus-large"

# Speech-recognition model; loaded once at import time and reused per request.
whisper_model = whisper.load_model(WHISPER_CHECKPOINT)

# Hugging Face summarization pipeline; likewise loaded once at import time.
summarization = pipeline(task="summarization", model=SUMMARIZER_CHECKPOINT)

def process_audio(audio_file, min_length, max_length):
    """Transcribe an audio file and summarize the resulting transcript.

    The audio path, transcript and summary are also written to
    ``results.csv`` in the current working directory, replacing any
    previous contents of that file.

    Args:
        audio_file: Path to the audio file to transcribe, or ``None`` when
            nothing was provided.
        min_length: Minimum summary length forwarded to the summarization
            pipeline. Coerced to ``int`` because UI sliders may deliver
            floats.
        max_length: Maximum summary length forwarded to the summarization
            pipeline. Coerced to ``int`` for the same reason.

    Returns:
        A ``(transcript, summary)`` tuple of strings. If any step fails,
        both elements hold the same ``"An error occurred: ..."`` message;
        no exception propagates to the caller.
    """
    try:
        if audio_file is None:
            raise ValueError("No audio file provided.")

        # Validate the cheap inputs first so a bad request fails before the
        # (slow) transcription step runs.
        min_tokens = int(min_length)
        max_tokens = int(max_length)
        if min_tokens > max_tokens:
            raise ValueError(
                "Minimum summary length cannot exceed maximum summary length."
            )

        # Use the Whisper model to transcribe the audio file into text.
        result = whisper_model.transcribe(audio_file)
        # Strip so that a whitespace-only transcription counts as empty.
        text = result["text"].strip()
        if not text:
            raise ValueError("Failed to transcribe the audio. The transcription result is empty.")

        # truncation=True keeps transcripts longer than the model's input
        # limit from failing inside the summarizer.
        summary_result = summarization(
            text,
            min_length=min_tokens,
            max_length=max_tokens,
            truncation=True,
        )
        summary = summary_result[0]["summary_text"]
        if not summary:
            raise ValueError("Failed to summarize the transcript. The summary result is empty.")

        # Persist a single-row record of this run; the file is overwritten
        # on every call.
        df_results = pd.DataFrame(
            {
                "Audio File": [audio_file],
                "Transcript": [text],
                "Summary": [summary],
            }
        )
        df_results.to_csv("results.csv", index=False)

        return text, summary

    except Exception as e:
        # Deliberately broad: this function is the UI callback boundary, so
        # every failure is reported in both output fields instead of raising.
        error_message = f"An error occurred: {str(e)}"
        return error_message, error_message

# Build the Gradio UI: one audio upload plus two sliders feed process_audio,
# whose (transcript, summary) result fills the two text boxes.
iface = gr.Interface(
    fn=process_audio,  # Callback invoked with (audio path, min length, max length)
    inputs=[
        # type="filepath" hands the callback a path string rather than raw samples.
        gr.Audio(sources=["upload"], type="filepath", label="Upload your audio file"),
        # step=1 keeps the slider values integral; summary lengths must be whole numbers.
        gr.Slider(minimum=10, maximum=50, value=30, step=1, label="Minimum Summary Length"),
        gr.Slider(minimum=50, maximum=600, value=100, step=1, label="Maximum Summary Length"),
    ],
    outputs=[
        gr.Textbox(label="Transcript"),  # Shows the transcript (or the error message)
        gr.Textbox(label="Summary"),     # Shows the summary (or the error message)
    ],
    title="Audio to Summarized Transcript",
    description="Upload an audio file and adjust summary length to get both the transcript and summary.",
)

# Launch the app
iface.launch()