File size: 2,718 Bytes
c366a43
 
 
 
 
 
0724eff
4b43f8f
c366a43
 
 
 
0724eff
c366a43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e115f21
c366a43
 
 
 
 
 
 
 
 
 
 
 
d8193ec
 
 
 
 
 
 
 
 
e115f21
d8193ec
 
 
 
 
 
e115f21
d8193ec
 
c366a43
 
 
 
 
 
 
e115f21
c366a43
 
e115f21
c366a43
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
import assemblyai as aai
import re
import os
import tempfile

# Configure the AssemblyAI SDK at import time. The key is read from the
# ASSEMBLYAI_API_KEY environment variable and is None when it is not set.
aai.settings.api_key = os.environ.get('ASSEMBLYAI_API_KEY')

def create_assembly_transcript(audio_file):
    """Submit *audio_file* to AssemblyAI with speaker labels enabled.

    Args:
        audio_file: Audio source passed unchanged to
            ``aai.Transcriber.transcribe``.

    Returns:
        Whatever ``aai.Transcriber.transcribe`` returns for the request.
    """
    client = aai.Transcriber()
    # speaker_labels=True requests per-speaker utterances in the result.
    labelled_config = aai.TranscriptionConfig(speaker_labels=True)
    return client.transcribe(audio_file, config=labelled_config)

def transcript_to_string(transcript):
    """Render a transcript's utterances as plain text.

    Every utterance produces a two-line entry followed by a blank line::

        SPEAKER <speaker> <HH:MM:SS>
        <text>

    Args:
        transcript: Object exposing ``utterances``: an iterable of items
            with ``speaker``, ``start`` and ``text`` attributes, or ``None``.
            ``start`` is formatted with ``format_time``.

    Returns:
        The concatenated entries, or an empty string when there are no
        utterances.
    """
    # ``utterances`` may be None (e.g. no speaker-labelled segments were
    # produced); treat that the same as an empty list instead of raising.
    utterances = transcript.utterances or []
    # Build the text with a single join rather than repeated ``+=``.
    return "".join(
        f"SPEAKER {utterance.speaker} {format_time(utterance.start)}\n"
        f"{utterance.text}\n\n"
        for utterance in utterances
    )

def format_time(milliseconds):
    """Format a millisecond offset as a zero-padded ``HH:MM:SS`` string.

    Args:
        milliseconds: Offset in milliseconds; any sub-second remainder is
            discarded by floor division.

    Returns:
        The offset as ``HH:MM:SS``. Each field is padded to at least two
        digits, so the hours field grows beyond two digits when needed.
    """
    whole_seconds = milliseconds // 1000
    # Peel off seconds first, then split the remaining minutes into hours.
    whole_minutes, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(whole_minutes, 60)
    return f"{int(hrs):02}:{int(mins):02}:{int(secs):02}"

def format_transcript_markdown(transcript_string):
    """Add markdown emphasis to the header lines of a plain-text transcript.

    A header line is a line made of a label, one whitespace character and a
    trailing ``HH:MM:SS`` timestamp (the hours field may have more than two
    digits). The label is wrapped in ``**...**`` and the timestamp in
    ``_..._``; every other line is returned unchanged.

    Args:
        transcript_string: Plain-text transcript, e.g. the output of
            ``transcript_to_string``.

    Returns:
        The transcript with header lines rewritten as
        ``**<label>** _<timestamp>_``.
    """
    # The timestamp must end the line (an optional "\r" is tolerated for CRLF
    # input), so a timestamp-shaped token in the middle of spoken text is not
    # mistaken for a header. The separating whitespace is captured and
    # re-emitted unchanged.
    header_pattern = r"^(.+?)(\s)(\d{2,}:\d{2}:\d{2})(?=\r?$)"
    return re.sub(
        header_pattern, r"**\1**\2_\3_", transcript_string, flags=re.MULTILINE
    )

def transcribe_audio(audio_file):
    """Transcribe an audio file and produce preview text plus a markdown file.

    Args:
        audio_file: Audio source forwarded to ``create_assembly_transcript``,
            or ``None`` when nothing was supplied.

    Returns:
        A ``(text, path)`` tuple. On success ``text`` is the plain-text
        transcript and ``path`` is the name of a temporary ``.md`` file
        containing the markdown-formatted transcript. When no file was
        supplied, the transcript reports an error, or an exception occurs,
        ``text`` is a human-readable message and ``path`` is ``None``.
    """
    if audio_file is None:
        return "Please upload an audio file.", None

    try:
        transcript = create_assembly_transcript(audio_file)

        if transcript.error:
            return f"An error occurred: {transcript.error}", None

        transcript_string = transcript_to_string(transcript)
        md_transcript = format_transcript_markdown(transcript_string)

        # delete=False keeps the file on disk after the handle is closed so
        # its path can be returned. The encoding is fixed to UTF-8 so that
        # non-ASCII transcript text is written correctly regardless of the
        # platform's default locale encoding.
        with tempfile.NamedTemporaryFile(
            mode='w', delete=False, suffix='.md', encoding='utf-8'
        ) as temp_file:
            temp_file.write(md_transcript)
            temp_file_path = temp_file.name

        return transcript_string, temp_file_path
    except Exception as e:
        # Boundary handler: any failure is reported as text to the caller
        # rather than propagated.
        return f"An error occurred: {str(e)}", None

def launch_app():
    """Build the Gradio interface around ``transcribe_audio`` and launch it.

    The interface takes one audio input (delivered to the callback as a file
    path) and shows two outputs: a text box and a file component, matching
    the ``(text, path)`` tuple returned by ``transcribe_audio``.
    """
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    preview_box = gr.Textbox(label="Transcript Preview", lines=10)
    download_slot = gr.File(label="Download Formatted Transcript")

    blurb = "Upload an audio file to get a transcription with speaker labels. The preview shows plain text, while the download includes markdown formatting."

    app = gr.Interface(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[preview_box, download_slot],
        title="Audio Transcription App",
        description=blurb,
    )
    app.launch()

# Start the app only when this file is executed as a script; importing it as
# a module defines the functions without launching the interface.
if __name__ == "__main__":
    launch_app()