File size: 2,700 Bytes
d5c679f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from __future__ import annotations

import os
import subprocess
import time
from typing import Callable

from gradio_client import Client
from groq import Groq
from loguru import logger

from rate_limit import rate_limit_bypass
from settings import app_settings


def parse_audio(input_file_path: str, output_file_path: str) -> None:
    """Convert a media file to mp3 by invoking ffmpeg.

    Parameters
    ----------
    input_file_path : str
        Path handed to ffmpeg as its input (``-i``).
    output_file_path : str
        Path of the mp3 file to write. An existing file is overwritten
        because ffmpeg is called with ``-y``.

    Raises
    ------
    FileNotFoundError
        If the ``ffmpeg`` executable cannot be found.
    """
    # An argument list (no shell) keeps paths that contain spaces or shell
    # metacharacters intact: each path reaches ffmpeg as one literal argument.
    command = ["ffmpeg", "-y", "-i", input_file_path, "-f", "mp3", output_file_path]
    # check=False: ffmpeg's exit status is ignored, as it was with os.system.
    subprocess.run(command, check=False)


def split_audio_file(audio_folder_path: str, segment_time: int = app_settings.segment_time) -> None:
    """Split ``audio.mp3`` inside a folder into numbered segments using ffmpeg.

    The segments are written next to the source file as ``audio_000.mp3``,
    ``audio_001.mp3``, ... and the audio stream is copied, not re-encoded
    (``-c copy``).

    Parameters
    ----------
    audio_folder_path : str
        The folder that contains ``audio.mp3``; segments are written there too.
    segment_time : int, optional
        Time in seconds for each segment.

    Raises
    ------
    FileNotFoundError
        If the ``ffmpeg`` executable cannot be found.
    """
    audio_file_path = os.path.join(audio_folder_path, "audio.mp3")
    output_file_template = os.path.join(audio_folder_path, "audio_%03d.mp3")
    # An argument list (no shell) keeps folder names that contain spaces or
    # shell metacharacters intact instead of letting a shell re-split them.
    command = [
        "ffmpeg",
        "-y",
        "-i",
        audio_file_path,
        "-f",
        "segment",
        "-segment_time",
        str(segment_time),
        "-c",
        "copy",
        output_file_template,
    ]
    # check=False: ffmpeg's exit status is ignored, as it was with os.system.
    subprocess.run(command, check=False)


@rate_limit_bypass(sleep_time=10)
def groq_transcript(client: Groq, audio_file_path: str) -> str:
    """Transcribe a single audio file with the ``whisper-large-v3`` model.

    Parameters
    ----------
    client : Groq
        Client whose ``audio.transcriptions.create`` method is called.
    audio_file_path : str
        The path to the audio file to transcribe.

    Returns
    -------
    str
        The ``text`` attribute of the transcription response.
    """
    with open(audio_file_path, "rb") as audio_stream:
        # The whole file is read into memory and sent as a (name, bytes) pair.
        payload = (audio_file_path, audio_stream.read())
        response = client.audio.transcriptions.create(
            file=payload,
            model="whisper-large-v3",
        )
    logger.debug(f"Transcription: {response.text}")
    return response.text


def get_full_transcript(
        audio_folder_path: str,
        client: Client | Groq,
        one_file_transcript_func: Callable[[Client | Groq, str], str] = groq_transcript,
) -> list[str]:
    """Get full transcript for all audio segments in a folder.

    The folder's ``audio.mp3`` is first split into ``audio_*`` segments, then
    every segment is transcribed in segment order.

    Parameters
    ----------
    audio_folder_path : str
        Folder where all audio files are located.
    client : Client | Groq
        A client object to pass to the transcript function.
    one_file_transcript_func : Callable[[Client | Groq, str], str], optional
        Function that transcribes a single audio file; it is called with
        ``client`` and the path of one segment.

    Returns
    -------
    list[str]
        One transcript per segment file, ordered by segment number.
    """

    logger.info("Getting transcript...")
    split_audio_file(audio_folder_path)
    # os.listdir returns entries in arbitrary order, so sort explicitly to keep
    # the transcript chronological. Sorting by (length, name) orders the
    # zero-padded names numerically even past "audio_999".
    segment_names = sorted(
        (name for name in os.listdir(audio_folder_path) if name.startswith("audio_")),
        key=lambda name: (len(name), name),
    )
    transcript = []
    for file_name in segment_names:
        audio_file_path = os.path.join(audio_folder_path, file_name)
        transcript.append(one_file_transcript_func(client, audio_file_path))
        # Fixed 2 s pause after each segment (presumably to space out API calls).
        time.sleep(2)
    return transcript