summary_backup / transcribe.py
xsestech's picture
Created app
d5c679f verified
from __future__ import annotations

import os
import subprocess
import time
from typing import Callable

from gradio_client import Client
from groq import Groq
from loguru import logger

from rate_limit import rate_limit_bypass
from settings import app_settings
def parse_audio(input_file_path: str, output_file_path: str) -> None:
    """Extract the audio of a media file into an mp3 file using ffmpeg.

    Parameters
    ----------
    input_file_path : str
        Path of the source video (or audio) file.
    output_file_path : str
        Path of the mp3 file to write. An existing file is overwritten
        (ffmpeg is invoked with ``-y``).

    Notes
    -----
    Failures are logged rather than raised, so the function stays
    best-effort like the ``os.system`` call it replaces.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and rules out command injection via file names.
    command = ["ffmpeg", "-y", "-i", input_file_path, "-f", "mp3", output_file_path]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as error:
        # Raised e.g. when the ffmpeg executable cannot be found or started.
        logger.error(f"Could not run ffmpeg: {error}")
        return
    if completed.returncode != 0:
        logger.error(
            f"ffmpeg exited with code {completed.returncode} while converting {input_file_path}"
        )
def split_audio_file(audio_folder_path: str, segment_time: int = app_settings.segment_time) -> None:
    """Split ``audio.mp3`` into fixed-length segments using ffmpeg.

    The segments are written next to the source file as ``audio_000.mp3``,
    ``audio_001.mp3``, ... and are stream-copied (``-c copy``), i.e. not
    re-encoded.

    Parameters
    ----------
    audio_folder_path : str
        Folder that contains ``audio.mp3``; the segments are written into
        the same folder.
    segment_time : int, optional
        Time in seconds for each segment.

    Notes
    -----
    Failures are logged rather than raised, so the function stays
    best-effort like the ``os.system`` call it replaces.
    """
    audio_file_path = os.path.join(audio_folder_path, "audio.mp3")
    output_file_template = os.path.join(audio_folder_path, "audio_%03d.mp3")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and rules out command injection via file names.
    command = [
        "ffmpeg",
        "-y",
        "-i",
        audio_file_path,
        "-f",
        "segment",
        "-segment_time",
        str(segment_time),
        "-c",
        "copy",
        output_file_template,
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as error:
        # Raised e.g. when the ffmpeg executable cannot be found or started.
        logger.error(f"Could not run ffmpeg: {error}")
        return
    if completed.returncode != 0:
        logger.error(
            f"ffmpeg exited with code {completed.returncode} while splitting {audio_file_path}"
        )
@rate_limit_bypass(sleep_time=10)
def groq_transcript(client: Groq, audio_file_path: str) -> str:
    """Transcribe a single audio file with the ``whisper-large-v3`` model.

    Parameters
    ----------
    client : Groq
        Client used to send the transcription request.
    audio_file_path : str
        The path to the audio file to transcribe.

    Returns
    -------
    str
        The text of the transcription.
    """
    # Load the whole file up front; the request takes a (name, bytes) pair.
    with open(audio_file_path, "rb") as audio_stream:
        audio_bytes = audio_stream.read()

    response = client.audio.transcriptions.create(
        file=(audio_file_path, audio_bytes),
        model="whisper-large-v3",
    )
    logger.debug(f"Transcription: {response.text}")
    return response.text
def _segment_sort_key(file_name: str) -> tuple[int, int, str]:
    """Return a sort key ordering ``audio_<N>.*`` files by their numeric index N."""
    index = os.path.splitext(file_name)[0][len("audio_"):]
    if index.isdecimal():
        return (0, int(index), file_name)
    # Names without a purely numeric index go last, in alphabetical order.
    return (1, 0, file_name)


def get_full_transcript(
    audio_folder_path: str,
    client: Client | Groq,
    one_file_transcript_func: Callable[[Client | Groq, str], str] = groq_transcript,
) -> list[str]:
    """Get full transcript for all audio files in a folder.

    The folder's ``audio.mp3`` is first split into segments, then every
    ``audio_*`` file is transcribed in segment order.

    Parameters
    ----------
    audio_folder_path : str
        Folder where all audio files are located.
    client : Client | Groq
        A client object to pass to the transcript function.
    one_file_transcript_func : Callable[[Client | Groq, str], str], optional
        Function that transcribes a single audio file; it is called with
        the client and the path of the segment.

    Returns
    -------
    list[str]
        One transcript per audio segment, ordered by segment index.
    """
    logger.info("Getting transcript...")
    split_audio_file(audio_folder_path)
    # os.listdir returns entries in arbitrary order; sort by segment index so
    # the transcripts come back in the chronological order of the recording.
    segment_names = sorted(
        (name for name in os.listdir(audio_folder_path) if name.startswith("audio_")),
        key=_segment_sort_key,
    )
    transcript = []
    for file_name in segment_names:
        audio_file_path = os.path.join(audio_folder_path, file_name)
        transcript.append(one_file_transcript_func(client, audio_file_path))
        # Short pause between requests (presumably to ease API rate limits).
        time.sleep(2)
    return transcript