subtify / transcribe.py
Maximofn's picture
Manage files into transcribe.py
fdb83d2
raw
history blame
1.42 kB
import argparse
import os
import subprocess
def transcribe(audio_file, language):
    """Transcribe one audio file to an SRT file using the ``whisper`` CLI.

    The ``whisper`` executable is invoked with the ``large-v2`` model on the
    ``cuda`` device, with word timestamps enabled, and writes its output to
    the ``transcriptions`` folder.

    The command is passed to the OS as an argument list rather than a shell
    string, so file names containing spaces or shell metacharacters are
    handed to whisper verbatim instead of being re-parsed by a shell.

    Args:
        audio_file: Path of the audio file to transcribe.
        language: Value forwarded to whisper's ``--language`` option.

    Returns:
        The exit status of the whisper process (0 on success). A non-zero
        status is returned, not raised, so a batch caller can carry on with
        the remaining files.

    Raises:
        FileNotFoundError: If the ``whisper`` executable is not on ``PATH``.
    """
    output_folder = "transcriptions"

    # Fixed whisper settings used for every transcription.
    model = "large-v2"
    word_timestamps = True
    fp16 = False
    device = "cuda"
    verbose = False
    threads = 4
    output_format = "srt"

    # Every argument must be a string; booleans become "True"/"False",
    # which is the form the original command line used.
    command = [
        "whisper",
        "--model", model,
        "--output_dir", output_folder,
        "--language", str(language),
        "--word_timestamps", str(word_timestamps),
        "--fp16", str(fp16),
        "--device", device,
        "--verbose", str(verbose),
        "--threads", str(threads),
        "--output_format", output_format,
        str(audio_file),
    ]
    # check=False: a failed transcription is reported through the return
    # value rather than an exception.
    completed = subprocess.run(command, check=False)
    return completed.returncode
def speaker_vocal_files(input_path, speakers, vocals_folder="vocals", extension="wav"):
    """Return the per-speaker vocal file paths derived from one input path.

    The base name of ``input_path`` (directories and the final extension
    removed) is combined with a zero-padded, three-digit speaker index:
    ``{vocals_folder}/{name}_speaker{index:03d}.{extension}``.

    Args:
        input_path: Path of the original input file; it may contain any
            number of directory components and dots.
        speakers: Number of speakers; one path is produced per index in
            ``range(speakers)``.
        vocals_folder: Folder that prefixes every returned path.
        extension: Extension (without the dot) of the returned paths.

    Returns:
        A list of ``speakers`` path strings, in speaker-index order.
    """
    # basename + splitext cope with nested folders, missing folders and dots
    # inside the file name; splitting on "/" and "." only handled paths with
    # exactly one of each.
    input_name, _ = os.path.splitext(os.path.basename(input_path))
    return [
        f'{vocals_folder}/{input_name}_speaker{i:03d}.{extension}'
        for i in range(speakers)
    ]


def main():
    """Transcribe every per-speaker vocal file listed by the CLI arguments.

    Reads the speaker count from the first line of ``speakers_file``, reads
    one input path per line from ``input_files``, and calls ``transcribe``
    on each derived per-speaker vocal file.
    """
    parser = argparse.ArgumentParser(description='Transcribe audio files')
    parser.add_argument('input_files', help='Input audio files')
    parser.add_argument('language', help='Language of the audio file')
    parser.add_argument('speakers_file', help='File with the number of speakers')
    args = parser.parse_args()

    with open(args.speakers_file, 'r') as f:
        speaker_lines = f.read().splitlines()
    try:
        speakers = int(speaker_lines[0])
    except (IndexError, ValueError):
        # parser.error prints the message and exits with status 2.
        parser.error(
            f'{args.speakers_file} must contain the number of speakers on its first line'
        )

    with open(args.input_files, 'r') as f:
        input_paths = f.read().splitlines()

    for input_path in input_paths:
        # Skip blank lines (e.g. a trailing empty line in the list file).
        if not input_path.strip():
            continue
        for vocal_file in speaker_vocal_files(input_path, speakers):
            transcribe(vocal_file, args.language)


if __name__ == "__main__":
    main()