import os
import sys
import torch

# import whisperx
import stable_whisper
# from faster_whisper import WhisperModel
import dl_translate as dlt


def load_models():
    '''
    Checks CUDA availability and loads the transcription and translation models.
    Returns a dict mapping task names to loaded model objects.
    '''
    try:
        print(f"CUDA Available: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            print(f"CUDA Device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
        
        models = dict()

        # Transcription Model - Whisper
        models['transcription'] = stable_whisper.load_model('large-v2')    # Stable Whisper
        # models['transcription'] = WhisperModel("large-v2", device="cuda", compute_type="float16")    # Faster Whisper

        # Translation Model - NLLB
        nllb_model = 'facebook/nllb-200-distilled-600M'
        # nllb_model = 'facebook/nllb-200-1.3B'
        # nllb_model = 'facebook/nllb-200-3.3B'
        # nllb_model = 'facebook/nllb-moe-54b'
        models['translation'] = dlt.TranslationModel(nllb_model)
        
        # TODO: Audio Generation Model - Bark
        # models['audiobook'] = 
        
        return models

    except KeyboardInterrupt:
        # Exit cleanly if the user interrupts model loading
        print('Interrupted')
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
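

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): load the models,
    # transcribe an audio file, then translate the transcript to English.
    # The path 'sample.wav' and the source language 'Spanish' are placeholders;
    # this assumes stable_whisper's transcribe() on the loaded model and
    # dl_translate's TranslationModel.translate() as documented by those libraries.
    models = load_models()

    # Transcription with Stable Whisper
    result = models['transcription'].transcribe('sample.wav')
    transcript = result.text

    # Translation with NLLB via dl_translate
    translated = models['translation'].translate(transcript, source='Spanish', target='English')
    print(translated)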