File size: 1,348 Bytes
1d427a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def load_models():
    """Load the Silero TTS model and place it on the available device.

    The model is requested with language ``'en'`` and speaker/model id
    ``'v3_en'``. It is moved to CUDA when ``torch.cuda.is_available()``
    reports a usable GPU, otherwise to the CPU.

    Returns:
        The model object returned by ``silero_tts``.
    """
    import torch
    from silero import silero_tts

    # silero_tts returns a pair; only the first element (the model) is used.
    tts_model, _unused = silero_tts(language='en', speaker='v3_en')

    if torch.cuda.is_available():
        target_device = 'cuda'
    else:
        target_device = 'cpu'

    tts_model.to(target_device)
    return tts_model

def audiobook_gen(ebook, title, model):
    """Synthesize one WAV file per chapter of an ebook.

    Every sentence of a chapter is passed to ``model.apply_tts``; the
    resulting tensors are concatenated and saved as a single-channel file
    at ``outputs/{title}_chapterNNN.wav``, where ``NNN`` is the zero-based
    position of the chapter in ``ebook``.

    Args:
        ebook: Iterable of chapters, each chapter an iterable of sentences
            that are handed to ``model.apply_tts`` as ``text``.
        title: String used as the prefix of every output file name.
        model: Object exposing ``apply_tts(text=..., speaker=...,
            sample_rate=...)``, e.g. the value returned by ``load_models``.

    Returns:
        None. Results are written to disk; sentences that produce no usable
        tensor and chapters with no audio are reported via ``print``.
    """
    import os

    import torch
    import torchaudio
    from stqdm import stqdm

    sample_rate = 24000
    speaker = 'en_0'

    # Make sure the target directory exists before any chapter is saved.
    os.makedirs('outputs', exist_ok=True)

    # enumerate() yields the true position of each chapter. The previous
    # ebook.index(chapter) lookup returned the first *equal* chapter, so
    # duplicate chapters shared one index and overwrote each other's file.
    for chapter_number, chapter in enumerate(
            stqdm(ebook, desc="Chapters in ebook:")):
        chapter_index = f'chapter{chapter_number:03}'
        audio_list = []
        for sentence in stqdm(chapter, desc="Sentences in chapter:"):
            audio = model.apply_tts(text=sentence,
                                    speaker=speaker,
                                    sample_rate=sample_rate)
            # Check the type first so a non-tensor result (e.g. None) is
            # reported below instead of raising TypeError inside len().
            if isinstance(audio, torch.Tensor) and len(audio) > 0:
                audio_list.append(audio)
            else:
                print(f'Tensor for sentence is not valid: \n {sentence}')

        sample_path = f'outputs/{title}_{chapter_index}.wav'

        if len(audio_list) > 0:
            # Join all sentences and reshape to (1, samples): one channel.
            audio_file = torch.cat(audio_list).reshape(1, -1)
            torchaudio.save(sample_path, audio_file, sample_rate)
        else:
            print(f'Chapter {chapter_index} is empty.')