{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install --upgrade vocos encodec librosa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pprint\n",
    "import IPython.display as ipd\n",
    "import torch\n",
    "import librosa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the MARS5 English model and its inference config class from torch hub\n",
    "mars5, config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that the model is loaded, pick a reference audio clip to clone the voice from. If you want to use a deep clone, also provide the transcript of that reference."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download an example reference audio clip\n",
    "!wget -O example.wav https://github.com/Camb-ai/mars5-tts/raw/master/docs/assets/example_ref.wav"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the reference audio at the model's expected sample rate\n",
    "wav, sr = librosa.load('./example.wav', sr=mars5.sr, mono=True)\n",
    "wav = torch.from_numpy(wav)\n",
    "ref_transcript = \"We actually haven't managed to meet demand.\"\n",
    "print(\"Reference audio:\")\n",
    "ipd.display(ipd.Audio(wav.numpy(), rate=mars5.sr))\n",
    "print(f\"Reference transcript: {ref_transcript}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set deep_clone=False if you don't know the reference transcript or want faster inference\n",
    "deep_clone = True\n",
    "# other inference settings (top_k, temperature, top_p, ...) can be tuned here\n",
    "cfg = config_class(deep_clone=deep_clone, rep_penalty_window=100,\n",
    "                   top_k=100, temperature=0.7, freq_penalty=3)\n",
    "\n",
    "ar_codes, wav_out = mars5.tts(\"The quick brown rat.\", wav,\n",
    "                              ref_transcript, cfg=cfg)\n",
    "\n",
    "print('Synthesized output audio:')\n",
    "ipd.Audio(wav_out.numpy(), rate=mars5.sr)"
   ]
  },
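  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to keep the synthesized audio, you can write it to a wav file. Below is a minimal sketch using `soundfile` (it ships as a `librosa` dependency, so it should already be installed; otherwise run `pip install soundfile`). The filename `synthesized.wav` is just an example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import soundfile as sf\n",
    "\n",
    "# write the synthesized waveform (1-D float tensor) to disk at the model's sample rate\n",
    "sf.write('synthesized.wav', wav_out.numpy(), mars5.sr)"
   ]
  },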
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can see all of the inference settings available to tune by printing the default inference config:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pprint.pprint(config_class())"
   ]
  },
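  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, a shallow clone skips conditioning on the reference transcript and is faster, though it generally gives a less faithful clone. The cell below is a hedged sketch: it assumes `ref_transcript` may be passed as `None` when `deep_clone=False`, and it reuses the sampling settings from above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shallow clone: no reference transcript needed, faster inference\n",
    "shallow_cfg = config_class(deep_clone=False, rep_penalty_window=100,\n",
    "                           top_k=100, temperature=0.7, freq_penalty=3)\n",
    "\n",
    "# assumption: ref_transcript may be None when deep_clone=False\n",
    "_, shallow_wav = mars5.tts(\"The quick brown rat.\", wav, None, cfg=shallow_cfg)\n",
    "\n",
    "print('Shallow-clone output audio:')\n",
    "ipd.Audio(shallow_wav.numpy(), rate=mars5.sr)"
   ]
  },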
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For debugging, you can also listen to the raw coarse codes (`ar_codes`) passed through the vocoder:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# vocode only the coarse autoregressive codes\n",
    "ar_wav = mars5.vocode(ar_codes.cpu()[:, None])\n",
    "ipd.Audio(ar_wav.numpy(), rate=mars5.sr)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "matt-py311",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}