sasan committed on
Commit
5baa807
·
1 Parent(s): 6b9d2e8
Files changed (6) hide show
  1. kitt.py +0 -25
  2. skills/__init__.py +0 -0
  3. skills/poi.py +0 -0
  4. skills/routing.py +24 -0
  5. skills/weather.py +0 -0
  6. tts.ipynb +288 -0
kitt.py CHANGED
@@ -5,31 +5,6 @@ import requests
5
  # INTERFACE WITH AUDIO TO AUDIO
6
 
7
 
8
- def calculate_route():
9
- api_key = "api_key"
10
- origin = "49.631997,6.171029"
11
- destination = "49.586745,6.140002"
12
-
13
- url = f"https://api.tomtom.com/routing/1/calculateRoute/{origin}:{destination}/json?key={api_key}"
14
- response = requests.get(url)
15
- data = response.json()
16
-
17
- lats = []
18
- lons = []
19
-
20
- for point in data['routes'][0]['legs'][0]['points']:
21
- lats.append(point['latitude'])
22
- lons.append(point['longitude'])
23
- # fig = px.line_geo(lat=lats, lon=lons)
24
- # fig.update_geos(fitbounds="locations")
25
-
26
- fig = px.line_mapbox(lat=lats, lon=lons, zoom=12, height=600)
27
-
28
- fig.update_layout(mapbox_style="open-street-map", mapbox_zoom=12, mapbox_center_lat=lats[0], mapbox_center_lon=lons[0])
29
- fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
30
-
31
- return fig
32
-
33
 
34
  def transcript(
35
  general_context, link_to_audio, voice, emotion, place, time, delete_history, state
 
5
  # INTERFACE WITH AUDIO TO AUDIO
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def transcript(
10
  general_context, link_to_audio, voice, emotion, place, time, delete_history, state
skills/__init__.py ADDED
File without changes
skills/poi.py ADDED
File without changes
skills/routing.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def calculate_route():
2
+ api_key = "api_key"
3
+ origin = "49.631997,6.171029"
4
+ destination = "49.586745,6.140002"
5
+
6
+ url = f"https://api.tomtom.com/routing/1/calculateRoute/{origin}:{destination}/json?key={api_key}"
7
+ response = requests.get(url)
8
+ data = response.json()
9
+
10
+ lats = []
11
+ lons = []
12
+
13
+ for point in data['routes'][0]['legs'][0]['points']:
14
+ lats.append(point['latitude'])
15
+ lons.append(point['longitude'])
16
+ # fig = px.line_geo(lat=lats, lon=lons)
17
+ # fig.update_geos(fitbounds="locations")
18
+
19
+ fig = px.line_mapbox(lat=lats, lon=lons, zoom=12, height=600)
20
+
21
+ fig.update_layout(mapbox_style="open-street-map", mapbox_zoom=12, mapbox_center_lat=lats[0], mapbox_center_lon=lons[0])
22
+ fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
23
+
24
+ return fig
skills/weather.py ADDED
File without changes
tts.ipynb ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Text to Speech Playground"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 11,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import os\n",
17
+ "\n",
18
+ "import torch\n",
19
+ "import gradio as gr\n",
20
+ "from TTS.api import TTS\n",
21
+ "os.environ[\"COQUI_TOS_AGREED\"] = \"1\""
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 12,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "from collections import namedtuple\n",
31
+ "\n",
32
+ "Voice = namedtuple('voice', ['name', 'neutral','sad','angry','happy'])\n"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 13,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "voices = [\n",
42
+ " Voice('Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None),\n",
43
+ "]"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 14,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "[voice(name='Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None)]"
55
+ ]
56
+ },
57
+ "execution_count": 14,
58
+ "metadata": {},
59
+ "output_type": "execute_result"
60
+ }
61
+ ],
62
+ "source": [
63
+ "voices"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 16,
69
+ "metadata": {},
70
+ "outputs": [
71
+ {
72
+ "name": "stdout",
73
+ "output_type": "stream",
74
+ "text": [
75
+ " > tts_models/multilingual/multi-dataset/xtts_v1.1 is already downloaded.\n",
76
+ " > Using model: xtts\n"
77
+ ]
78
+ }
79
+ ],
80
+ "source": [
81
+ "#load model for text to speech\n",
82
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
83
+ "tts_pipelins = TTS(\"tts_models/multilingual/multi-dataset/xtts_v1.1\").to(device)"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": 27,
89
+ "metadata": {},
90
+ "outputs": [
91
+ {
92
+ "name": "stdout",
93
+ "output_type": "stream",
94
+ "text": [
95
+ " > Text splitted to sentences.\n",
96
+ "[\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\"]\n",
97
+ " > Processing time: 0.7903299331665039\n",
98
+ " > Real-time factor: 0.11176741294459602\n"
99
+ ]
100
+ },
101
+ {
102
+ "data": {
103
+ "text/plain": [
104
+ "'out.wav'"
105
+ ]
106
+ },
107
+ "execution_count": 27,
108
+ "metadata": {},
109
+ "output_type": "execute_result"
110
+ }
111
+ ],
112
+ "source": [
113
+ "tts_pipelins.tts_to_file(\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\", speaker_wav=\"audio/rick/neutral.wav\", emotion='neutral', language='en', file_path='out.wav')"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 17,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "def text_to_speech(voice, tts):\n",
123
+ " return voice.neutral"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": null,
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ " tts.tts_to_file(text= str(quest_processing[0]),\n",
133
+ " file_path=\"output.wav\",\n",
134
+ " speaker_wav=f'Audio_Files/{voice}.wav',\n",
135
+ " language=quest_processing[3],\n",
136
+ " emotion = \"angry\")\n",
137
+ "\n",
138
+ " audio_path = \"output.wav\"\n",
139
+ " return audio_path, state['context'], state"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 8,
145
+ "metadata": {},
146
+ "outputs": [
147
+ {
148
+ "name": "stderr",
149
+ "output_type": "stream",
150
+ "text": [
151
+ "/opt/conda/lib/python3.10/site-packages/gradio/utils.py:924: UserWarning: Expected 1 arguments for function <function tts at 0x7fcfd6feb760>, received 3.\n",
152
+ " warnings.warn(\n",
153
+ "/opt/conda/lib/python3.10/site-packages/gradio/utils.py:932: UserWarning: Expected maximum 1 arguments for function <function tts at 0x7fcfd6feb760>, received 3.\n",
154
+ " warnings.warn(\n"
155
+ ]
156
+ },
157
+ {
158
+ "name": "stdout",
159
+ "output_type": "stream",
160
+ "text": [
161
+ "\n",
162
+ "Running on local URL: http://0.0.0.0:7860\n",
163
+ "\n",
164
+ "Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.\n"
165
+ ]
166
+ },
167
+ {
168
+ "name": "stderr",
169
+ "output_type": "stream",
170
+ "text": [
171
+ "2024/04/18 13:48:05 [W] [service.go:132] login to server failed: dial tcp 44.237.78.176:7000: i/o timeout\n"
172
+ ]
173
+ },
174
+ {
175
+ "data": {
176
+ "text/html": [
177
+ "<div><iframe src=\"http://localhost:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
178
+ ],
179
+ "text/plain": [
180
+ "<IPython.core.display.HTML object>"
181
+ ]
182
+ },
183
+ "metadata": {},
184
+ "output_type": "display_data"
185
+ },
186
+ {
187
+ "name": "stderr",
188
+ "output_type": "stream",
189
+ "text": [
190
+ "Traceback (most recent call last):\n",
191
+ " File \"/opt/conda/lib/python3.10/site-packages/gradio/queueing.py\", line 527, in process_events\n",
192
+ " response = await route_utils.call_process_api(\n",
193
+ " File \"/opt/conda/lib/python3.10/site-packages/gradio/route_utils.py\", line 261, in call_process_api\n",
194
+ " output = await app.get_blocks().process_api(\n",
195
+ " File \"/opt/conda/lib/python3.10/site-packages/gradio/blocks.py\", line 1786, in process_api\n",
196
+ " result = await self.call_function(\n",
197
+ " File \"/opt/conda/lib/python3.10/site-packages/gradio/blocks.py\", line 1338, in call_function\n",
198
+ " prediction = await anyio.to_thread.run_sync(\n",
199
+ " File \"/opt/conda/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n",
200
+ " return await get_async_backend().run_sync_in_worker_thread(\n",
201
+ " File \"/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2144, in run_sync_in_worker_thread\n",
202
+ " return await future\n",
203
+ " File \"/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 851, in run\n",
204
+ " result = context.run(func, *args)\n",
205
+ " File \"/opt/conda/lib/python3.10/site-packages/gradio/utils.py\", line 759, in wrapper\n",
206
+ " response = f(*args, **kwargs)\n",
207
+ "TypeError: tts() takes 1 positional argument but 3 were given\n"
208
+ ]
209
+ },
210
+ {
211
+ "name": "stdout",
212
+ "output_type": "stream",
213
+ "text": [
214
+ "Keyboard interruption in main thread... closing server.\n",
215
+ "Killing tunnel 0.0.0.0:7860 <> None\n"
216
+ ]
217
+ },
218
+ {
219
+ "data": {
220
+ "text/plain": []
221
+ },
222
+ "execution_count": 8,
223
+ "metadata": {},
224
+ "output_type": "execute_result"
225
+ }
226
+ ],
227
+ "source": [
228
+ "#INTERFACE WITH AUDIO TO AUDIO\n",
229
+ "\n",
230
+ "#to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/ \n",
231
+ "#in \"Insecure origins treated as secure\", enable it and relaunch chrome\n",
232
+ "\n",
233
+ "\n",
234
+ "model_answer= ''\n",
235
+ "general_context= ''\n",
236
+ "# Define the initial state with some initial context.\n",
237
+ "print(general_context)\n",
238
+ "initial_state = {'context': general_context}\n",
239
+ "initial_context= initial_state['context']\n",
240
+ "# Create the Gradio interface.\n",
241
+ "iface = gr.Interface(\n",
242
+ " fn=tts,\n",
243
+ " inputs=[\n",
244
+ " gr.Textbox(value=initial_context, visible=True),\n",
245
+ " gr.Radio(choices=[x.name for x in voices], label='Choose a voice', value=voices[0].name, show_label=True), # Radio button for voice selection\n",
246
+ " gr.State() # This will keep track of the context state across interactions.\n",
247
+ " ],\n",
248
+ " outputs=[\n",
249
+ " gr.Audio(label = 'output audio'),\n",
250
+ " gr.State()\n",
251
+ " ]\n",
252
+ ")\n",
253
+ "#close all interfaces open to make the port available\n",
254
+ "gr.close_all()\n",
255
+ "# Launch the interface.\n",
256
+ "iface.launch(debug=True, share=True, server_name=\"0.0.0.0\", server_port=7860, ssl_verify=False)"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": null,
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": []
265
+ }
266
+ ],
267
+ "metadata": {
268
+ "kernelspec": {
269
+ "display_name": "base",
270
+ "language": "python",
271
+ "name": "python3"
272
+ },
273
+ "language_info": {
274
+ "codemirror_mode": {
275
+ "name": "ipython",
276
+ "version": 3
277
+ },
278
+ "file_extension": ".py",
279
+ "mimetype": "text/x-python",
280
+ "name": "python",
281
+ "nbconvert_exporter": "python",
282
+ "pygments_lexer": "ipython3",
283
+ "version": "3.10.13"
284
+ }
285
+ },
286
+ "nbformat": 4,
287
+ "nbformat_minor": 2
288
+ }