Spaces:

sasan
/

KITT

Build error

App Files Files Community

sasan committed on Apr 19, 2024

Commit

5baa807

1 Parent(s): 6b9d2e8

wip

Browse files

Files changed (6) hide show

kitt.py +0 -25
skills/__init__.py +0 -0
skills/poi.py +0 -0
skills/routing.py +24 -0
skills/weather.py +0 -0
tts.ipynb +288 -0

kitt.py CHANGED Viewed

@@ -5,31 +5,6 @@ import requests
 # INTERFACE WITH AUDIO TO AUDIO
-def calculate_route():
-    api_key = "api_key"
-    origin = "49.631997,6.171029"
-    destination = "49.586745,6.140002"
-    url = f"https://api.tomtom.com/routing/1/calculateRoute/{origin}:{destination}/json?key={api_key}"
-    response = requests.get(url)
-    data = response.json()
-    lats = []
-    lons = []
-    for point in data['routes'][0]['legs'][0]['points']:
-        lats.append(point['latitude'])
-        lons.append(point['longitude'])
-    # fig = px.line_geo(lat=lats, lon=lons)
-    # fig.update_geos(fitbounds="locations")
-    fig = px.line_mapbox(lat=lats, lon=lons, zoom=12, height=600)
-    fig.update_layout(mapbox_style="open-street-map", mapbox_zoom=12, mapbox_center_lat=lats[0], mapbox_center_lon=lons[0])
-    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
-    return fig
 def transcript(
     general_context, link_to_audio, voice, emotion, place, time, delete_history, state

 # INTERFACE WITH AUDIO TO AUDIO
 def transcript(
     general_context, link_to_audio, voice, emotion, place, time, delete_history, state

skills/__init__.py ADDED Viewed

File without changes

skills/poi.py ADDED Viewed

File without changes

skills/routing.py ADDED Viewed

	@@ -0,0 +1,24 @@

+def calculate_route():
+    api_key = "api_key"
+    origin = "49.631997,6.171029"
+    destination = "49.586745,6.140002"
+    url = f"https://api.tomtom.com/routing/1/calculateRoute/{origin}:{destination}/json?key={api_key}"
+    response = requests.get(url)
+    data = response.json()
+    lats = []
+    lons = []
+    for point in data['routes'][0]['legs'][0]['points']:
+        lats.append(point['latitude'])
+        lons.append(point['longitude'])
+    # fig = px.line_geo(lat=lats, lon=lons)
+    # fig.update_geos(fitbounds="locations")
+    fig = px.line_mapbox(lat=lats, lon=lons, zoom=12, height=600)
+    fig.update_layout(mapbox_style="open-street-map", mapbox_zoom=12, mapbox_center_lat=lats[0], mapbox_center_lon=lons[0])
+    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
+    return fig

skills/weather.py ADDED Viewed

File without changes

tts.ipynb ADDED Viewed

	@@ -0,0 +1,288 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Text to Speech Playground"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "import torch\n",
+    "import gradio as gr\n",
+    "from TTS.api import TTS\n",
+    "os.environ[\"COQUI_TOS_AGREED\"] = \"1\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import namedtuple\n",
+    "\n",
+    "Voice = namedtuple('voice', ['name', 'neutral','sad','angry','happy'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "voices = [\n",
+    "    Voice('Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[voice(name='Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None)]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "voices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " > tts_models/multilingual/multi-dataset/xtts_v1.1 is already downloaded.\n",
+      " > Using model: xtts\n"
+     ]
+    }
+   ],
+   "source": [
+    "#load model for text to speech\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "tts_pipelins = TTS(\"tts_models/multilingual/multi-dataset/xtts_v1.1\").to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " > Text splitted to sentences.\n",
+      "[\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\"]\n",
+      " > Processing time: 0.7903299331665039\n",
+      " > Real-time factor: 0.11176741294459602\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'out.wav'"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tts_pipelins.tts_to_file(\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\", speaker_wav=\"audio/rick/neutral.wav\", emotion='neutral', language='en', file_path='out.wav')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def text_to_speech(voice, tts):\n",
+    "    return voice.neutral"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "    tts.tts_to_file(text= str(quest_processing[0]),\n",
+    "                file_path=\"output.wav\",\n",
+    "                speaker_wav=f'Audio_Files/{voice}.wav',\n",
+    "                language=quest_processing[3],\n",
+    "                emotion = \"angry\")\n",
+    "\n",
+    "    audio_path = \"output.wav\"\n",
+    "    return audio_path, state['context'], state"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/gradio/utils.py:924: UserWarning: Expected 1 arguments for function <function tts at 0x7fcfd6feb760>, received 3.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/gradio/utils.py:932: UserWarning: Expected maximum 1 arguments for function <function tts at 0x7fcfd6feb760>, received 3.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Running on local URL:  http://0.0.0.0:7860\n",
+      "\n",
+      "Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024/04/18 13:48:05 [W] [service.go:132] login to server failed: dial tcp 44.237.78.176:7000: i/o timeout\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://localhost:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/gradio/queueing.py\", line 527, in process_events\n",
+      "    response = await route_utils.call_process_api(\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/gradio/route_utils.py\", line 261, in call_process_api\n",
+      "    output = await app.get_blocks().process_api(\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/gradio/blocks.py\", line 1786, in process_api\n",
+      "    result = await self.call_function(\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/gradio/blocks.py\", line 1338, in call_function\n",
+      "    prediction = await anyio.to_thread.run_sync(\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/anyio/to_thread.py\", line 56, in run_sync\n",
+      "    return await get_async_backend().run_sync_in_worker_thread(\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 2144, in run_sync_in_worker_thread\n",
+      "    return await future\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py\", line 851, in run\n",
+      "    result = context.run(func, *args)\n",
+      "  File \"/opt/conda/lib/python3.10/site-packages/gradio/utils.py\", line 759, in wrapper\n",
+      "    response = f(*args, **kwargs)\n",
+      "TypeError: tts() takes 1 positional argument but 3 were given\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Keyboard interruption in main thread... closing server.\n",
+      "Killing tunnel 0.0.0.0:7860 <> None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#INTERFACE WITH AUDIO TO AUDIO\n",
+    "\n",
+    "# To use the microphone in Chrome, open chrome://flags/#unsafely-treat-insecure-origin-as-secure, enter http://10.186.115.21:7860/\n",
+    "# under \"Insecure origins treated as secure\", enable the flag, and relaunch Chrome.\n",
+    "\n",
+    "\n",
+    "model_answer= ''\n",
+    "general_context= ''\n",
+    "# Define the initial state with some initial context.\n",
+    "print(general_context)\n",
+    "initial_state = {'context': general_context}\n",
+    "initial_context= initial_state['context']\n",
+    "# Create the Gradio interface.\n",
+    "iface = gr.Interface(\n",
+    "    fn=tts,\n",
+    "    inputs=[\n",
+    "        gr.Textbox(value=initial_context, visible=True),\n",
+    "        gr.Radio(choices=[x.name for x in voices], label='Choose a voice', value=voices[0].name, show_label=True),  # Radio button for voice selection\n",
+    "        gr.State()  # This will keep track of the context state across interactions.\n",
+    "    ],\n",
+    "    outputs=[\n",
+    "        gr.Audio(label = 'output audio'),\n",
+    "        gr.State()\n",
+    "    ]\n",
+    ")\n",
+    "# Close any interfaces that are still open so the port becomes available.\n",
+    "gr.close_all()\n",
+    "# Launch the interface.\n",
+    "iface.launch(debug=True, share=True, server_name=\"0.0.0.0\", server_port=7860, ssl_verify=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}