import gradio as gr
import openai
import requests
import json
import os
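# Read the OpenAI API key from the environment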
openai.api_key = os.environ.get('OPENAI_API_KEY')
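# Conversation history, seeded with a system prompt that fixes the persona and caps replies at 25 words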
messages = [{"role": "system", "content": 'You are Steve Jobs. Respond to all input in 25 words or less.'}]
# Set up the API endpoint URL and headers
url = f"https://api.elevenlabs.io/v1/text-to-speech/{os.environ.get('voice_id')}/stream"
headers = {
"accept": "*/*",
"xi-api-key": os.environ.get('elevenlabs_api_key'),
"Content-Type": "application/json",
}
# Define a function to handle the Gradio input and generate the response
def transcribe(audio):
    global messages
    # Use OpenAI to transcribe the user's audio input
    # API call 1
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    # Append the user's message to the message history
    messages.append({"role": "user", "content": transcript["text"]})
    # Generate a response using OpenAI's chat API
    # API call 2
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    # Extract the system message from the API response and append it to the message history
    system_message = response["choices"][0]["message"]
    messages.append(system_message)
    # API call 3
    # Use the voice synthesis API to generate an audio response from the system message
    data = {
        "text": system_message["content"],
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0
        }
    }
    response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)
    # Save the audio response to a file
    if response.ok:
        with open("output.wav", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
    else:
        print(f"Error: {response.status_code} - {response.reason}")
    # IPython.display.display(IPython.display.Audio('output.wav'))
    # Generate a chat transcript for display in the Gradio UI
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
    return chat_transcript, 'output.wav'
# css = """
# #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
# #header {text-align: center;}
# """
# with gr.Blocks(css=css) as ui:
#     with gr.Column(elem_id="col-container"):
#         gr.Markdown("""## Talk to AI Steve Jobs: Audio-to-Text+Audio generation
#                     Powered by ChatGPT + Whisper + ElevenLabs + HuggingFace <br>
#                     <br>
#                     """,
#                     elem_id="header")
# Define the Gradio UI interface
# ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text")
ui = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=['text', 'audio'],
    title='Talk to AI Steve Jobs',
    description="""Click on Record from microphone and start speaking. When you're done, click on Stop Recording, then click on Submit. AI Steve will then answer your question.
    You can ask follow-up questions by clicking on Clear and then repeating Record from microphone -> Stop Recording -> Submit. AI Steve Jobs will also remember the previous questions and answers.""")
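# Launch the Gradio app; debug=True keeps the process attached and prints errors to the console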
ui.launch(debug=True)