""" | |
this model only supports english since text to speech is an english only model | |
""" | |
from google.cloud import texttospeech
import os
import openai
import gradio as gr
from dotenv import load_dotenv
import pinecone
""" | |
login to gcp | |
""" | |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "gcp_access_key.json" | |
# Instantiates a client | |
client = texttospeech.TextToSpeechClient() | |
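# Note: GOOGLE_APPLICATION_CREDENTIALS must point to a GCP service-account key
# file (JSON) for a project with the Cloud Text-to-Speech API enabled;
# "gcp_access_key.json" is assumed to sit next to this script.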
""" | |
Connecting to Open AI API | |
""" | |
load_dotenv() | |
openai.organization = os.getenv("OPENAI_ORG") | |
openai.api_key = os.getenv("OPENAI_API_KEY") | |
EMBEDDING_MODEL = "text-embedding-ada-002" | |
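# text-embedding-ada-002 returns 1536-dimensional vectors, so the Pinecone
# index queried below is assumed to have been created with dimension=1536
# and the cosine similarity metric.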
""" | |
Connecting to pincone API and assign index | |
""" | |
index_name = 'economic-forecast' | |
pinecone.init( | |
api_key=os.getenv("Pinecone_KEY"), | |
environment=os.getenv("Pinecone_ENV") | |
) | |
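# For reference, a minimal sketch of how the 'economic-forecast' index is
# assumed to have been populated: each source document is embedded with the
# same model and upserted with its text stored as metadata, which is what the
# query step in transcribe() relies on. `build_index` and `docs` are
# hypothetical and not part of this app.
#
# def build_index(docs):
#     index = pinecone.Index(index_name)
#     for i, doc in enumerate(docs):
#         vec = openai.Embedding.create(input=doc, model=EMBEDDING_MODEL)["data"][0]["embedding"]
#         index.upsert([(str(i), vec, {"text": doc})])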
## An initial system message defining GPT's role is created inside transcribe() below
""" | |
define the text -> speech function | |
""" | |
def text2speech(text):
    # Set the text input to be synthesized
    synthesis_input = texttospeech.SynthesisInput(text=text)
    # Build the voice request: select the language code ("en-US") and the
    # SSML voice gender
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.MALE
    )
    # Select the type of audio file you want returned
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    # The response's audio_content is binary
    with open("output.mp3", "wb") as out:
        # Write the response to the output file
        out.write(response.audio_content)
        print('Audio content written to file "output.mp3"')
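# A quick standalone check (hypothetical input string):
# text2speech("The economy is expected to grow moderately this quarter.")
# This writes output.mp3 to the working directory, overwriting any previous file.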
""" | |
define voice -> gpt -> text -> voice workflow | |
""" | |
def transcribe(audio):
    # global messages
    """
    Gradio's output file doesn't have a .wav extension, so rename the file to
    the correct format
    """
    extension = ".wav"
    audio_formatted = f"{audio}{extension}"
    os.rename(audio, audio_formatted)
    """
    Pass the audio file to Whisper to transcribe
    """
    audio_file = open(audio_formatted, "rb")
    transcript = openai.Audio.transcribe("whisper-1", audio_file)
    """
    Run cosine similarity to find context
    """
    ### Embed the question and search for the relevant text
    index = pinecone.Index(index_name)
    query = openai.Embedding.create(input=transcript["text"], model=EMBEDDING_MODEL)["data"][0]["embedding"]  # embed the user query into an embedding vector
    res = index.query(query, top_k=3, include_metadata=True)  # run cosine similarity to find the most relevant embedded content; this is done entirely in Pinecone
    contexts = [
        x['metadata']['text'] for x in res['matches']
    ]
    merged_context = "".join(contexts)
    contextwithQuestion = "Context: " + "\n" + merged_context + "*End of the context*" + "\n\n" + "Question: " + transcript["text"]
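    # The assembled prompt passed to GPT therefore looks like:
    #   Context:
    #   <top-3 retrieved passages, concatenated>*End of the context*
    #
    #   Question: <transcribed user question>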
""" | |
pass the transcripted text to GPT | |
""" | |
messages = [ | |
{"role": "system", | |
"content": | |
"You are an assistant that answers questions only based on the context provided. Before each question, some context will be provided.\ | |
Context starts with 'Context:' and end with '*End of the context*'. Once you receive all the context, you will consider all of them to answer the questions.\ | |
It is very important to answer the question as honestly as possible.\ | |
If you are not sure about the answer based on the context provided, you can still try to come up with an answer but you must also tell the user that you are not confident about the answer and that the user should look for a secondary source to confirm the answer.\ | |
It is very important to answer the questions politely. It is very important to answer the question in great detail.\ | |
Once you receive all the context, you will receive a question that starts with 'Question:'. Once you receive the question, you can answer the question.\ | |
"} | |
] | |
messages.append({"role": "user", "content":contextwithQuestion}) ## add user input to the list of message | |
response = openai.ChatCompletion.create( | |
model="gpt-3.5-turbo", | |
messages=messages | |
) ## pass the list of message to GPT | |
messages.append({"role": "assistant", "content":response["choices"][0]["message"]["content"]}) ## add GPT response to the list of message | |
text2speech(response["choices"][0]["message"]["content"]) ## create mp3 voice output | |
voice_path = os.path.abspath("output.mp3") | |
return voice_path | |
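# live=True makes Gradio re-run transcribe() whenever the recording stops,
# so no submit button is needed.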
output_audio = gr.Audio(type="filepath", label="AI Assistant")
gr.Interface(fn=transcribe,
             inputs=gr.Audio(source="microphone", type="filepath", label="Speak here..."),
             outputs=output_audio,
             live=True,
             allow_flagging='never'
             ).launch()  # add share=True to publish a public link