from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import gradio as gr
import openai
import os
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from IPython.display import Audio

api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key

# Connect your Google Drive (only needed when running in Colab).
"""from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
root_dir = "/content/gdrive/My Drive/"
data_path = '/content/gdrive/My Drive/CDSS/LLM Demos/ASHA material'
"""

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.document_loaders import UnstructuredPDFLoader

# Load every PDF in the folder, split it into pages, and index the pages in a
# Chroma vector store built from OpenAI embeddings.
pdf_folder_path = 'ASHA material'
loader = PyPDFDirectoryLoader(pdf_folder_path)
pages = loader.load_and_split()

embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(pages, embeddings).as_retriever()

from pydub import AudioSegment

# Download and load all Bark text-to-speech models.
preload_models()

lang_dict = {
    "English": "en",
}

# Example prompt for generating audio from text with Bark.
text_prompt = """
    Hello, my name is Suno. And, uh — and I like pizza. [laughs]
    But I also have other interests such as playing tic tac toe.
"""
# audio_array = generate_audio(text_prompt)

# Save the audio to disk.
# write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

# Play the audio in a notebook.
# Audio(audio_array, rate=SAMPLE_RATE)


def get_asr_output(audio_path, lang='en'):
    """Transcribe the recorded audio with OpenAI Whisper."""
    audio = AudioSegment.from_file(audio_path)
    audio.export("temp.wav", format="wav")
    with open("temp.wav", "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", audio_file, language=lang)
    op_text = transcription.text
    """
    if lang == "hi":
        op_text = asr_pipe("temp.wav")['text']
    print('whisper', transcription)
    print('ai4b', op_text)
    """
    return op_text


def greet(audio, lang, if_audio=True):
    """Transcribe the query, retrieve relevant pages, and answer with a QA chain."""
    query = get_asr_output(audio, lang_dict[lang])
    docs = docsearch.get_relevant_documents(query)
    chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
    answer = chain.run(input_documents=docs, question=query)
    return query, answer


def get_audio2(answer):
    """Synthesize the answer with Bark and return it at Bark's sample rate."""
    audio_array = generate_audio(answer)
    write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)
    return SAMPLE_RATE, audio_array


def dummy(name):
    # Unused helper kept from an earlier iteration of the demo.
    return "bark_generation.wav"


with gr.Blocks(title="CHO Assistant") as demo:
    # gr.Image('assistant.png', shape=(10, 10))
    lang = gr.Radio(list(lang_dict.keys()), label="Select a Language")
    user_audio = gr.Audio(source="microphone", type="filepath", label="Speak your query")
    text = gr.Textbox(placeholder="Question", label="Question / Voice Transcription", show_label=False)
    output = gr.Textbox(placeholder="The answer will appear here", interactive=False, show_label=False)
    submit = gr.Button("Submit")
    submit.click(greet, [user_audio, lang], [text, output])
    get_audio = gr.Button('Get Audio')
    audio = gr.Audio()
    get_audio.click(get_audio2, output, audio)

demo.launch()
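
# A minimal text-only sketch of the same retrieval step, useful for checking the
# index without the microphone UI. This is an illustration, not part of the
# original demo; the sample question is hypothetical and assumes the
# 'ASHA material' index above was built successfully.
# sample_query = "What are the responsibilities of an ASHA worker?"
# sample_docs = docsearch.get_relevant_documents(sample_query)
# sample_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
# print(sample_chain.run(input_documents=sample_docs, question=sample_query))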