mukul-wai committed
Commit 375e82a · 1 Parent(s): 0818ef3

Update runner.py

Files changed (1)
  1. runner.py +115 -0
runner.py CHANGED
@@ -0,0 +1,115 @@
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import gradio as gr
import openai
import os

from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from IPython.display import Audio

# Configure the OpenAI API key; avoid committing a real key to the repository.
os.environ['OPENAI_API_KEY'] = "sk-..."  # placeholder, substitute your own key
api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key

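# (Deployment note, assumed) On Hugging Face Spaces the key is typically stored as a
# repository secret and exposed as an environment variable, in which case only the
# os.getenv() lookup above is needed.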
# connect your Google Drive
"""from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
root_dir = "/content/gdrive/My Drive/"
data_path = '/content/gdrive/My Drive/CDSS/LLM Demos/ASHA material'
"""
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.document_loaders import UnstructuredPDFLoader

# Load the PDFs, split them into pages, embed them, and build a Chroma retriever.
pdf_folder_path = 'ASHA material'
loader = PyPDFDirectoryLoader(pdf_folder_path)

pages = loader.load_and_split()
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(pages, embeddings).as_retriever()
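# Optional sanity check (an illustrative sketch, not part of the original code;
# the question string is hypothetical):
# sample_docs = docsearch.get_relevant_documents("What services does an ASHA worker provide?")
# print(len(sample_docs), "chunks retrieved")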
from pydub import AudioSegment

# download and load all models
preload_models()


lang_dict = {
    "English": "en",
}
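# More languages could be added here as display-name / Whisper-code pairs,
# e.g. "Hindi": "hi" (a suggestion; only English is wired up in this commit).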

# generate audio from text
text_prompt = """
Hello, my name is Suno. And, uh — and I like pizza. [laughs]
But I also have other interests such as playing tic tac toe.
"""
#audio_array = generate_audio(text_prompt)

# save audio to disk
#write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

# play text in notebook
#Audio(audio_array, rate=SAMPLE_RATE)

def get_asr_output(audio_path, lang='en'):  # lang is an ISO-639-1 code, e.g. "en"
    # Convert the recorded audio to WAV, then transcribe it with OpenAI Whisper.
    audio = AudioSegment.from_file(audio_path)
    audio.export("temp.wav", format="wav")

    with open("temp.wav", "rb") as file:
        transcription = openai.Audio.transcribe("whisper-1", file, language=lang)
    op_text = transcription.text

    """ if lang == "hi":
    op_text = asr_pipe("temp.wav")['text']
    print('whisper',transcription)
    print('ai4b',op_text) """

    return op_text

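# Example call (a sketch; "sample_query.wav" is a hypothetical file, not part of this repo):
# print(get_asr_output("sample_query.wav", lang="en"))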
def greet(audio, lang, if_audio=True):
    # Transcribe the spoken question, retrieve the most relevant PDF chunks,
    # and answer with a question-answering chain over those chunks.
    query = get_asr_output(audio, lang_dict[lang])

    docs = docsearch.get_relevant_documents(query)
    # chain_type="stuff" places all retrieved chunks into a single prompt;
    # temperature=0 keeps the answer deterministic.
    chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
    answer = chain.run(input_documents=docs, question=query)

    return query, answer

def get_audio2(answer):
    # Synthesize the answer with Bark and keep a copy on disk.
    audio_array = generate_audio(answer)
    write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

    return SAMPLE_RATE, audio_array  # Bark's SAMPLE_RATE is 24000 Hz

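# Gradio's Audio component accepts a (sample_rate, numpy_array) tuple as output,
# which is what get_audio2() returns for the 'Get Audio' button below.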
def dummy(name):
    # Unused helper: returns the path of the last Bark file written by get_audio2().
    return "bark_generation.wav"

# Note: this module-level Radio is shadowed by the one created inside the Blocks below.
lang = gr.Radio(list(lang_dict.keys()), label="Select a Language")

with gr.Blocks(title="CHO Assistant") as demo:
    #gr.Image('assistant.png', shape = (10,10))
    lang = gr.Radio(list(lang_dict.keys()), label="Select a Language")
    user_audio = gr.Audio(source="microphone", type="filepath", label="Speak your query")
    text = gr.Textbox(placeholder="Question", label="Question / Voice Transcription", show_label=False)
    output = gr.Textbox(placeholder="The answer will appear here", interactive=False, show_label=False)

    submit = gr.Button("Submit")
    submit.click(greet, [user_audio, lang], [text, output])

    get_audio = gr.Button('Get Audio')
    audio = gr.Audio()
    get_audio.click(get_audio2, output, audio)

# share=True also creates a temporary public Gradio link in addition to the local server.
demo.launch(share=True)