Spaces:
Build error
Build error
Rongjiehuang
committed on
Commit
·
3075f9b
1
Parent(s):
5db7a2d
update huggingface
Browse files
- .gitignore +16 -0
- app.py +17 -9
.gitignore
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# JetBrains PyCharm IDE
|
2 |
+
.idea/
|
3 |
+
.github/
|
4 |
+
.circleci/
|
5 |
+
|
6 |
+
# Byte-compiled / optimized / DLL files
|
7 |
+
*__pycache__/
|
8 |
+
__pycache__/
|
9 |
+
*.py[cod]
|
10 |
+
*$py.class
|
11 |
+
|
12 |
+
# C extensions
|
13 |
+
*.so
|
14 |
+
|
15 |
+
# macOS dir files
|
16 |
+
.DS_Store
|
app.py
CHANGED
@@ -5,13 +5,20 @@ from langchain.llms.openai import OpenAI
|
|
5 |
from audio_foundation_models import *
|
6 |
import gradio as gr
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
TOOLS:
|
13 |
------
|
14 |
-
|
15 |
|
16 |
AUDIO_CHATGPT_FORMAT_INSTRUCTIONS = """To use a tool, please use the following format:
|
17 |
```
|
@@ -161,7 +168,7 @@ class ConversationBot:
|
|
161 |
print("Inputs:", state)
|
162 |
print("======>Previous memory:\n %s" % self.agent.memory)
|
163 |
# inpaint = Inpaint(device="cpu")
|
164 |
-
new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
|
165 |
AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"![](/file={new_image_filename})*{new_image_filename}*"
|
166 |
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
167 |
print("======>Current memory:\n %s" % self.agent.memory)
|
@@ -188,7 +195,7 @@ class ConversationBot:
|
|
188 |
|
189 |
|
190 |
|
191 |
-
if __name__ == '__main__':
|
192 |
bot = ConversationBot({'ImageCaptioning': 'cuda:0',
|
193 |
'T2A': 'cuda:0',
|
194 |
'I2A': 'cuda:0',
|
@@ -203,6 +210,8 @@ if __name__ == '__main__':
|
|
203 |
'TargetSoundDetection': 'cpu'
|
204 |
})
|
205 |
with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
|
|
|
|
|
206 |
with gr.Row():
|
207 |
openai_api_key_textbox = gr.Textbox(
|
208 |
placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",
|
@@ -210,8 +219,7 @@ if __name__ == '__main__':
|
|
210 |
lines=1,
|
211 |
type="password",
|
212 |
)
|
213 |
-
|
214 |
-
gr.Markdown("## AudioGPT")
|
215 |
chatbot = gr.Chatbot(elem_id="chatbot", label="AudioGPT")
|
216 |
state = gr.State([])
|
217 |
with gr.Row(visible = False) as input_raws:
|
|
|
5 |
from audio_foundation_models import *
|
6 |
import gradio as gr
|
7 |
|
8 |
+
_DESCRIPTION = '# [AudioGPT](https://github.com/AIGC-Audio/AudioGPT)'
|
9 |
+
_DESCRIPTION += '\n<p>This is a demo to the work [AudioGPT: Sending and Receiving Speech, Sing, Audio, and Talking head during chatting](https://github.com/AIGC-Audio/AudioGPT).</p>'
|
10 |
+
_DESCRIPTION += '\n<p>This model can only be used for non-commercial purposes. To learn more about the model, take a look at the <a href="https://huggingface.co/damo-vilab/modelscope-damo-text-to-video-synthesis" style="text-decoration: underline;" target="_blank">model card</a>.</p>'
|
11 |
+
|
12 |
+
|
13 |
+
AUDIO_CHATGPT_PREFIX = """AudioGPT
|
14 |
+
AudioGPT can not directly read audios, but it has a list of tools to finish different speech, audio, and singing voice tasks. Each audio will have a file name formed as "audio/xxx.wav". When talking about audios, AudioGPT is very strict to the file name and will never fabricate nonexistent files.
|
15 |
+
AudioGPT is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the audio content and audio file name. It will remember to provide the file name from the last tool observation, if a new audio is generated.
|
16 |
+
Human may provide new audios to AudioGPT with a description. The description helps AudioGPT to understand this audio, but AudioGPT should use tools to finish following tasks, rather than directly imagine from the description.
|
17 |
+
Overall, AudioGPT is a powerful audio dialogue assistant tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics.
|
18 |
+
|
19 |
TOOLS:
|
20 |
------
|
21 |
+
AudioGPT has access to the following tools:"""
|
22 |
|
23 |
AUDIO_CHATGPT_FORMAT_INSTRUCTIONS = """To use a tool, please use the following format:
|
24 |
```
|
|
|
168 |
print("Inputs:", state)
|
169 |
print("======>Previous memory:\n %s" % self.agent.memory)
|
170 |
# inpaint = Inpaint(device="cpu")
|
171 |
+
new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
|
172 |
AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"![](/file={new_image_filename})*{new_image_filename}*"
|
173 |
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
174 |
print("======>Current memory:\n %s" % self.agent.memory)
|
|
|
195 |
|
196 |
|
197 |
|
198 |
+
if __name__ == '__main__':
|
199 |
bot = ConversationBot({'ImageCaptioning': 'cuda:0',
|
200 |
'T2A': 'cuda:0',
|
201 |
'I2A': 'cuda:0',
|
|
|
210 |
'TargetSoundDetection': 'cpu'
|
211 |
})
|
212 |
with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
|
213 |
+
gr.Markdown(_DESCRIPTION)
|
214 |
+
|
215 |
with gr.Row():
|
216 |
openai_api_key_textbox = gr.Textbox(
|
217 |
placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",
|
|
|
219 |
lines=1,
|
220 |
type="password",
|
221 |
)
|
222 |
+
|
|
|
223 |
chatbot = gr.Chatbot(elem_id="chatbot", label="AudioGPT")
|
224 |
state = gr.State([])
|
225 |
with gr.Row(visible = False) as input_raws:
|