omkar334 committed on
Commit
c525e42
·
1 Parent(s): 7244d35

audio history

Browse files
Files changed (4) hide show
  1. .gitignore +2 -1
  2. agent.py +3 -2
  3. app.py +20 -18
  4. sarvam.py +9 -0
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  __pycache__/
2
  .venv/
3
  .env
4
- test.py
 
 
1
  __pycache__/
2
  .venv/
3
  .env
4
+ test.py
5
+ audio_files/
agent.py CHANGED
@@ -48,7 +48,7 @@ async def call_agent(user_prompt, collection):
48
  system_prompt=system_prompt,
49
  user_prompt=user_prompt,
50
  output_format={
51
- "function": 'Type of function to call, type: Enum["retriever", "translator", "speaker", "none"]',
52
  "keywords": "Array of keywords, type: List[str]",
53
  "src_lang": "Identify the language that the user query is in, type: str",
54
  "dest_lang": """Identify the target language from the user query if the function is either "translator" or "speaker". If language is not found, return "none",
@@ -75,7 +75,7 @@ async def retriever(user_prompt, collection, client):
75
 
76
 
77
  async def extractor(user_prompt, url):
78
- text = extract(user_prompt)
79
 
80
  system_prompt = EXTRACT_SYS_PROMPT.format(url)
81
  user_prompt = EXTRACT_USER_PROMPT.format(text, user_prompt)
@@ -85,6 +85,7 @@ async def extractor(user_prompt, url):
85
 
86
  async def function_caller(user_prompt, collection, client):
87
  result = await call_agent(user_prompt, collection)
 
88
  function = result["function"].lower()
89
 
90
  if function == "none":
 
48
  system_prompt=system_prompt,
49
  user_prompt=user_prompt,
50
  output_format={
51
+ "function": 'Type of function to call, type: Enum["retriever", "translator", "speaker", "none", "extractor"]',
52
  "keywords": "Array of keywords, type: List[str]",
53
  "src_lang": "Identify the language that the user query is in, type: str",
54
  "dest_lang": """Identify the target language from the user query if the function is either "translator" or "speaker". If language is not found, return "none",
 
75
 
76
 
77
  async def extractor(user_prompt, url):
78
+ text = await extract(url)
79
 
80
  system_prompt = EXTRACT_SYS_PROMPT.format(url)
81
  user_prompt = EXTRACT_USER_PROMPT.format(text, user_prompt)
 
85
 
86
  async def function_caller(user_prompt, collection, client):
87
  result = await call_agent(user_prompt, collection)
88
+ print(f"Agent log -\n {result} \n\n")
89
  function = result["function"].lower()
90
 
91
  if function == "none":
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import base64
2
- import tempfile
3
 
4
  import gradio as gr
5
  from fastapi import FastAPI
@@ -8,7 +7,7 @@ from pydantic import BaseModel
8
 
9
  from agent import function_caller, retriever
10
  from client import HybridClient
11
- from sarvam import speaker, translator
12
 
13
  app = FastAPI()
14
  hclient = HybridClient()
@@ -42,13 +41,13 @@ class TTSQuery(BaseModel):
42
 
43
  @app.get("/agent")
44
  async def agent(query: ChatQuery):
45
- collection = f"{grade}_{subject.lower()}_{chapter}"
46
  return await function_caller(query.query, collection, hclient)
47
 
48
 
49
  @app.get("/rag")
50
  async def rag(query: ChatQuery):
51
- collection = f"{grade}_{subject.lower()}_{chapter}"
52
  return await retriever(query.query, collection, hclient)
53
 
54
 
@@ -67,22 +66,27 @@ async def gradio_interface(input_text, grade, subject, chapter, history):
67
 
68
  if "text" in response:
69
  output = response["text"]
70
- history.append((input_text, output))
71
-
72
  elif "audios" in response:
73
  audio_data = base64.b64decode(response["audios"][0])
74
-
75
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audiofile:
76
- audiofile.write(audio_data)
77
- audiofile.flush()
78
-
79
- return "", history, audiofile.name
80
-
81
  else:
82
  output = "Unexpected response format"
83
- history.append((input_text, output))
84
 
85
- return "", history, None
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  with gr.Blocks() as iface:
@@ -96,11 +100,9 @@ with gr.Blocks() as iface:
96
 
97
  chatbot = gr.Chatbot(label="Chat History")
98
  msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
99
-
100
  state = gr.State([])
101
- audio_output = gr.Audio(label="Audio Response", type="filepath") # Separate audio output component
102
 
103
- msg.submit(gradio_interface, inputs=[msg, grade, subject, chapter, state], outputs=[msg, chatbot, audio_output])
104
 
105
  app = gr.mount_gradio_app(app, iface, path="/")
106
 
 
1
  import base64
 
2
 
3
  import gradio as gr
4
  from fastapi import FastAPI
 
7
 
8
  from agent import function_caller, retriever
9
  from client import HybridClient
10
+ from sarvam import save_audio, speaker, translator
11
 
12
  app = FastAPI()
13
  hclient = HybridClient()
 
41
 
42
  @app.get("/agent")
43
  async def agent(query: ChatQuery):
44
+ collection = f"{query.grade}_{query.subject.lower()}_{query.chapter}"
45
  return await function_caller(query.query, collection, hclient)
46
 
47
 
48
  @app.get("/rag")
49
  async def rag(query: ChatQuery):
50
+ collection = f"{query.grade}_{query.subject.lower()}_{query.chapter}"
51
  return await retriever(query.query, collection, hclient)
52
 
53
 
 
66
 
67
  if "text" in response:
68
  output = response["text"]
69
+ history.append((input_text, {"type": "text", "content": output}))
 
70
  elif "audios" in response:
71
  audio_data = base64.b64decode(response["audios"][0])
72
+ audio_path = save_audio(audio_data)
73
+ history.append((input_text, {"type": "audio", "content": audio_path}))
 
 
 
 
 
74
  else:
75
  output = "Unexpected response format"
76
+ history.append((input_text, {"type": "text", "content": output}))
77
 
78
+ return "", history
79
+
80
+
81
+ def format_history(history):
82
+ formatted_history = []
83
+ for human, assistant in history:
84
+ formatted_history.append((human, None))
85
+ if assistant["type"] == "text":
86
+ formatted_history.append((None, assistant["content"]))
87
+ elif assistant["type"] == "audio":
88
+ formatted_history.append((None, gr.Audio(value=assistant["content"], visible=True)))
89
+ return formatted_history
90
 
91
 
92
  with gr.Blocks() as iface:
 
100
 
101
  chatbot = gr.Chatbot(label="Chat History")
102
  msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
 
103
  state = gr.State([])
 
104
 
105
+ msg.submit(gradio_interface, inputs=[msg, grade, subject, chapter, state], outputs=[msg, state]).then(format_history, inputs=[state], outputs=[chatbot])
106
 
107
  app = gr.mount_gradio_app(app, iface, path="/")
108
 
sarvam.py CHANGED
@@ -62,3 +62,12 @@ async def speaker(text, src="hindi"):
62
  return output
63
  else:
64
  print(response.status)
 
 
 
 
 
 
 
 
 
 
62
  return output
63
  else:
64
  print(response.status)
65
+
66
+
67
+ def save_audio(audio_data):
68
+ os.makedirs("audio_files", exist_ok=True)
69
+ file_count = len(os.listdir("audio_files"))
70
+ file_path = f"audio_files/audio_{file_count + 1}.wav"
71
+ with open(file_path, "wb") as audiofile:
72
+ audiofile.write(audio_data)
73
+ return file_path