pvanand committed on
Commit
992207f
·
verified ·
1 Parent(s): 21d14b3

Create speech_api.py

Browse files
Files changed (1) hide show
  1. speech_api.py +65 -0
speech_api.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import io
import os

from ai4b import BhashiniClient
from fast_langdetect import detect
from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
8
+
9
# Router on which the /tts and /asr endpoints of this module are registered.
router = APIRouter()

# Bhashini/ULCA credentials come from the environment; os.getenv returns None
# for a variable that is not set, so a missing credential is passed to the
# client as None rather than failing here.
ULCA_USER_ID = os.getenv("ULCA_USER_ID")
ULCA_API_KEY = os.getenv("ULCA_API_KEY")

# Single module-level client shared by both endpoints.
client = BhashiniClient(user_id=ULCA_USER_ID, api_key=ULCA_API_KEY)
15
+
16
class TTSRequest(BaseModel):
    """Request body accepted by the text-to-speech endpoint."""

    # Text to be synthesized; required.
    text: str
    # Voice gender forwarded to the TTS client; "female" when omitted.
    gender: str = "female"
19
+
20
# Language codes that are returned as-is when the detector reports them.
SUPPORTED_LANGUAGES = {'pa', 'mr', 'bn', 'en', 'as', 'or', 'ta', 'te', 'kn', 'gu', 'hi', 'ml'}


def detect_language(text):
    """Return a language code for *text*, falling back to script heuristics.

    The ``fast_langdetect`` detector is tried first and its answer is used
    when it is one of ``SUPPORTED_LANGUAGES``. If detection fails or reports
    an unsupported language, the text is scanned for characters from specific
    Unicode blocks; when none match, ``'en'`` is returned.

    Args:
        text: The input string to classify.

    Returns:
        A language code string: a member of ``SUPPORTED_LANGUAGES``,
        ``'brx'``, ``'mni'``, or ``'en'``.
    """
    # Newlines are flattened before detection.
    # NOTE(review): presumably the detector rejects multi-line input - confirm.
    text = text.replace("\n", " ")
    try:
        detected_lang = detect(text, low_memory=False)['lang']
        if detected_lang in SUPPORTED_LANGUAGES:
            return detected_lang
    except Exception:
        # Detection is deliberately best-effort: any detector failure falls
        # through to the script-based heuristics below. Only ``Exception`` is
        # caught so KeyboardInterrupt/SystemExit still propagate.
        pass

    # U+0980-U+09FF is the Unicode Bengali block.
    # NOTE(review): mapping this block to 'brx' is kept from the original;
    # confirm it is the intended code for this script.
    if any('\u0980' <= char <= '\u09FF' for char in text):
        return 'brx'
    # U+ABC0-U+ABFF is the Unicode Meetei Mayek block.
    if any('\uABC0' <= char <= '\uABFF' for char in text):
        return 'mni'

    return 'en'
38
+
39
@router.post("/tts")
async def text_to_speech(request: TTSRequest):
    """Synthesize speech for the request text and return it as WAV audio.

    The language is chosen by ``detect_language`` and passed to the TTS
    client together with the requested gender. The base64 audio found at
    ``pipelineResponse[0].audio[0].audioContent`` in the client's result is
    decoded and returned with media type ``audio/wav``.

    Args:
        request: Body carrying the text to speak and the voice gender.

    Returns:
        A ``StreamingResponse`` over the decoded audio bytes.

    Raises:
        HTTPException: Status 500 with the error text as detail when
            detection, synthesis, or decoding fails.
    """
    try:
        detected_language = detect_language(request.text)

        # client.tts is a synchronous call; running it in the threadpool keeps
        # it from stalling the event loop for other requests while it works.
        tts_result = await run_in_threadpool(
            client.tts,
            request.text,
            source_language=detected_language,
            gender=request.gender,
        )

        audio_base64 = tts_result['pipelineResponse'][0]['audio'][0]['audioContent']
        audio_data = base64.b64decode(audio_base64)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
56
+
57
@router.post("/asr")
async def speech_to_text(file: UploadFile = File(...), source_language: str = "ml"):
    """Transcribe an uploaded audio file with the ASR client.

    Args:
        file: The uploaded audio; its full content is read into memory.
        source_language: Language code passed to the ASR client; defaults
            to ``"ml"``.

    Returns:
        A dict ``{"transcription": <client.asr result>}``; the client's
        result is returned as-is.

    Raises:
        HTTPException: Status 500 with the error text as detail when
            reading the upload or the ASR call fails.
    """
    try:
        audio_content = await file.read()
        # client.asr is a synchronous call; running it in the threadpool keeps
        # it from stalling the event loop for other requests while it works.
        asr_result = await run_in_threadpool(
            client.asr,
            audio_content,
            source_language=source_language,
        )
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"transcription": asr_result}