tts and doc update
Browse files- App/Embedding/EmbeddingRoutes.py +6 -3
- App/Embedding/utils/Initialize.py +47 -26
- App/TTS/Schemas.py +28 -0
- App/TTS/TTSRoutes.py +27 -0
- App/TTS/utils/Podcastle.py +140 -0
- App/TTS/utils/__init__.py +0 -0
- App/app.py +2 -1
App/Embedding/EmbeddingRoutes.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from fastapi import APIRouter
|
2 |
|
3 |
from .utils.Initialize import TextSearch, IdSearch
|
4 |
from .Schemas import SearchRequest, AddDocumentRequest
|
@@ -13,8 +13,11 @@ async def create_embeddings(req: AddDocumentRequest):
|
|
13 |
|
14 |
|
15 |
@embeddigs_router.post("/search_id")
|
16 |
-
async def search_id(
|
17 |
-
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
@embeddigs_router.post("/search_text")
|
|
|
1 |
+
from fastapi import APIRouter, BackgroundTasks
|
2 |
|
3 |
from .utils.Initialize import TextSearch, IdSearch
|
4 |
from .Schemas import SearchRequest, AddDocumentRequest
|
|
|
13 |
|
14 |
|
15 |
@embeddigs_router.post("/search_id")
async def search_id(req: SearchRequest, background_tasks: BackgroundTasks):
    # Delegate to IdSearch, passing along FastAPI's background-task queue so
    # it can schedule work to run after the response has been sent.
    requested_id = req.query
    return IdSearch(query=requested_id, background_task=background_tasks)
|
21 |
|
22 |
|
23 |
@embeddigs_router.post("/search_text")
|
App/Embedding/utils/Initialize.py
CHANGED
@@ -1,52 +1,73 @@
|
|
1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
from langchain.docstore.document import Document
|
3 |
from langchain.vectorstores import Pinecone
|
4 |
-
import
|
5 |
-
import
|
|
|
6 |
from .Elastic import FetchDocuments
|
7 |
|
8 |
|
9 |
-
index_name =
|
10 |
model_name = "thenlper/gte-base"
|
11 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
12 |
|
13 |
-
TMDB_API=os.environ.get(
|
14 |
|
15 |
# get api key from app.pinecone.io
|
16 |
-
PINECONE_API_KEY = os.environ.get(
|
17 |
# find your environment next to the api key in pinecone console
|
18 |
-
PINECONE_ENV = os.environ.get(
|
|
|
|
|
|
|
19 |
|
20 |
-
pinecone.init(
|
21 |
-
api_key=PINECONE_API_KEY,
|
22 |
-
environment=PINECONE_ENV
|
23 |
-
)
|
24 |
|
25 |
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
26 |
|
27 |
-
def generate_text(doc):
|
28 |
-
if doc['tv_results']:
|
29 |
-
return pprint.pformat(doc['tv_results'][0]),doc['tv_results'][0]
|
30 |
-
return pprint.pformat(doc['movie_results'][0]),doc['movie_results'][0]
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
|
34 |
-
def IdSearch(query:str):
|
35 |
-
doc=requests.get(
|
|
|
|
|
36 |
try:
|
37 |
-
text,props=generate_text(doc)
|
38 |
except Exception as e:
|
39 |
print(e)
|
40 |
return []
|
41 |
-
|
42 |
-
|
43 |
|
44 |
|
45 |
-
def TextSearch(query: str,filter=None):
|
46 |
-
docs = docsearch.similarity_search(query,k=10,filter=filter)
|
47 |
-
keys= [
|
48 |
return FetchDocuments(keys)
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
from langchain.docstore.document import Document
|
3 |
from langchain.vectorstores import Pinecone
|
4 |
+
from fastapi import BackgroundTasks
|
5 |
+
import os, requests
|
6 |
+
import pinecone, pprint
|
7 |
from .Elastic import FetchDocuments
|
8 |
|
9 |
|
10 |
+
# Pinecone index to search and the embedding model used to query it.
index_name = "movie-recommender-fast"
model_name = "thenlper/gte-base"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# All credentials are read from the environment; each is None when unset.
TMDB_API = os.getenv("TMDB_API")

# get api key from app.pinecone.io
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# find your environment next to the api key in pinecone console
PINECONE_ENV = os.getenv("PINECONE_ENVIRONMENT")

pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENV,
)
# Raw index handle (used for metadata-only existence checks).
vector_index = pinecone.Index(index_name=index_name)

# LangChain vector-store wrapper around the same index.
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
26 |
|
|
|
|
|
|
|
|
|
27 |
|
28 |
+
def check_if_exists(imdb_id):
    """Return True when the vector index already holds a record for *imdb_id*.

    Queries ``vector_index`` for at most one record whose ``key`` metadata
    equals *imdb_id*.

    Args:
        imdb_id: Value to compare against the ``key`` metadata field.

    Returns:
        bool: True if the query returned at least one match, else False.
    """
    results = vector_index.query(filter={"key": {"$eq": imdb_id}}, top_k=1)
    # The response is a container object that can be truthy even when it holds
    # zero hits, so decide on its list of matches rather than on the object.
    return bool(results) and bool(results.get("matches"))
|
34 |
+
|
35 |
+
|
36 |
+
def add_document(imdb_id, doc):
    """Store a document in the vector store unless its key is already present.

    Args:
        imdb_id: Identifier saved under the ``key`` metadata field.
        doc: ``(text, metadata)`` pair; ``text`` becomes the page content and
            ``metadata`` (a dict) is stored alongside it.

    Returns:
        None.
    """
    if check_if_exists(imdb_id=imdb_id):
        print("document exists")
        return

    text, props = doc
    # Build a new dict so the caller's metadata object is left untouched.
    metadata = {**props, "key": imdb_id}
    docsearch.add_documents([Document(page_content=text, metadata=metadata)])
    # Report success only after the write has actually gone through.
    print("document added")
|
49 |
+
|
50 |
+
|
51 |
+
def generate_text(doc):
    """Pick the first TV or movie hit from a lookup payload and render it.

    Args:
        doc: Mapping that may contain ``tv_results`` and ``movie_results``
            lists. TV results take precedence when non-empty.

    Returns:
        tuple: ``(text, record)`` where ``record`` is the selected hit and
        ``text`` is its ``pprint.pformat`` rendering.

    Raises:
        IndexError: If neither list contains an entry (including when the
            keys are absent altogether).
    """
    hits = doc.get("tv_results") or doc.get("movie_results")
    if not hits:
        # Same exception type an empty result list always produced, but with
        # a message that explains what went wrong.
        raise IndexError("no tv_results or movie_results in lookup response")
    record = hits[0]
    return pprint.pformat(record), record
|
55 |
|
56 |
|
57 |
+
def IdSearch(query: str, background_task: BackgroundTasks):
    """Find titles similar to the one identified by an IMDb id.

    Looks the id up through TMDB's ``find`` endpoint, schedules the fetched
    record for insertion into the vector store, and returns a text similarity
    search that excludes the record itself.

    Args:
        query: IMDb id to look up.
        background_task: FastAPI task queue used to run ``add_document``
            after the response is sent.

    Returns:
        The result of ``TextSearch``, or ``[]`` when the lookup fails or
        yields no usable record.
    """
    from urllib.parse import quote

    try:
        response = requests.get(
            # Escape the caller-supplied id so it cannot alter the URL path.
            f"https://api.themoviedb.org/3/find/{quote(query, safe='')}",
            params={
                "external_source": "imdb_id",
                "language": "en",
                "api_key": TMDB_API,
            },
            # Without a timeout a stalled upstream would hang the request.
            timeout=10,
        )
        text, props = generate_text(response.json())
    except Exception as e:
        # Best effort: any lookup problem results in an empty answer.
        print(e)
        return []

    background_task.add_task(add_document, imdb_id=query, doc=(text, props))
    return TextSearch(text, filter={"key": {"$ne": query}})
|
68 |
|
69 |
|
70 |
+
def TextSearch(query: str, filter=None):
    """Run a similarity search and resolve the hits through ``FetchDocuments``.

    Args:
        query: Text to search for.
        filter: Optional metadata filter forwarded to the vector store.

    Returns:
        Whatever ``FetchDocuments`` returns for the ``key`` metadata values of
        the (up to 10) nearest documents.
    """
    matches = docsearch.similarity_search(query, k=10, filter=filter)
    keys = []
    for match in matches:
        keys.append(match.metadata["key"])
    return FetchDocuments(keys)
|
|
|
|
|
|
|
|
App/TTS/Schemas.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel,Field
|
2 |
+
from typing import List,Optional
|
3 |
+
import uuid
|
4 |
+
|
5 |
+
class Speak(BaseModel):
    # One paragraph of text to synthesise.
    # paragraphId falls back to a random UUID4 string when not supplied.
    paragraphId: str = Field(default_factory=lambda: str(uuid.uuid4()))
    speaker: str
    text: str
    voiceId: str = Field(default="c60166365edf46589657770d", alias="speaker")  # Default speaker value

    def __init__(self, **data):
        # Wrap plain text in <speak> tags unless the tag is already present.
        # Non-string values (including a missing "text") are passed through
        # untouched so that pydantic reports a proper validation error instead
        # of this constructor failing with a TypeError.
        text = data.get("text")
        if isinstance(text, str) and "<speak>" not in text:
            data["text"] = f"<speak>{text}</speak>"
        super().__init__(**data)
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
class TTSGenerateRequest(BaseModel):
    # Request body for text-to-speech generation.
    paragraphs: List[Speak]
    # Both ids default to a freshly generated UUID4 string when omitted.
    requestId: str = Field(default_factory=lambda: f"{uuid.uuid4()}")
    workspaceId: str = Field(default_factory=lambda: f"{uuid.uuid4()}")
|
21 |
+
|
22 |
+
|
23 |
+
class StatusRequest(BaseModel):
    # Request body for a status lookup: the id of the request to check.
    requestId: str
|
25 |
+
|
26 |
+
|
27 |
+
class GetTranscriptions(BaseModel):
    # Request body carrying a single integer user id.
    userId: int
|
App/TTS/TTSRoutes.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
|
3 |
+
|
4 |
+
from .Schemas import StatusRequest, TTSGenerateRequest
|
5 |
+
from .utils.Podcastle import PodcastleAPI
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Router that groups the text-to-speech endpoints under the "TTS" tag.
tts_router = APIRouter(tags=["TTS"])

# Podcastle credentials come from the environment (None when unset).
data = dict(
    username=os.environ.get("USERNAME"),
    password=os.environ.get("PASSWORD"),
)
# Single client instance shared by every route in this module.
tts = PodcastleAPI(**data)
|
11 |
+
|
12 |
+
|
13 |
+
#
|
14 |
+
@tts_router.post("/generate_tts")
async def generate_voice(req: TTSGenerateRequest):
    # Forward the generation request to the shared Podcastle client and relay
    # its JSON reply unchanged.
    print("here --entered!")
    reply = await tts.make_request(req)
    return reply
|
18 |
+
|
19 |
+
|
20 |
+
@tts_router.post("/status")
async def search_id(req: StatusRequest):
    # Ask the shared Podcastle client for the state of a previously submitted
    # request and relay its JSON reply unchanged.
    status = await tts.check_status(req)
    return status
|
23 |
+
|
24 |
+
|
25 |
+
# @tts_router.post("/search_text")
|
26 |
+
# async def search_text(req: SearchRequest):
|
27 |
+
# return TextSearch(query=req.query)
|
App/TTS/utils/Podcastle.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import aiohttp
|
2 |
+
import asyncio
|
3 |
+
from App.TTS.Schemas import TTSGenerateRequest,StatusRequest
|
4 |
+
from pydantic import BaseModel
|
5 |
+
|
6 |
+
class PodcastleAPI:
    """Small asynchronous client for the Podcastle text-to-speech HTTP API.

    The HTTP session and the access token are both created lazily on first
    use. A request answered with HTTP 401 triggers one fresh sign-in followed
    by a single retry. The object can be used as an async context manager, in
    which case the session is closed on exit.
    """

    def __init__(self, username, password):
        """Store the credentials; no network activity happens here."""
        self.base_url = "https://podcastle.ai/api"
        self.username = username
        self.password = password
        self.headers = {
            'authority': 'podcastle.ai',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'cache-control': 'no-cache',
            'content-type': 'application/json',
            # Add your other headers here
        }
        self.session = None  # created on demand by create_session()
        self.access_token = None

    async def create_session(self):
        """Open a new aiohttp session carrying the default headers."""
        self.session = aiohttp.ClientSession(headers=self.headers)

    async def close_session(self):
        """Close the session, if any, and forget it so it is never reused."""
        if self.session:
            await self.session.close()
            # Drop the reference: a closed session object is still truthy and
            # would otherwise be picked up again by the next request.
            self.session = None

    async def _ensure_session(self):
        """Make sure an open session is available."""
        if self.session is None or self.session.closed:
            await self.create_session()

    def _auth_headers(self):
        """Return a copy of the default headers plus the bearer token."""
        headers = self.headers.copy()
        headers['authorization'] = f"Bearer {self.access_token}"
        return headers

    async def signin(self):
        """Sign in and remember the returned access token.

        Returns:
            The decoded JSON body of the sign-in response.

        Raises:
            KeyError: If the response has no ``auth.accessToken`` entry.
        """
        url = f"{self.base_url}/auth/signin"
        payload = {
            "username": self.username,
            "password": self.password
        }

        await self._ensure_session()

        async with self.session.post(url, json=payload) as response:
            response_data = await response.json()
            self.access_token = response_data['auth']['accessToken']
            return response_data

    async def _authorized_json(self, method, url, **kwargs):
        """Send an authenticated request and return its decoded JSON body.

        Signs in first when no token is held, and signs in again and retries
        once when the server answers with HTTP 401.
        """
        await self._ensure_session()

        if not self.access_token:
            await self.signin()

        async with self.session.request(
            method, url, headers=self._auth_headers(), **kwargs
        ) as response:
            if response.status != 401:
                return await response.json()

        # The token was rejected: obtain a new one and retry exactly once.
        await self.signin()
        async with self.session.request(
            method, url, headers=self._auth_headers(), **kwargs
        ) as retry_response:
            return await retry_response.json()

    async def make_request(self, tts_request: "TTSGenerateRequest"):
        """Submit a text-to-speech job and return the API's JSON reply."""
        url = f"{self.base_url}/speech/text-to-speech"
        return await self._authorized_json("POST", url, json=tts_request.dict())

    async def check_status(self, tts_status: "StatusRequest"):
        """Fetch the state of the job named by ``tts_status.requestId``."""
        url = f"{self.base_url}/speech/text-to-speech/{tts_status.requestId}"
        return await self._authorized_json("GET", url)

    async def __aenter__(self):
        await self._ensure_session()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.close_session()
|
104 |
+
|
105 |
+
# Example usage:
|
106 |
+
if __name__ == "__main__":
    import os
    from typing import List

    # Stand-alone request models so the example does not depend on defaults.
    class Speak(BaseModel):
        paragraphId: str
        text: str
        speaker: str

    class TTSGenerateRequest(BaseModel):
        paragraphs: List[Speak]
        requestId: str
        workspaceId: str

    async def main():
        # Credentials are taken from the environment (same variable names the
        # TTS routes use) instead of being committed to source control.
        username = os.environ.get("USERNAME")
        password = os.environ.get("PASSWORD")

        # Create a TTSGenerateRequest object
        tts_request = TTSGenerateRequest(
            paragraphs=[
                Speak(
                    paragraphId="6f05p",
                    text="<speak>Hey Daniel. Are you ok?. Manchester United almost lost yesterday </speak>",
                    speaker="c60166365edf46589657770d"
                )
            ],
            requestId="7d6018ae-9617-4d22-879f-5e67283fa140",
            workspaceId="f84fd58e-2899-4531-9f51-77c155c1e294"
        )

        async with PodcastleAPI(username, password) as podcastle_api:
            # Make the TTS request using the TTSGenerateRequest object
            response_text = await podcastle_api.make_request(tts_request)
            print(response_text)

    # asyncio.run creates and closes the event loop for us.
    asyncio.run(main())
|
App/TTS/utils/__init__.py
ADDED
File without changes
|
App/app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
|
3 |
from fastapi.middleware.gzip import GZipMiddleware
|
4 |
-
|
5 |
|
6 |
from .Embedding.EmbeddingRoutes import embeddigs_router
|
7 |
|
@@ -39,3 +39,4 @@ async def landing_page():
|
|
39 |
|
40 |
|
41 |
app.include_router(embeddigs_router)
|
|
|
|
1 |
from fastapi import FastAPI
|
2 |
|
3 |
from fastapi.middleware.gzip import GZipMiddleware
|
4 |
+
from .TTS.TTSRoutes import tts_router
|
5 |
|
6 |
from .Embedding.EmbeddingRoutes import embeddigs_router
|
7 |
|
|
|
39 |
|
40 |
|
41 |
app.include_router(embeddigs_router)
|
42 |
+
app.include_router(tts_router)
|