Adarsh Shirawalmath committed
Commit e430acc · 1 Parent(s): bd8f5fd
Files changed (3)
  1. Dockerfile +13 -0
  2. app.py +176 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
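Once this image is built and started, the container serves the API on port 7860. The following is a minimal smoke-test sketch (not part of the commit), assuming the container is reachable at http://localhost:7860 and that FastAPI's default /openapi.json route is enabled:

    import json
    import urllib.request

    # Assumed base URL for a locally running container; adjust host/port as needed.
    BASE_URL = "http://localhost:7860"

    # FastAPI serves its schema at /openapi.json by default, so a 200 response here
    # means uvicorn started and the app module imported without errors.
    with urllib.request.urlopen(f"{BASE_URL}/openapi.json") as resp:
        schema = json.load(resp)
        # Expect the routes defined in app.py: /transcribe, /generate_audio_summary, /chat
        print(sorted(schema.get("paths", {})))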
app.py ADDED
@@ -0,0 +1,176 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.responses import StreamingResponse
+ from pydantic import BaseModel, HttpUrl
+ import os
+ import logging
+ import pytubefix
+ from openai import OpenAI
+ from deepgram import Deepgram
+ import asyncio
+ import json
+ import io
+ import google.generativeai as genai
+ import time
+ from collections import deque
+ import uvicorn
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI()
+
+ DEEPGRAM_API_KEY = "3aaf03ba25b82a4be7deb0a33968fa47a79536be"
+ deepgram = Deepgram(DEEPGRAM_API_KEY)
+
+ OPENAI_API_KEY = "sk-proj-ZxVo1ECb1m0Pi6EZBvSnT3BlbkFJOYcbeRfqshRSApddPkdU"
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
+
+ GOOGLE_API_KEY = "AIzaSyCFm0LD5G-57mC4zVPDuteCq7XlFbqqQRo"
+ genai.configure(api_key=GOOGLE_API_KEY)
+
+ class VideoURL(BaseModel):
+     url: HttpUrl
+     summary_length: str
+
+ class ChatMessage(BaseModel):
+     speaker: str
+     message: str
+     transcript: str
+
+ RATE_LIMIT = 15
+ RATE_WINDOW = 60
+ request_timestamps = deque()
+
+ def is_rate_limited():
+     current_time = time.time()
+     while request_timestamps and current_time - request_timestamps[0] > RATE_WINDOW:
+         request_timestamps.popleft()
+     return len(request_timestamps) >= RATE_LIMIT
+
+ async def transcribe_audio(audio_file):
+     try:
+         with open(audio_file, 'rb') as audio:
+             source = {'buffer': audio, 'mimetype': 'audio/mp3'}
+             options = {"diarize": True, "punctuate": True, "paragraphs": True, "model": 'general', "tier": 'enhanced'}
+             response = await deepgram.transcription.prerecorded(source, options)
+             return response
+     except Exception as e:
+         logger.error(f"Error transcribing audio: {str(e)}")
+         return None
+
+ def generate_summary(text, video_description, summary_length):
+     if summary_length == "100 words - bullet points":
+         prompt = f"Summarize the following podcast in about 100 words using bullet points. Focus only on the main content and key points discussed in the podcast. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text}"
+     elif summary_length == "250 words - TL;DR":
+         prompt = f"Provide a TL;DR summary of the following podcast in about 250 words. Concentrate on the core content and main ideas presented in the podcast. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text}"
+     else:  # 500 words - paragraph
+         prompt = f"Summarize the following podcast in about 500 words using paragraphs. Emphasize the primary topics and key discussions from the podcast. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text}"
+
+     if not is_rate_limited():
+         try:
+             model = genai.GenerativeModel('gemini-1.5-flash')
+             response = model.generate_content(prompt)
+             request_timestamps.append(time.time())
+             return response.text
+         except Exception as e:
+             logger.error(f"Error with Gemini model: {str(e)}. Falling back to GPT-4O-mini.")
+     else:
+         logger.info("Gemini rate limit reached. Falling back to GPT-4O-mini.")
+
+     response = openai_client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant that summarizes podcasts concisely and accurately, focusing on the main content and key points discussed."},
+             {"role": "user", "content": prompt}
+         ]
+     )
+     return response.choices[0].message.content.strip()
+
+ def generate_quiz(text, video_description):
+     prompt = f"Create a quiz with 10 multiple-choice questions based on the following podcast. Each question should have 4 options (A, B, C, D) with only one correct answer. Focus on the main content and key points discussed in the podcast. Format the output as a JSON array of objects, where each object represents a question with 'question', 'choices', and 'correct_answer' keys. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text[:4000]}"
+
+     response = openai_client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": "You are an expert at creating engaging and informative quizzes based on podcast content, focusing on the main topics and key points discussed. Output your response as a valid JSON array."},
+             {"role": "user", "content": prompt}
+         ]
+     )
+
+     try:
+         quiz_data = json.loads(response.choices[0].message.content.strip())
+         return quiz_data
+     except json.JSONDecodeError as e:
+         logger.error(f"Error parsing quiz data: {str(e)}")
+         logger.error(f"Raw response: {response.choices[0].message.content}")
+         return []
+
+ @app.post("/transcribe")
+ async def transcribe_video(video: VideoURL):
+     try:
+         yt = pytubefix.YouTube(str(video.url), use_oauth=True, allow_oauth_cache=True)
+         audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
+         if not audio_stream:
+             raise HTTPException(status_code=400, detail="No audio stream found for this video")
+
+         video_description = yt.description
+
+         audio_file = audio_stream.download(output_path="temp")
+         audio_file_mp3 = audio_file + ".mp3"
+         os.rename(audio_file, audio_file_mp3)
+
+         transcript = await transcribe_audio(audio_file_mp3)
+
+         if not transcript:
+             raise HTTPException(status_code=500, detail="Transcription failed")
+
+         full_text = transcript['results']['channels'][0]['alternatives'][0]['transcript']
+
+         summary = generate_summary(full_text, video_description, video.summary_length)
+         quiz = generate_quiz(full_text, video_description)
+
+         result = {
+             "transcription": full_text,
+             "summary": summary,
+             "video_description": video_description,
+             "quiz": quiz,
+             "detailed_transcript": transcript
+         }
+
+         os.remove(audio_file_mp3)
+
+         return result
+
+     except Exception as e:
+         logger.error(f"Error processing video: {str(e)}")
+         logger.exception("Full traceback:")
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/generate_audio_summary")
+ async def generate_audio_summary(summary: str):
+     response = openai_client.audio.speech.create(
+         model="tts-1",
+         voice="alloy",
+         input=summary
+     )
+
+     audio_data = io.BytesIO(response.content)
+
+     return StreamingResponse(audio_data, media_type="audio/mp3")
+
+ @app.post("/chat")
+ async def chat_with_personality(chat_message: ChatMessage):
+     prompt = f"You are roleplaying as {chat_message.speaker}, a podcast guest. Respond to the user's message in character, based on the content of the podcast. Here's the full transcript for context: {chat_message.transcript[:2000]}"
+
+     response = openai_client.chat.completions.create(
+         model="gpt-4o",
+         messages=[
+             {"role": "system", "content": prompt},
+             {"role": "user", "content": chat_message.message}
+         ]
+     )
+
+     return {"response": response.choices[0].message.content.strip()}
+
+ if __name__ == "__main__":
+     uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", 8000)))
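The request and response shapes above imply the following client usage. This is a rough sketch rather than part of the commit, assuming the service is running at http://localhost:7860 and that the requests package is installed; the YouTube URL and speaker name are placeholders, and /transcribe can take several minutes for long videos.

    import requests

    BASE_URL = "http://localhost:7860"                        # assumed local deployment
    VIDEO_URL = "https://www.youtube.com/watch?v=VIDEO_ID"    # placeholder URL

    # /transcribe expects the VideoURL model: a url plus one of the three
    # summary_length strings handled in generate_summary().
    resp = requests.post(
        f"{BASE_URL}/transcribe",
        json={"url": VIDEO_URL, "summary_length": "100 words - bullet points"},
        timeout=1800,
    )
    resp.raise_for_status()
    data = resp.json()
    print(data["summary"])

    # /chat expects the ChatMessage model; the transcript returned above is
    # passed back so the model can answer in character.
    chat = requests.post(
        f"{BASE_URL}/chat",
        json={
            "speaker": "Guest Name",          # placeholder speaker label
            "message": "What was the main takeaway?",
            "transcript": data["transcription"],
        },
        timeout=120,
    )
    print(chat.json()["response"])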
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ pydantic
+ pytubefix
+ openai
+ deepgram-sdk<3
+ google-generativeai
+ uvicorn
+ gunicorn