Spaces:
Sleeping
Sleeping
miaohaiyuan
committed on
Commit
•
8c02b48
1
Parent(s):
3ea1479
deliver grognotes
Browse files- .gitignore +5 -0
- .streamlit/config.toml +2 -0
- LICENSE.md +21 -0
- app.py +502 -0
- download.py +91 -0
- example.env +1 -0
- packages.txt +2 -0
- requirements.txt +74 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
venv
|
3 |
+
.venv
|
4 |
+
**.pyc
|
5 |
+
downloads/
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
maxUploadSize = 25
|
LICENSE.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 Benjamin Klieger
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
app.py
ADDED
@@ -0,0 +1,502 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from groq import Groq
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
from io import BytesIO
|
6 |
+
from md2pdf.core import md2pdf
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from download import download_video_audio, delete_download
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
|
12 |
+
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", None)
|
13 |
+
|
14 |
+
MAX_FILE_SIZE = 25 * 1024 * 1024 # 25 MB
|
15 |
+
FILE_TOO_LARGE_MESSAGE = "The audio file is too large. If you used a YouTube link, please try a shorter video clip. If you uploaded an audio file, try trimming or compressing the audio to under 25 MB."
|
16 |
+
|
17 |
+
audio_file_path = None
|
18 |
+
|
19 |
+
if 'api_key' not in st.session_state:
|
20 |
+
st.session_state.api_key = GROQ_API_KEY
|
21 |
+
|
22 |
+
if 'groq' not in st.session_state:
|
23 |
+
if GROQ_API_KEY:
|
24 |
+
st.session_state.groq = Groq()
|
25 |
+
|
26 |
+
st.set_page_config(
|
27 |
+
page_title="Groqnotes",
|
28 |
+
page_icon="🗒️",
|
29 |
+
)
|
30 |
+
|
31 |
+
class GenerationStatistics:
    """Token counts and timings for one or more model generations.

    Instances can be merged with ``add`` and rendered as a markdown summary
    (headline speed plus a small table) with ``str()``.
    """

    def __init__(self, input_time=0,output_time=0,input_tokens=0,output_tokens=0,total_time=0,model_name="llama3-8b-8192"):
        self.model_name = model_name
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens
        self.input_time = input_time
        self.output_time = output_time
        # Sum of queue, prompt (input), and completion (output) times
        self.total_time = total_time

    def get_input_speed(self):
        """
        Input tokens per second; 0 when no input time has been recorded.
        """
        if self.input_time == 0:
            return 0
        return self.input_tokens / self.input_time

    def get_output_speed(self):
        """
        Output tokens per second; 0 when no output time has been recorded.
        """
        if self.output_time == 0:
            return 0
        return self.output_tokens / self.output_time

    def add(self, other):
        """
        Accumulate the counters of another GenerationStatistics into this one.

        Raises TypeError when ``other`` is not a GenerationStatistics.
        The model name of ``self`` is left untouched.
        """
        if not isinstance(other, GenerationStatistics):
            raise TypeError("Can only add GenerationStatistics objects")

        for counter in ("input_time", "output_time", "input_tokens", "output_tokens", "total_time"):
            setattr(self, counter, getattr(self, counter) + getattr(other, counter))

    def __str__(self):
        token_total = self.input_tokens + self.output_tokens
        # Guard against division by zero before any timing has been recorded.
        overall_speed = token_total / self.total_time if self.total_time != 0 else 0
        output_speed = self.get_output_speed()

        summary_lines = [
            f"\n## {output_speed:.2f} T/s ⚡",
            f"Round trip time: {self.total_time:.2f}s Model: {self.model_name}",
            "",
            "| Metric | Input | Output | Total |",
            "|-----------------|----------------|-----------------|----------------|",
            f"| Speed (T/s) | {self.get_input_speed():.2f} | {output_speed:.2f} | {overall_speed:.2f} |",
            f"| Tokens | {self.input_tokens} | {self.output_tokens} | {token_total} |",
            f"| Inference Time (s) | {self.input_time:.2f} | {self.output_time:.2f} | {self.total_time:.2f} |",
        ]
        return "\n".join(summary_lines)
|
78 |
+
|
79 |
+
class NoteSection:
    """Holds the generated note text for every section of an outline.

    ``structure`` is a (possibly nested) dict mapping section titles to either
    a description string or a nested dict of sub-sections. For every title a
    Streamlit placeholder is created so streamed text can be re-rendered in
    place as it arrives.
    """

    def __init__(self, structure, transcript):
        self.structure = structure
        section_titles = self.flatten_structure(structure)
        self.contents = dict.fromkeys(section_titles, "")
        self.placeholders = {}
        for section_title in section_titles:
            self.placeholders[section_title] = st.empty()

        # The raw transcript is rendered below the section placeholders.
        st.markdown("## Raw transcript:")
        st.markdown(transcript)
        st.markdown("---")

    def flatten_structure(self, structure):
        """Return every title in ``structure`` in depth-first order."""
        titles = []
        for title, content in structure.items():
            titles.append(title)
            if not isinstance(content, dict):
                continue
            titles += self.flatten_structure(content)
        return titles

    def update_content(self, title, new_content):
        """Append ``new_content`` to a section and refresh its placeholder."""
        try:
            self.contents[title] = self.contents[title] + new_content
            self.display_content(title)
        except TypeError:
            # Non-string chunks are ignored on purpose (best effort).
            pass

    def display_content(self, title):
        """Render a section into its placeholder once it has visible text."""
        body = self.contents[title]
        if not body.strip():
            return
        self.placeholders[title].markdown(f"## {title}\n{body}")

    def return_existing_contents(self, level=1) -> str:
        """Return the markdown of all non-empty sections written so far.

        Top-level headings start at ``level``; nested sections are one level
        deeper per nesting step.
        """
        return self.get_markdown_content(self.structure, level)

    def display_structure(self, structure=None, level=1):
        """Render headings and current text for every non-empty section."""
        sections = self.structure if structure is None else structure

        for title, content in sections.items():
            if self.contents[title].strip():  # Only display title if there is content
                st.markdown(f"{'#' * level} {title}")
                self.placeholders[title].markdown(self.contents[title])
            if isinstance(content, dict):
                self.display_structure(content, level + 1)

    def display_toc(self, structure, columns, level=1, col_index=0):
        """Write a bulleted table of contents, cycling through ``columns``.

        Returns the next column index so recursive calls continue the cycle.
        """
        for title, content in structure.items():
            target_column = columns[col_index % len(columns)]
            indent = ' ' * (level-1) * 2
            with target_column:
                st.markdown(f"{indent}- {title}")
            col_index += 1
            if isinstance(content, dict):
                col_index = self.display_toc(content, columns, level + 1, col_index)
        return col_index

    def get_markdown_content(self, structure=None, level=1):
        """
        Returns the markdown styled pure string with the contents.

        Sections without text are skipped; nested dicts are rendered one
        heading level deeper.
        """
        sections = self.structure if structure is None else structure

        pieces = []
        for title, content in sections.items():
            body = self.contents[title]
            if body.strip():  # Only include title if there is content
                pieces.append(f"{'#' * level} {title}\n{body}.\n\n")
            if isinstance(content, dict):
                pieces.append(self.get_markdown_content(content, level + 1))
        return "".join(pieces)
|
151 |
+
|
152 |
+
def create_markdown_file(content: str) -> BytesIO:
    """
    Wrap the given markdown text in an in-memory binary file.

    The text is UTF-8 encoded and the returned buffer is positioned at the
    start, ready to be read.
    """
    return BytesIO(content.encode('utf-8'))
|
160 |
+
|
161 |
+
def create_pdf_file(content: str):
    """
    Render markdown text to a PDF held in memory.

    The markdown is handed to ``md2pdf`` together with an in-memory buffer as
    the output target; the buffer is rewound before it is returned.
    """
    rendered_pdf = BytesIO()
    md2pdf(rendered_pdf, md_content=content)
    rendered_pdf.seek(0)
    return rendered_pdf
|
169 |
+
|
170 |
+
def transcribe_audio(audio_file):
    """
    Transcribe an audio file with Groq's Whisper endpoint and return the text.

    Uses the Groq client stored in ``st.session_state.groq``.
    """
    whisper_request = {
        "file": audio_file,
        "model": "whisper-large-v3",
        "prompt": "",
        "response_format": "json",
        "language": "en",
        "temperature": 0.0,
    }
    client = st.session_state.groq
    return client.audio.transcriptions.create(**whisper_request).text
|
185 |
+
|
186 |
+
def generate_notes_structure(transcript: str, model: str = "llama3-70b-8192"):
    """
    Ask the model for a JSON outline of notes for the given transcript.

    Args:
        transcript: Transcribed audio text to outline.
        model: Name of the Groq chat model to use.

    Returns:
        A ``(statistics, content)`` tuple: a GenerationStatistics built from
        the usage data of the completion, and the raw message content (the
        request asks for a JSON object mapping section titles to
        descriptions).
    """

    # One-shot example shown to the model. It must be a complete JSON object,
    # otherwise the model is shown malformed JSON while JSON mode is requested.
    shot_example = """
{
"Introduction": "Introduction to the AMA session, including the topic of Groq scaling architecture and the panelists",
"Panelist Introductions": "Brief introductions from Igor, Andrew, and Omar, covering their backgrounds and roles at Groq",
"Groq Scaling Architecture Overview": "High-level overview of Groq's scaling architecture, covering hardware, software, and cloud components",
"Hardware Perspective": "Igor's overview of Groq's hardware approach, using an analogy of city traffic management to explain the traditional compute approach and Groq's innovative approach",
"Traditional Compute": "Description of traditional compute approach, including asynchronous nature, queues, and poor utilization of infrastructure",
"Groq's Approach": "Description of Groq's approach, including pre-orchestrated movement of data, low latency, high energy efficiency, and high utilization of resources",
"Hardware Implementation": "Igor's explanation of the hardware implementation, including a comparison of GPU and LPU architectures"
}"""
    completion = st.session_state.groq.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "Write in JSON format:\n\n{\"Title of section goes here\":\"Description of section goes here\",\"Title of section goes here\":\"Description of section goes here\",\"Title of section goes here\":\"Description of section goes here\"}"
            },
            {
                "role": "user",
                # Each "###" header is separated from the neighbouring text by a
                # blank line, matching the prompt layout used for section
                # generation, so the example's closing brace no longer runs
                # into the "### Instructions" header.
                "content": f"### Transcript\n\n{transcript}\n\n### Example\n\n{shot_example}\n\n### Instructions\n\nCreate a structure for comprehensive notes on the above transcribed audio. Section titles and content descriptions must be comprehensive. Quality over quantity."
            }
        ],
        temperature=0.3,
        max_tokens=8000,
        top_p=1,
        stream=False,
        response_format={"type": "json_object"},
        stop=None,
    )

    usage = completion.usage
    statistics_to_return = GenerationStatistics(
        input_time=usage.prompt_time,
        output_time=usage.completion_time,
        input_tokens=usage.prompt_tokens,
        output_tokens=usage.completion_tokens,
        total_time=usage.total_time,
        model_name=model,
    )

    return statistics_to_return, completion.choices[0].message.content
|
224 |
+
|
225 |
+
def generate_section(transcript: str, existing_notes: str, section: str, model: str = "llama3-8b-8192"):
    """
    Stream the note text for a single section.

    This is a generator. It yields text fragments (``str``) as they arrive
    and, whenever a streamed chunk carries usage data under ``x_groq``, a
    GenerationStatistics object built from that usage.
    """
    system_prompt = "You are an expert writer. Generate a comprehensive note for the section provided based factually on the transcript provided. Do *not* repeat any content from previous sections."
    user_prompt = f"### Transcript\n\n{transcript}\n\n### Existing Notes\n\n{existing_notes}\n\n### Instructions\n\nGenerate comprehensive notes for this section only based on the transcript: \n\n{section}"

    stream = st.session_state.groq.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.3,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )

    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text:
            yield text

        groq_meta = chunk.x_groq
        usage = groq_meta.usage if groq_meta else None
        if not usage:
            continue

        yield GenerationStatistics(
            input_time=usage.prompt_time,
            output_time=usage.completion_time,
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            total_time=usage.total_time,
            model_name=model,
        )
|
255 |
+
|
256 |
+
# Initialize
|
257 |
+
if 'button_disabled' not in st.session_state:
|
258 |
+
st.session_state.button_disabled = False
|
259 |
+
|
260 |
+
if 'button_text' not in st.session_state:
|
261 |
+
st.session_state.button_text = "Generate Notes"
|
262 |
+
|
263 |
+
if 'statistics_text' not in st.session_state:
|
264 |
+
st.session_state.statistics_text = ""
|
265 |
+
|
266 |
+
st.write("""
|
267 |
+
# Groqnotes: Create structured notes from audio 🗒️⚡
|
268 |
+
""")
|
269 |
+
|
270 |
+
def disable():
    """Mark the generate button as disabled in the session state."""
    st.session_state["button_disabled"] = True
|
272 |
+
|
273 |
+
def enable():
    """Mark the generate button as enabled in the session state."""
    st.session_state["button_disabled"] = False
|
275 |
+
|
276 |
+
def empty_st():
    """Insert an empty Streamlit placeholder element."""
    st.empty()
|
278 |
+
|
279 |
+
try:
|
280 |
+
with st.sidebar:
|
281 |
+
audio_files = {
|
282 |
+
"Transformers Explained by Google Cloud Tech": {
|
283 |
+
"file_path": "assets/audio/transformers_explained.m4a",
|
284 |
+
"youtube_link": "https://www.youtube.com/watch?v=SZorAJ4I-sA"
|
285 |
+
},
|
286 |
+
"The Essence of Calculus by 3Blue1Brown": {
|
287 |
+
"file_path": "assets/audio/essence_calculus.m4a",
|
288 |
+
"youtube_link": "https://www.youtube.com/watch?v=WUvTyaaNkzM"
|
289 |
+
},
|
290 |
+
"First 20 minutes of Groq's AMA": {
|
291 |
+
"file_path": "assets/audio/groq_ama_trimmed_20min.m4a",
|
292 |
+
"youtube_link": "https://www.youtube.com/watch?v=UztfweS-7MU"
|
293 |
+
}
|
294 |
+
}
|
295 |
+
|
296 |
+
st.write(f"# 🗒️ GroqNotes \n## Generate notes from audio in seconds using Groq, Whisper, and Llama3")
|
297 |
+
st.markdown(f"[Github Repository](https://github.com/bklieger/groqnotes)\n\nAs with all generative AI, content may include inaccurate or placeholder information. GroqNotes is in beta and all feedback is welcome!")
|
298 |
+
|
299 |
+
st.write(f"---")
|
300 |
+
|
301 |
+
st.write(f"# Sample Audio Files")
|
302 |
+
|
303 |
+
for audio_name, audio_info in audio_files.items():
|
304 |
+
|
305 |
+
st.write(f"### {audio_name}")
|
306 |
+
|
307 |
+
# Read audio file as binary
|
308 |
+
with open(audio_info['file_path'], 'rb') as audio_file:
|
309 |
+
audio_bytes = audio_file.read()
|
310 |
+
|
311 |
+
# Create download button
|
312 |
+
st.download_button(
|
313 |
+
label=f"Download audio",
|
314 |
+
data=audio_bytes,
|
315 |
+
file_name=audio_info['file_path'],
|
316 |
+
mime='audio/m4a'
|
317 |
+
)
|
318 |
+
|
319 |
+
st.markdown(f"[Credit Youtube Link]({audio_info['youtube_link']})")
|
320 |
+
st.write(f"\n\n")
|
321 |
+
|
322 |
+
st.write(f"---")
|
323 |
+
|
324 |
+
st.write("# Customization Settings\n🧪 These settings are experimental.\n")
|
325 |
+
st.write(f"By default, GroqNotes uses Llama3-70b for generating the notes outline and Llama3-8b for the content. This balances quality with speed and rate limit usage. You can customize these selections below.")
|
326 |
+
outline_model_options = ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"]
|
327 |
+
outline_selected_model = st.selectbox("Outline generation:", outline_model_options)
|
328 |
+
content_model_options = ["llama3-8b-8192", "llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"]
|
329 |
+
content_selected_model = st.selectbox("Content generation:", content_model_options)
|
330 |
+
|
331 |
+
|
332 |
+
# Add note about rate limits
|
333 |
+
st.info("Important: Different models have different token and rate limits which may cause runtime errors.")
|
334 |
+
|
335 |
+
|
336 |
+
if st.button('End Generation and Download Notes'):
|
337 |
+
if "notes" in st.session_state:
|
338 |
+
|
339 |
+
# Create markdown file
|
340 |
+
markdown_file = create_markdown_file(st.session_state.notes.get_markdown_content())
|
341 |
+
st.download_button(
|
342 |
+
label='Download Text',
|
343 |
+
data=markdown_file,
|
344 |
+
file_name='generated_notes.txt',
|
345 |
+
mime='text/plain'
|
346 |
+
)
|
347 |
+
|
348 |
+
# Create pdf file (styled)
|
349 |
+
pdf_file = create_pdf_file(st.session_state.notes.get_markdown_content())
|
350 |
+
st.download_button(
|
351 |
+
label='Download PDF',
|
352 |
+
data=pdf_file,
|
353 |
+
file_name='generated_notes.pdf',
|
354 |
+
mime='application/pdf'
|
355 |
+
)
|
356 |
+
st.session_state.button_disabled = False
|
357 |
+
else:
|
358 |
+
raise ValueError("Please generate content first before downloading the notes.")
|
359 |
+
|
360 |
+
input_method = st.radio("Choose input method:", ["Upload audio file", "YouTube link"])
|
361 |
+
audio_file = None
|
362 |
+
youtube_link = None
|
363 |
+
groq_input_key = None
|
364 |
+
with st.form("groqform"):
|
365 |
+
if not GROQ_API_KEY:
|
366 |
+
groq_input_key = st.text_input("Enter your Groq API Key (gsk_yA...):", "", type="password")
|
367 |
+
|
368 |
+
# Add radio button to choose between file upload and YouTube link
|
369 |
+
|
370 |
+
if input_method == "Upload audio file":
|
371 |
+
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"]) # TODO: Add a max size
|
372 |
+
else:
|
373 |
+
youtube_link = st.text_input("Enter YouTube link:", "")
|
374 |
+
|
375 |
+
# Generate button
|
376 |
+
submitted = st.form_submit_button(st.session_state.button_text, on_click=disable, disabled=st.session_state.button_disabled)
|
377 |
+
|
378 |
+
#processing status
|
379 |
+
status_text = st.empty()
|
380 |
+
def display_status(text):
|
381 |
+
status_text.write(text)
|
382 |
+
|
383 |
+
def clear_status():
|
384 |
+
status_text.empty()
|
385 |
+
|
386 |
+
download_status_text = st.empty()
|
387 |
+
def display_download_status(text:str):
|
388 |
+
download_status_text.write(text)
|
389 |
+
|
390 |
+
def clear_download_status():
|
391 |
+
download_status_text.empty()
|
392 |
+
|
393 |
+
# Statistics
|
394 |
+
placeholder = st.empty()
|
395 |
+
def display_statistics():
|
396 |
+
with placeholder.container():
|
397 |
+
if st.session_state.statistics_text:
|
398 |
+
if "Transcribing audio in background" not in st.session_state.statistics_text:
|
399 |
+
st.markdown(st.session_state.statistics_text + "\n\n---\n") # Format with line if showing statistics
|
400 |
+
else:
|
401 |
+
st.markdown(st.session_state.statistics_text)
|
402 |
+
else:
|
403 |
+
placeholder.empty()
|
404 |
+
|
405 |
+
if submitted:
|
406 |
+
if input_method == "Upload audio file" and audio_file is None:
|
407 |
+
st.error("Please upload an audio file")
|
408 |
+
elif input_method == "YouTube link" and not youtube_link:
|
409 |
+
st.error("Please enter a YouTube link")
|
410 |
+
else:
|
411 |
+
st.session_state.button_disabled = True
|
412 |
+
# Show temporary message before transcription is generated and statistics show
|
413 |
+
|
414 |
+
audio_file_path = None
|
415 |
+
|
416 |
+
if input_method == "YouTube link":
|
417 |
+
display_status("Downloading audio from YouTube link ....")
|
418 |
+
audio_file_path = download_video_audio(youtube_link, display_download_status)
|
419 |
+
if audio_file_path is None:
|
420 |
+
st.error("Failed to download audio from YouTube link. Please try again.")
|
421 |
+
enable()
|
422 |
+
clear_status()
|
423 |
+
else:
|
424 |
+
# Read the downloaded file and create a file-like object
|
425 |
+
display_status("Processing Youtube audio ....")
|
426 |
+
with open(audio_file_path, 'rb') as f:
|
427 |
+
file_contents = f.read()
|
428 |
+
audio_file = BytesIO(file_contents)
|
429 |
+
|
430 |
+
# Check size first to ensure will work with Whisper
|
431 |
+
if os.path.getsize(audio_file_path) > MAX_FILE_SIZE:
|
432 |
+
raise ValueError(FILE_TOO_LARGE_MESSAGE)
|
433 |
+
|
434 |
+
audio_file.name = os.path.basename(audio_file_path) # Set the file name
|
435 |
+
delete_download(audio_file_path)
|
436 |
+
clear_download_status()
|
437 |
+
|
438 |
+
if not GROQ_API_KEY:
|
439 |
+
st.session_state.groq = Groq(api_key=groq_input_key)
|
440 |
+
|
441 |
+
display_status("Transcribing audio in background....")
|
442 |
+
transcription_text = transcribe_audio(audio_file)
|
443 |
+
|
444 |
+
display_statistics()
|
445 |
+
|
446 |
+
|
447 |
+
display_status("Generating notes structure....")
|
448 |
+
large_model_generation_statistics, notes_structure = generate_notes_structure(transcription_text, model=str(outline_selected_model))
|
449 |
+
print("Structure: ",notes_structure)
|
450 |
+
|
451 |
+
display_status("Generating notes ...")
|
452 |
+
total_generation_statistics = GenerationStatistics(model_name="llama3-8b-8192")
|
453 |
+
clear_status()
|
454 |
+
|
455 |
+
|
456 |
+
try:
|
457 |
+
notes_structure_json = json.loads(notes_structure)
|
458 |
+
notes = NoteSection(structure=notes_structure_json,transcript=transcription_text)
|
459 |
+
|
460 |
+
if 'notes' not in st.session_state:
|
461 |
+
st.session_state.notes = notes
|
462 |
+
|
463 |
+
st.session_state.notes.display_structure()
|
464 |
+
|
465 |
+
def stream_section_content(sections):
|
466 |
+
for title, content in sections.items():
|
467 |
+
if isinstance(content, str):
|
468 |
+
content_stream = generate_section(transcript=transcription_text, existing_notes=notes.return_existing_contents(), section=(title + ": " + content),model=str(content_selected_model))
|
469 |
+
for chunk in content_stream:
|
470 |
+
# Check if GenerationStatistics data is returned instead of str tokens
|
471 |
+
chunk_data = chunk
|
472 |
+
if type(chunk_data) == GenerationStatistics:
|
473 |
+
total_generation_statistics.add(chunk_data)
|
474 |
+
|
475 |
+
st.session_state.statistics_text = str(total_generation_statistics)
|
476 |
+
display_statistics()
|
477 |
+
elif chunk is not None:
|
478 |
+
st.session_state.notes.update_content(title, chunk)
|
479 |
+
elif isinstance(content, dict):
|
480 |
+
stream_section_content(content)
|
481 |
+
|
482 |
+
stream_section_content(notes_structure_json)
|
483 |
+
except json.JSONDecodeError:
|
484 |
+
st.error("Failed to decode the notes structure. Please try again.")
|
485 |
+
|
486 |
+
enable()
|
487 |
+
|
488 |
+
except Exception as e:
|
489 |
+
st.session_state.button_disabled = False
|
490 |
+
|
491 |
+
if hasattr(e, 'status_code') and e.status_code == 413:
|
492 |
+
# In the future, this limitation will be fixed as Groqnotes will automatically split the audio file and transcribe each part.
|
493 |
+
st.error(FILE_TOO_LARGE_MESSAGE)
|
494 |
+
else:
|
495 |
+
st.error(e)
|
496 |
+
|
497 |
+
if st.button("Clear"):
|
498 |
+
st.rerun()
|
499 |
+
|
500 |
+
# Remove audio after exception to prevent data storage leak
|
501 |
+
if audio_file_path is not None:
|
502 |
+
delete_download(audio_file_path)
|
download.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import unicode_literals
|
2 |
+
import yt_dlp as youtube_dl
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
import os
|
6 |
+
import shutil
|
7 |
+
|
8 |
+
max_retries = 3
|
9 |
+
delay = 2
|
10 |
+
|
11 |
+
|
12 |
+
class MyLogger:
    """Logger object for yt-dlp that prints every message to stdout.

    Debug messages are additionally forwarded to ``external_logger``;
    warnings and errors are only printed.
    """

    def __init__(self, external_logger=lambda x: None):
        self.external_logger = external_logger

    def debug(self, msg):
        """Print a debug message and forward it to the external logger."""
        print("[debug]: ", msg)
        forward = self.external_logger
        forward(msg)

    def warning(self, msg):
        """Print a warning message."""
        print("[warning]: ", msg)

    def error(self, msg):
        """Print an error message."""
        print("[error]: ", msg)
|
25 |
+
|
26 |
+
|
27 |
+
def my_hook(d):
    """Progress hook: print the status and announce when downloading is done.

    ``d`` is a mapping that must contain a ``"status"`` key.
    """
    status = d["status"]
    print("hook", status)
    if status != "finished":
        return
    print("Done downloading, now converting ...")
|
31 |
+
|
32 |
+
|
33 |
+
def get_ydl_opts(external_logger=lambda x: None):
    """Build the yt-dlp options dict used to fetch audio as an mp3 file.

    ``external_logger`` is wrapped in a MyLogger and receives debug messages.
    """
    extract_mp3 = {
        "key": "FFmpegExtractAudio",
        "preferredcodec": "mp3",
        "preferredquality": "192",  # set the preferred bitrate to 192kbps
    }
    options = {
        "format": "bestaudio/best",
        "postprocessors": [extract_mp3],
        "logger": MyLogger(external_logger),
        "outtmpl": "./downloads/audio/%(title)s.%(ext)s",  # Set the output filename directly
        "progress_hooks": [my_hook],
    }
    return options
|
47 |
+
|
48 |
+
|
49 |
+
def download_video_audio(url, external_logger=lambda x: None):
    """Download the audio of ``url`` and return the expected mp3 file path.

    The download is attempted up to ``max_retries`` times with ``delay``
    seconds between attempts; the exception of the last failed attempt is
    re-raised. The returned path is the prepared output filename with its
    extension replaced by ``.mp3``.
    """
    for attempt in range(1, max_retries + 1):
        try:
            with youtube_dl.YoutubeDL(get_ydl_opts(external_logger)) as ydl:
                print("Going to download ", url)
                # Metadata is fetched first so the output filename is known.
                metadata = ydl.extract_info(url, download=False)
                source_name = ydl.prepare_filename(metadata)
                status = ydl.download([url])
                print("youtube-dl result :", status)
                stem, _ = os.path.splitext(source_name)
                mp3_filename = stem + '.mp3'
                print('mp3 file name - ', mp3_filename)
                return mp3_filename
        except Exception as e:
            print(
                f"An error occurred during downloading (Attempt {attempt}/{max_retries}):",
                str(e),
            )
            if attempt >= max_retries:
                raise e
            time.sleep(delay)
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
+
def delete_download(path):
    """Delete a file or a directory tree, reporting the outcome on stdout.

    Errors are never raised to the caller; they are printed instead.
    """
    try:
        if os.path.isfile(path):
            os.remove(path)
            outcome = f"File {path} has been deleted."
        elif os.path.isdir(path):
            shutil.rmtree(path)
            outcome = f"Directory {path} and its contents have been deleted."
        else:
            outcome = f"The path {path} is neither a file nor a directory."
        print(outcome)
    except PermissionError:
        print(f"Permission denied: Unable to delete {path}.")
    except FileNotFoundError:
        print(f"File or directory not found: {path}")
    except Exception as e:
        print(f"An error occurred while trying to delete {path}: {str(e)}")
|
91 |
+
|
example.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
GROQ_API_KEY=gsk_yA...
|
packages.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
weasyprint
|
2 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==5.3.0
|
2 |
+
annotated-types==0.6.0
|
3 |
+
anyio==4.3.0
|
4 |
+
attrs==23.2.0
|
5 |
+
blinker==1.8.2
|
6 |
+
Brotli==1.1.0
|
7 |
+
cachetools==5.3.3
|
8 |
+
certifi==2024.2.2
|
9 |
+
cffi==1.16.0
|
10 |
+
charset-normalizer==3.3.2
|
11 |
+
click==8.1.7
|
12 |
+
cssselect2==0.7.0
|
13 |
+
defusedxml==0.7.1
|
14 |
+
distro==1.9.0
|
15 |
+
docopt==0.6.2
|
16 |
+
exceptiongroup==1.2.1
|
17 |
+
fonttools==4.51.0
|
18 |
+
fpdf2==2.7.9
|
19 |
+
gitdb==4.0.11
|
20 |
+
GitPython==3.1.43
|
21 |
+
groq==0.6.0
|
22 |
+
h11==0.14.0
|
23 |
+
html5lib==1.1
|
24 |
+
httpcore==1.0.5
|
25 |
+
httpx==0.27.0
|
26 |
+
idna==3.7
|
27 |
+
Jinja2==3.1.4
|
28 |
+
jsonschema==4.22.0
|
29 |
+
jsonschema-specifications==2023.12.1
|
30 |
+
markdown-it-py==3.0.0
|
31 |
+
markdown2==2.4.13
|
32 |
+
MarkupSafe==2.1.5
|
33 |
+
md2pdf==1.0.1
|
34 |
+
mdurl==0.1.2
|
35 |
+
mutagen==1.47.0
|
36 |
+
numpy==1.26.4
|
37 |
+
packaging==24.0
|
38 |
+
pandas==2.2.2
|
39 |
+
pillow==10.3.0
|
40 |
+
protobuf==4.25.3
|
41 |
+
pyarrow==16.1.0
|
42 |
+
pycparser==2.22
|
43 |
+
pycryptodomex==3.20.0
|
44 |
+
pydantic==2.7.1
|
45 |
+
pydantic_core==2.18.2
|
46 |
+
pydeck==0.9.1
|
47 |
+
pydyf==0.10.0
|
48 |
+
Pygments==2.18.0
|
49 |
+
pyphen==0.15.0
|
50 |
+
python-dateutil==2.9.0.post0
|
51 |
+
python-dotenv==1.0.1
|
52 |
+
pytz==2024.1
|
53 |
+
referencing==0.35.1
|
54 |
+
requests==2.32.3
|
55 |
+
rich==13.7.1
|
56 |
+
rpds-py==0.18.1
|
57 |
+
six==1.16.0
|
58 |
+
smmap==5.0.1
|
59 |
+
sniffio==1.3.1
|
60 |
+
streamlit==1.36.0
|
61 |
+
tenacity==8.3.0
|
62 |
+
tinycss2==1.3.0
|
63 |
+
toml==0.10.2
|
64 |
+
toolz==0.12.1
|
65 |
+
tornado==6.4
|
66 |
+
typing_extensions==4.11.0
|
67 |
+
tzdata==2024.1
|
68 |
+
urllib3==2.2.1
|
69 |
+
watchdog==4.0.1
|
70 |
+
weasyprint==62.3
|
71 |
+
webencodings==0.5.1
|
72 |
+
websockets==12.0
|
73 |
+
yt-dlp @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
|
74 |
+
zopfli==0.2.3
|