Update functions.py
functions.py CHANGED (+36 -6)
```diff
@@ -1,6 +1,7 @@
 import whisper
 import os
 from pytube import YouTube
+import openai
 import pandas as pd
 import plotly_express as px
 import nltk
@@ -21,6 +22,7 @@ import pickle, math
 import wikipedia
 from pyvis.network import Network
 import torch
+from pydub import AudioSegment
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
@@ -218,14 +220,42 @@ def inference(link, upload, _asr_model):
 
     if validators.url(link):
 
-
-
-        audio_file = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
+        yt = YouTube(link)
+        title = yt.title
 
-
+        #Get audio file from YT
+        audio_file = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
 
-
-
+        #Get size of audio file
+        audio_size = round(os.path.getsize(path)/(1024*1024),1)
+
+        #Check if file is > 24mb, if not then use Whisper API
+        if audio_size <= 24:
+
+            #Use whisper API
+            transcript = openai.Audio.translate("whisper-1", audio_file)
+
+        else:
+
+            st.write('File size larger than 24mb, applying chunking and transcription')
+
+            # load the audio file
+            audio_file = AudioSegment.from_file(path, format="mp4")
+
+            # set chunk size to 24mb (in bytes)
+            chunk_size = 24 * 1024 * 1024
+
+            # create a directory to store the output files
+            if not os.path.exists("audio_output"):
+                os.mkdir("audio_output")
+
+            audio_chunks = []
+
+            # iterate over each chunk and export it as a separate file
+            for i, chunk in enumerate(audio_file[::chunk_size]):
+                chunk.export(f"output/chunk_{i}.mp4", format="mp4")
+                audio_chunks.append(f"output/chunk_{i}.mp4")
+
 
     results = _asr_model.transcribe(path, task='transcribe', language='en')
 
```
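On the small-file branch, one detail worth keeping in mind: pytube's `download()` returns the path of the saved file as a string, while the pre-1.0 `openai` client's `Audio.translate` and `Audio.transcribe` helpers expect an open binary file object. A minimal sketch of the call under that assumption (the wrapper name `whisper_api_transcribe` is illustrative, not part of the commit):

```python
import openai

def whisper_api_transcribe(audio_path: str) -> str:
    """Illustrative wrapper: send a local audio file to the Whisper API."""
    with open(audio_path, "rb") as f:  # the API wants a file object, not a path string
        # Audio.transcribe returns text in the source language;
        # Audio.translate (used in the commit) always returns English.
        result = openai.Audio.transcribe("whisper-1", f)
    return result["text"]
```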
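On the chunking branch, pydub's step slicing (`segment[::step]`) measures the step in milliseconds, not bytes, so a step of `24 * 1024 * 1024` selects roughly seven-hour chunks; the loop also exports to `output/` while the directory it creates is `audio_output`. A sketch of byte-budgeted chunking under pydub's millisecond semantics (the helper name, the `audio_output` default, and the bitrate-based size estimate are assumptions; re-encoded chunks only approximate the source's bytes-per-millisecond):

```python
import os
from pydub import AudioSegment

def chunk_audio(path, out_dir="audio_output", max_bytes=24 * 1024 * 1024):
    """Illustrative: split an audio file into pieces of roughly max_bytes each."""
    os.makedirs(out_dir, exist_ok=True)
    audio = AudioSegment.from_file(path, format="mp4")

    # Convert the byte budget into a duration: estimate bytes-per-millisecond
    # from the source file (len(audio) is the duration in milliseconds).
    bytes_per_ms = os.path.getsize(path) / len(audio)
    chunk_ms = int(max_bytes / bytes_per_ms)

    chunk_paths = []
    # pydub step slicing yields consecutive chunk_ms-long segments
    for i, chunk in enumerate(audio[::chunk_ms]):
        out_path = os.path.join(out_dir, f"chunk_{i}.mp4")
        chunk.export(out_path, format="mp4")
        chunk_paths.append(out_path)
    return chunk_paths
```

Each path in the returned list could then be fed through the API call sketched above and the partial transcripts concatenated.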