Keane Moraes committed on
Commit
2b58524
1 Parent(s): 1b8c496

transcription uses an executable

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. exec/yt-dlp_linux +3 -0
  3. transcription.py +11 -8
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ exec/yt-dlp_linux filter=lfs diff=lfs merge=lfs -text
exec/yt-dlp_linux ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7494af600c4389d19961b44e406472bba7c54c28dc1f62bb72bdde7181708a9a
3
+ size 29333576
transcription.py CHANGED
@@ -5,6 +5,7 @@ from pydub import AudioSegment
5
  from pydub.utils import make_chunks
6
  import pydub
7
  from pathlib import Path
 
8
 
9
  # For getting text from PDF
10
  from zipfile import ZipFile
@@ -57,7 +58,7 @@ class DownloadAudio:
57
  self.yt = YouTube(self.link)
58
  continue
59
 
60
- def download(self, pathname:str):
61
  """
62
  Download the audio from the youtube video and saves it to multiple .wav files
63
  in the specified folder. Returns a list of the paths to the .wav files.
@@ -69,13 +70,15 @@ class DownloadAudio:
69
  FINAL_WAV_PATH = f"{pathname}/{self.WAV_FILE_NAME}"
70
 
71
  if not os.path.exists(FINAL_WAV_PATH):
72
- # Download the .mp4 file
73
- audiostream = self.yt.streams.filter(only_audio=True).first()
74
- outfile_path = audiostream.download(pathname)
75
-
76
- # Convert the .mp4 file to .wav
77
- wav_file = AudioFileClip(outfile_path)
78
- wav_file.write_audiofile(FINAL_WAV_PATH, bitrate="16k", fps=16000)
 
 
79
 
80
  # Load the input .wav file
81
  audio = AudioSegment.from_wav(FINAL_WAV_PATH)
 
5
  from pydub.utils import make_chunks
6
  import pydub
7
  from pathlib import Path
8
+ import subprocess
9
 
10
  # For getting text from PDF
11
  from zipfile import ZipFile
 
58
  self.yt = YouTube(self.link)
59
  continue
60
 
61
+ def download(self, pathname:str) -> str:
62
  """
63
  Download the audio from the youtube video and saves it to multiple .wav files
64
  in the specified folder. Returns a list of the paths to the .wav files.
 
70
  FINAL_WAV_PATH = f"{pathname}/{self.WAV_FILE_NAME}"
71
 
72
  if not os.path.exists(FINAL_WAV_PATH):
73
+ print("\n\n\n DOWNLOADING AUDIO \n\n\n")
74
+ current_dir = os.getcwd()
75
+ print(current_dir)
76
+ executable_path = os.path.join(current_dir, "exec/yt-dlp_linux")
77
+ # Download the video as an audio file using the bundled yt-dlp executable
78
+ result = subprocess.run([executable_path, "-x", "--audio-format", "wav", "-o", FINAL_WAV_PATH, self.link])
79
+ if result.returncode != 0:
80
+ print("Failed to download audio. Retrying...")
81
+ return "FAILED"
82
 
83
  # Load the input .wav file
84
  audio = AudioSegment.from_wav(FINAL_WAV_PATH)