Spaces:
Running
Running
Upload 3 files
Browse files- app.py +34 -0
- deepgram_transcribe.py +129 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pathlib import Path
|
3 |
+
from deepgram_transcribe import process
|
4 |
+
import os
|
5 |
+
|
6 |
+
def upload_file(files):
    """Return the filesystem paths of the uploaded files.

    Args:
        files: Iterable of uploaded items. Each item is either an object
            exposing its path as a ``name`` attribute or a plain path
            string. ``None`` or an empty iterable is accepted.

    Returns:
        A list with one path per uploaded item, in upload order; an empty
        list when nothing was uploaded.
    """
    if not files:
        return []
    # Items without a ``name`` attribute (e.g. plain ``str`` paths) are
    # passed through unchanged.
    return [getattr(item, "name", item) for item in files]
|
9 |
+
|
10 |
+
def process_submit(title, file, upload_button, progress=gr.Progress()):
    """Run the transcription pipeline on the uploaded file.

    Args:
        title: Value of the title component; unused, present only because
            the component is listed among the interface inputs.
        file: Path of the uploaded audio/video file (or a list of paths,
            in which case only the first one is processed).
        upload_button: Value of the upload button; unused.
        progress: Progress tracker forwarded to ``process``.

    Returns:
        Path of the zip archive produced by ``process``.

    Raises:
        gr.Error: If no file has been uploaded yet.
    """
    if not file:
        # Fail with a readable message instead of letting ``open(None)``
        # blow up deep inside the pipeline.
        raise gr.Error("Please upload an audio or video file before submitting.")
    if isinstance(file, (list, tuple)):
        # ``process`` opens a single path, so only the first file is used.
        file = file[0]

    progress(0, desc="Starting...")
    zip_folder_path = process(file, progress)
    return zip_folder_path
|
14 |
+
|
15 |
+
# Caption for the progress area.
track_title = gr.Markdown(
    """
You can track your progress here
"""
)

# Shows the uploaded file(s) and doubles as the file input of the interface.
file_output = gr.File()
upload_button = gr.UploadButton(
    "Click to upload a file",
    file_types=["audio", "video"],
    file_count="multiple",
)

# Page heading.
title = gr.Markdown(
    """
# Playground
Upload your audio file here.
"""
)

demo = gr.Interface(
    fn=process_submit,
    inputs=[title, file_output, upload_button],
    outputs="file",
    allow_flagging="never",
)
with demo:
    # When an upload finishes, pass the uploaded files through upload_file
    # and put the resulting paths into file_output.
    upload_button.upload(upload_file, upload_button, file_output)

demo.launch()
|
33 |
+
|
34 |
+
|
deepgram_transcribe.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This script transcribes a downloaded YouTube video using Deepgram.
|
2 |
+
# the audio should be cleaned with UVR5 first, so the file is flac
|
3 |
+
# it will upload the full length of interview or podcast to deepgram
|
4 |
+
# and will return the speaker id. The user must manually listen to the audio clips to find out which speaker is wanted
|
5 |
+
# discard remaining speakers and short length audio
|
6 |
+
#
|
7 |
+
|
8 |
+
import os
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
from pydub import AudioSegment
|
11 |
+
import math
|
12 |
+
from os.path import join
|
13 |
+
import shutil
|
14 |
+
|
15 |
+
from deepgram import (
|
16 |
+
DeepgramClient,
|
17 |
+
PrerecordedOptions,
|
18 |
+
FileSource,
|
19 |
+
)
|
20 |
+
|
21 |
+
def process(audio_file, progress):
    """Transcribe an audio file with Deepgram and cut it into per-sentence clips.

    The whole file is sent to Deepgram with diarization enabled. Every
    sentence of the returned transcript is exported as its own WAV clip into
    ``output/Speaker_<id>/`` under the current working directory, a
    ``|``-separated index of all clips is written to
    ``output/vogue_output.txt``, and the ``output`` folder is zipped.

    Args:
        audio_file: Path of the audio file to transcribe and slice.
        progress: Progress tracker; it is called with a float fraction and
            its ``tqdm`` method wraps the paragraph loop.

    Returns:
        ``"output.zip"``, the archive path relative to the current working
        directory.

    Raises:
        RuntimeError: If the Deepgram transcription request fails.
    """
    load_dotenv("myenv-variable.env")

    tags = "vogue"  # youtube source, for categorization
    buffer_ms = 5  # padding added before and after every sentence

    api_key = os.getenv('API_DEEPGRAM')
    original_parent_folder = os.getcwd()
    print(original_parent_folder)

    # NOTE(review): this folder is never cleared, so clips written by earlier
    # runs are still present when it is archived below — confirm that this is
    # intended.
    speaker_folder = join(original_parent_folder, "output")
    os.makedirs(speaker_folder, exist_ok=True)

    deepgram = DeepgramClient(api_key)

    with open(audio_file, "rb") as file:
        buffer_data = file.read()

    payload: FileSource = {
        "buffer": buffer_data,
    }

    # STEP 2: Configure Deepgram options for audio analysis
    options = PrerecordedOptions(
        model="nova-2",
        smart_format=True,
        filler_words=True,
        diarize=True,
    )

    progress(0.20)

    try:
        response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)
    except Exception as e:
        print(e)
        # Nothing below can work without a response, so stop here with a
        # clear error instead of failing later on an unbound variable.
        raise RuntimeError(f"Deepgram transcription failed: {e}") from e

    progress(0.30)

    audio = AudioSegment.from_file(audio_file)
    paragraphs = response['results']['channels'][0]['alternatives'][0]['paragraphs']['paragraphs']

    csv_data = [["filename", "speaker", "text", "start_time", "end_time", "duration"]]
    clip_index = 1

    progress(0.40)

    for paragraph in progress.tqdm(paragraphs, desc="Generating..."):
        for text in paragraph['sentences']:
            # Convert the sentence bounds to ms and pad them by buffer_ms.
            # The start is clamped at 0: a negative slice index would be
            # taken relative to the end of the audio and give an empty clip.
            start_time_ms = max(0, math.floor(text['start'] * 1000) - buffer_ms)
            end_time_ms = math.ceil(text['end'] * 1000) + buffer_ms
            duration_s = round(text['end'] - text['start'], 3)
            duration_ms = str(end_time_ms - start_time_ms).zfill(6)
            speaker_id = paragraph['speaker']

            # The folder is always named after the numeric speaker id.
            folder_path = join(speaker_folder, "Speaker_" + str(speaker_id))
            os.makedirs(folder_path, exist_ok=True)

            # NOTE(review): speaker 10 is relabelled in the clip name and the
            # index only; the reason is not visible here — confirm.
            if speaker_id == 10:
                speaker_id = "Tayr"

            # Slice the audio segment
            segment = audio[start_time_ms:end_time_ms]

            clip_name = f"{tags}_Speaker_{speaker_id}_i{str(clip_index).zfill(3)}_d{duration_ms}.wav"

            # The index lists clips under "wavs/", while the clip itself is
            # exported into the speaker's folder.
            file_name = join("wavs", clip_name)
            segment.export(join(folder_path, clip_name), format="wav")

            csv_data.append([file_name, speaker_id, text['text'], start_time_ms, end_time_ms, duration_s])

            clip_index += 1

    csv_filename = join(speaker_folder, f"{tags}_output.txt")

    # Transcripts can contain non-ASCII text, so pin the encoding rather than
    # relying on the platform default.
    with open(csv_filename, 'w', encoding="utf-8") as file:
        # One line per row, fields separated by '|'.
        file.writelines('|'.join(str(item) for item in row) + '\n' for row in csv_data)

    progress(0.90)

    shutil.make_archive("output", 'zip', speaker_folder)
    print(f"Data written to {csv_filename}")

    progress(1.00)
    return "output.zip"
|
129 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
python-dotenv
|
2 |
+
deepgram-sdk==3.2.6
|