Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper
|
2 |
+
# Name of the Whisper checkpoint to load; the original author listed
# 'tiny' and 'base' as the intended choices.
select_model = "base"  # ['tiny', 'base']

# Loaded once, at import time, and kept as a module-level global.
model = whisper.load_model(select_model)
|
4 |
+
|
5 |
+
|
6 |
+
import yt_dlp
|
7 |
+
import ffmpeg
|
8 |
+
import sys
|
9 |
+
import uuid
|
10 |
+
import re
|
11 |
+
|
12 |
+
def extract_video_id(url):
    """Return the YouTube video ID embedded in *url*, or ``None``.

    Recognised forms are ``youtu.be/<id>`` short links and
    ``youtube.com`` / ``youtube-nocookie.com`` URLs that use ``embed/``,
    ``v/``, ``shorts/``, ``live/`` or a ``watch`` query string carrying
    ``v=<id>`` (either as the first or as a later query parameter).

    Args:
        url: The link to inspect.

    Returns:
        The ID (a run of word characters and hyphens) as a string, or
        ``None`` when *url* is not a string or matches no supported form.
    """
    # re.search raises TypeError on None / non-string input; the contract of
    # this function is "None when nothing can be extracted", so honour it.
    if not isinstance(url, str):
        return None

    pattern = (
        r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/"
        r"(?:embed/|v/|shorts/|live/|watch\?v=|watch\?.+&v=))([\w-]+)"
    )
    match = re.search(pattern, url)
    return match.group(1) if match else None
|
19 |
+
|
20 |
+
def download_audio(Youtube_Video_Link):
    """Download the audio of a YouTube video and return the MP3 file name.

    The link is first reduced to its video ID and rebuilt as a canonical
    ``https://www.youtube.com/watch?v=<id>`` URL. yt-dlp is then asked for
    the best audio stream, with an ``FFmpegExtractAudio`` post-processor
    configured for MP3. The output template is a random 8-character name
    with no directory part.

    Args:
        Youtube_Video_Link: Any URL form accepted by ``extract_video_id``.

    Returns:
        ``"<random name>.mp3"``. This assumes the post-processor writes the
        converted file under the template name plus ``.mp3`` -- the function
        does not check that the file exists.

    Raises:
        ValueError: If no video ID can be extracted from the link.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        # Previously this fell through and tried to download "watch?v=None".
        raise ValueError(
            f"Could not find a YouTube video ID in {Youtube_Video_Link!r}"
        )

    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    # First 8 characters of a UUID4 string: a short random base name.
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # Output name template; edit this to change how files are named.
        "outtmpl": f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"
|
36 |
+
|
37 |
+
def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append *path* and the current timestamp to a JSON log file.

    The log is a JSON list of ``{"path": ..., "timestamp": ...}`` objects,
    where the timestamp is ``datetime.now()`` formatted as
    ``"%Y-%m-%d %H:%M:%S"``. A missing log file is treated as an empty list
    and created by this call.

    Args:
        path: The path to record (stored as given).
        json_file_path: Location of the JSON log file.

    Raises:
        json.JSONDecodeError: If the existing log file is not valid JSON.
            The error and the offending file content are printed first and
            the file is left untouched.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    # Read the raw text up front so it is still available for the diagnostic
    # below; the old handler tried to re-read a file object that the ``with``
    # block had already closed, which masked the real decoding error.
    try:
        with open(json_file_path, 'r') as json_file:
            raw_content = json_file.read()
    except FileNotFoundError:
        raw_content = "[]"

    try:
        data = json.loads(raw_content)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON file: {e}")
        print(f"Content of JSON file: {raw_content}")
        raise  # Re-raise after printing so the caller can analyse it further

    data.append(entry)

    # Write the whole updated list back.
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)
|
66 |
+
|
67 |
+
import os
|
68 |
+
import json
|
69 |
+
from datetime import datetime, timedelta
|
70 |
+
|
71 |
+
def delete_old_files(json_filename, max_age_hours):
    """Delete logged files older than *max_age_hours* and prune the log.

    The log is expected to be a JSON list of objects with a ``"path"`` and a
    ``"timestamp"`` (``"%Y-%m-%d %H:%M:%S"``) key. Every entry whose
    timestamp is more than *max_age_hours* before ``datetime.now()`` has its
    file removed and is dropped from the log; the remaining entries are
    written back. If the log file does not exist, nothing happens.

    Args:
        json_filename: Path of the JSON log file.
        max_age_hours: Maximum age, in hours, an entry may have and be kept.
    """
    if not os.path.exists(json_filename):
        # No log yet, so there is nothing to delete.
        return

    with open(json_filename, 'r') as json_file:
        data = json.load(json_file)

    now = datetime.now()

    updated_data = []
    for entry in data:
        path = entry["path"]
        creation_date = datetime.strptime(entry["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (now - creation_date).total_seconds() / 3600

        if age_hours > max_age_hours:
            # Remove directly instead of "exists, then remove": the file may
            # vanish between the two calls (e.g. a concurrent cleanup). An
            # already-missing file is the desired end state, not an error.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            # Expired entries are left out of the rewritten log.
            continue

        updated_data.append(entry)

    with open(json_filename, 'w') as json_file:
        json.dump(updated_data, json_file, indent=2)
|
105 |
+
def convert_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    Before transcribing, entries older than one hour are purged from
    ``stored_paths.json`` (and their files deleted), and *audio_path* is
    then logged there, so this file is itself cleaned up by a later call.

    Args:
        audio_path: Path of the audio file handed to the Whisper model.

    Returns:
        The ``"text"`` field of the model's transcription result.
    """
    # Housekeeping first: drop expired recordings, then register this one.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)

    # fp16=False is forwarded to Whisper as-is (presumably to force
    # full-precision inference, e.g. on CPU -- TODO confirm).
    transcription = model.transcribe(audio_path, fp16=False)
    text = transcription["text"]
    return text
|
110 |
+
import os
|
111 |
+
def audio_to_text(youtube_link, audio_path):
    """Transcribe either a YouTube video or a local audio file.

    A usable YouTube link takes precedence: its audio is downloaded into the
    current working directory and transcribed. Otherwise *audio_path* is
    transcribed if it points to an existing file.

    Args:
        youtube_link: Link text. ``None``, blank strings, strings of three
            characters or fewer, and the literal placeholder ``"None"`` all
            mean "no link".
        audio_path: Path of a local audio file, or a placeholder / ``None``
            when there is none.

    Returns:
        The transcribed text, or ``None`` when neither input is usable.
    """
    link = youtube_link.strip() if isinstance(youtube_link, str) else ""

    # "None" is the placeholder callers pass for "no link". Being four
    # characters long, it used to satisfy the length check and send every
    # uploaded-file request down the (failing) download path.
    if len(link) > 3 and link != "None":
        audio_file_path = os.path.join(os.getcwd(), download_audio(link))
        return convert_to_text(audio_file_path)

    # Guard against None/empty before touching the file system.
    if audio_path and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    return None
|
120 |
+
|
121 |
+
|
122 |
+
import gradio as gr
|
123 |
+
import os
|
124 |
+
|
125 |
+
|
126 |
+
def transcribe_audio(youtube_link, audio_file):
    """Gradio callback: transcribe a YouTube link or an uploaded audio file.

    The link wins when both inputs are given. If no transcription can be
    produced (nothing supplied, the uploaded path does not exist, or
    ``audio_to_text`` returns ``None``), a prompt asking for input is
    returned instead.

    Args:
        youtube_link: Contents of the link text box (may be empty or None).
        audio_file: The uploaded file: either a path string or an object
            exposing the path as ``.name`` (which of the two Gradio passes
            depends on its version -- TODO confirm for the deployed one).

    Returns:
        The transcribed text, or the prompt message.
    """
    prompt = "Please provide a YouTube link or upload an audio file."

    # Start from "no result" so every branch below leaves `result` bound;
    # previously an upload whose path did not exist raised UnboundLocalError.
    result = None

    if youtube_link:
        # Pass "" (not the string "None") for the unused argument: "None" is
        # long enough to be mistaken for real input by a length check.
        result = audio_to_text(youtube_link, "")
    elif audio_file:
        if isinstance(audio_file, str):
            upload_path = audio_file
        else:
            upload_path = getattr(audio_file, "name", None)
        if upload_path and os.path.exists(upload_path):
            result = audio_to_text("", upload_path)

    return prompt if result is None else result
|
136 |
+
|
137 |
+
# Web UI: a text box for the YouTube link and a file-upload widget, both fed
# to transcribe_audio, whose return value is shown as plain text.
_ui_inputs = [
    gr.Textbox(),
    gr.File(),
]

# NOTE(review): live=True asks Gradio to rerun the callback whenever an input
# changes -- confirm this is intended, since each run may download and
# transcribe audio.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_ui_inputs,
    outputs="text",
    live=True,
)

# Start the app as soon as this module is executed.
iface.launch()
|