hellos committed on
Commit
43b6df2
1 Parent(s): 87f0545

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
# Name of the Whisper checkpoint to load at import time.
# The original author noted 'tiny' and 'base' as the choices considered.
select_model = "base"  # ['tiny', 'base']

# Shared model instance used by convert_to_text() for every request.
model = whisper.load_model(select_model)
4
+
5
+
6
+ import yt_dlp
7
+ import ffmpeg
8
+ import sys
9
+ import uuid
10
+ import re
11
+
12
def extract_video_id(url):
    """Return the YouTube video ID found in ``url``, or ``None`` if absent.

    Recognised forms are ``youtu.be/<id>`` short links and
    ``youtube.com`` / ``youtube-nocookie.com`` URLs using ``embed/``,
    ``v/``, ``shorts/``, ``watch?v=`` or ``watch?...&v=``. The ID is the
    run of word characters and hyphens that follows the matched prefix.
    """
    id_regex = r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|watch\?v=|watch\?.+&v=))([\w-]+)"
    found = re.search(id_regex, url)
    return found.group(1) if found else None
19
+
20
def download_audio(Youtube_Video_Link):
    """Download the audio of a YouTube video and return the MP3 file name.

    The link is normalised to a canonical ``watch?v=<id>`` URL first. The
    download is written relative to the current working directory under a
    random 8-character name, and yt-dlp is configured with an
    ``FFmpegExtractAudio`` post-processor whose preferred codec is ``mp3``.

    Args:
        Youtube_Video_Link: Any URL form understood by ``extract_video_id``.

    Returns:
        The relative file name ``"<random>.mp3"``.

    Raises:
        ValueError: If no video ID can be extracted from the link. Without
            this check the literal URL ``watch?v=None`` was sent to yt-dlp.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        raise ValueError(
            f"Could not find a YouTube video ID in: {Youtube_Video_Link!r}"
        )

    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    # A short random stem keeps concurrent downloads from overwriting
    # each other's output.
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # No extension here: the returned name assumes the post-processor
        # produces "<stem>.mp3".
        "outtmpl": f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"
36
+
37
def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append ``path`` and the current local time to a JSON list on disk.

    The JSON file holds a list of ``{"path": ..., "timestamp": ...}``
    objects, with timestamps formatted as ``%Y-%m-%d %H:%M:%S``. A missing
    file is treated as an empty list and is created by the final write.

    Args:
        path: File path to record.
        json_file_path: Location of the JSON registry file.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON. The
            error and the file's raw content are printed before re-raising.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    if os.path.exists(json_file_path):
        # Read the raw text first so it is still available for diagnostics.
        # The previous version tried to re-read the file handle after its
        # `with` block had closed it, which raised ValueError and hid the
        # real decoding error.
        with open(json_file_path, 'r') as json_file:
            raw_content = json_file.read()
        try:
            data = json.loads(raw_content)
        except json.decoder.JSONDecodeError as e:
            print(f"Error decoding JSON file: {e}")
            print(f"Content of JSON file: {raw_content}")
            raise  # Re-raise after printing so the caller still sees the failure
    else:
        data = []

    data.append(entry)

    # Write the updated list back to the JSON file
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)
66
+
67
+ import os
68
+ import json
69
+ from datetime import datetime, timedelta
70
+
71
def delete_old_files(json_filename, max_age_hours):
    """Delete recorded files older than ``max_age_hours`` and prune the registry.

    The registry is a JSON list of ``{"path": ..., "timestamp": ...}``
    objects whose timestamps use the ``%Y-%m-%d %H:%M:%S`` format. Ages are
    measured against the current local time. Nothing happens if the
    registry file does not exist.

    Args:
        json_filename: Path of the JSON registry file.
        max_age_hours: Entries strictly older than this many hours expire.
    """
    if not os.path.exists(json_filename):
        # No registry yet, so there is nothing to delete.
        return

    with open(json_filename, 'r') as json_file:
        data = json.load(json_file)

    now = datetime.now()
    updated_data = []
    for entry in data:
        path = entry["path"]
        creation_date = datetime.strptime(entry["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (now - creation_date).total_seconds() / 3600

        if age_hours <= max_age_hours:
            # Still fresh: keep both the file and its entry.
            updated_data.append(entry)
            continue

        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone (possibly removed by a concurrent call), so the
            # entry can simply be dropped.
            pass
        except OSError:
            # The file could not be removed right now. Keep the entry so a
            # later call retries, and do not let one bad path abort the
            # cleanup and leave the registry un-pruned.
            updated_data.append(entry)

    # Save the pruned registry.
    with open(json_filename, 'w') as json_file:
        json.dump(updated_data, json_file, indent=2)
105
def convert_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` and return its text.

    Before transcribing, files recorded in ``stored_paths.json`` more than
    one hour ago are purged and ``audio_path`` is recorded there, so it
    becomes eligible for the same cleanup later.
    """
    # Housekeeping first: expire old recordings, then register this one.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)

    transcription = model.transcribe(audio_path, fp16=False)
    return transcription["text"]
110
+ import os
111
def audio_to_text(youtube_link, audio_path):
    """Transcribe either a YouTube video's audio or a local audio file.

    A usable YouTube link takes priority: its audio is downloaded into the
    current working directory and transcribed. Otherwise ``audio_path`` is
    transcribed if it points to an existing file.

    Args:
        youtube_link: YouTube URL, or ``None`` / ``""`` / ``"None"`` when
            no link was supplied.
        audio_path: Path of a local audio file, or a placeholder when no
            file was supplied.

    Returns:
        The transcribed text, or ``None`` when neither input is usable.
    """
    link = (youtube_link or "").strip()

    # transcribe_audio() passes the string "None" as a "no link"
    # placeholder. It is four characters long, so the length test alone
    # sent uploads down the download path; exclude it explicitly.
    if link != "None" and len(link) > 3:
        downloaded_name = download_audio(link)
        downloaded_path = os.path.join(os.getcwd(), downloaded_name)
        return convert_to_text(downloaded_path)

    # Guard against None/"" before touching the filesystem.
    if audio_path and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    return None
120
+
121
+
122
+ import gradio as gr
123
+ import os
124
+
125
+
126
def transcribe_audio(youtube_link, audio_file):
    """Gradio callback: transcribe a YouTube link or an uploaded audio file.

    A non-blank YouTube link takes priority over an upload.

    Args:
        youtube_link: Text from the link box; may be empty or ``None``.
        audio_file: The uploaded file as supplied by the file input, or
            ``None`` when nothing was uploaded.

    Returns:
        The value returned by ``audio_to_text``, or a prompt message when
        no usable input was supplied.
    """
    prompt = "Please provide a YouTube link or upload an audio file."

    if youtube_link and youtube_link.strip():
        return audio_to_text(youtube_link, "None")

    # NOTE(review): depending on the Gradio version, the file input may pass
    # a path string or a temp-file object exposing `.name`; accept both.
    # Confirm against the installed version.
    upload_path = getattr(audio_file, "name", audio_file)
    if upload_path and os.path.exists(upload_path):
        # Pass an empty link rather than the string "None": that placeholder
        # is longer than three characters, which audio_to_text's length
        # check would treat as a real link.
        return audio_to_text("", upload_path)

    # Covers both "nothing supplied" and "upload path no longer exists";
    # the latter previously left `result` unassigned and raised.
    return prompt
136
+
137
# UI wiring: a text box for the YouTube link and a file upload, passed
# positionally to transcribe_audio in that order. Output is plain text.
# NOTE(review): live=True presumably re-runs the callback whenever an input
# changes — confirm this is wanted, since each run may trigger a download.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[gr.Textbox(), gr.File()],
    outputs="text",
    live=True,
)

# Start the Gradio server as soon as this module is executed.
iface.launch()