Spaces:

hellos
/

Speech-to-Text

Runtime error

App Files Files Community

hellos committed on Feb 12

Commit

43b6df2

•

1 Parent(s): 87f0545

Create app.py

Browse files

Files changed (1) hide show

app.py +147 -0

app.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import whisper
+# Name of the Whisper checkpoint to load; the trailing note lists the sizes considered.
+select_model ="base" # ['tiny', 'base']
+# Load the model once at import time; convert_to_text() reuses this module-level instance.
+model = whisper.load_model(select_model)
+import yt_dlp
+import ffmpeg
+import sys
+import uuid
+import re
+def extract_video_id(url):
+    """Return the YouTube video ID embedded in ``url``, or ``None`` if absent.
+
+    Recognised shapes are ``youtu.be/<id>`` short links and ``youtube.com`` /
+    ``youtube-nocookie.com`` URLs using ``embed/``, ``v/``, ``shorts/``,
+    ``watch?v=`` or ``watch?...&v=``. The ID is the run of word characters
+    and hyphens that follows the matched prefix.
+    """
+    found = re.search(
+        r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|watch\?v=|watch\?.+&v=))([\w-]+)",
+        url,
+    )
+    return found.group(1) if found else None
+def download_audio(Youtube_Video_Link):
+    """Download the audio of a YouTube video and return the MP3 file name.
+
+    The link is normalised to ``https://www.youtube.com/watch?v=<id>`` using
+    the ID found by ``extract_video_id``. The download is written relative
+    to the current working directory under a random 8-character name.
+
+    Args:
+        Youtube_Video_Link: any YouTube URL form ``extract_video_id`` accepts.
+
+    Returns:
+        The relative file name ``"<random>.mp3"``.
+
+    Raises:
+        ValueError: if no video ID can be found in the link.
+    """
+    video_id = extract_video_id(Youtube_Video_Link)
+    if video_id is None:
+        # Fail early instead of asking yt-dlp for the bogus URL "watch?v=None".
+        raise ValueError(
+            f"Could not find a YouTube video ID in: {Youtube_Video_Link!r}"
+        )
+    yt_url = f"https://www.youtube.com/watch?v={video_id}"
+    # A short random stem keeps downloads from different requests apart.
+    random_uuid = str(uuid.uuid4())[:8]
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+        }],
+        # No extension in the template: the function returns "<stem>.mp3",
+        # the name the MP3 extraction step is expected to produce.
+        "outtmpl": f'{random_uuid}',
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([yt_url])
+    return f"{random_uuid}.mp3"
+def store_path_in_json(path, json_file_path="stored_paths.json"):
+    """Append ``path`` and the current local time to a JSON registry file.
+
+    The registry is a JSON list of ``{"path": ..., "timestamp": ...}``
+    objects with timestamps formatted as ``%Y-%m-%d %H:%M:%S``. A missing
+    registry is treated as an empty list and created on write.
+
+    Args:
+        path: file path to record.
+        json_file_path: location of the registry file.
+
+    Raises:
+        json.decoder.JSONDecodeError: if the existing registry is not valid
+            JSON; the error and the file content are printed first.
+    """
+    entry = {
+        "path": path,
+        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    }
+    try:
+        with open(json_file_path, 'r') as json_file:
+            raw = json_file.read()
+    except FileNotFoundError:
+        # First use: start from an empty registry.
+        data = []
+    else:
+        try:
+            data = json.loads(raw)
+        except json.decoder.JSONDecodeError as e:
+            # Print the text already read; the file handle is closed by now,
+            # so reading from it again would raise instead of helping.
+            print(f"Error decoding JSON file: {e}")
+            print(f"Content of JSON file: {raw}")
+            raise  # Reraise the exception after printing for further analysis
+    data.append(entry)
+    with open(json_file_path, 'w') as json_file:
+        json.dump(data, json_file, indent=2)
+import os
+import json
+from datetime import datetime, timedelta
+def delete_old_files(json_filename, max_age_hours):
+    """Delete registered files older than ``max_age_hours`` and prune the registry.
+
+    The registry is the JSON list written by ``store_path_in_json``. Each
+    entry whose timestamp is more than ``max_age_hours`` hours before the
+    current local time has its file removed and is dropped from the list.
+    The remaining entries are written back to ``json_filename``.
+
+    Args:
+        json_filename: path of the registry file; nothing happens if it is missing.
+        max_age_hours: age threshold in hours.
+    """
+    try:
+        with open(json_filename, 'r') as json_file:
+            data = json.load(json_file)
+    except FileNotFoundError:
+        # No registry yet, so nothing to delete.
+        return
+    now = datetime.now()
+    updated_data = []
+    for entry in data:
+        path = entry["path"]
+        timestamp_str = entry["timestamp"]
+        creation_date = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
+        if (now - creation_date).total_seconds() / 3600 > max_age_hours:
+            # The file may already be gone, e.g. removed by a concurrent
+            # request; attempting the removal avoids an exists/remove race.
+            try:
+                os.remove(path)
+            except FileNotFoundError:
+                pass
+            # Expired entries are dropped from the registry either way.
+            continue
+        updated_data.append(entry)
+    with open(json_filename, 'w') as json_file:
+        json.dump(updated_data, json_file, indent=2)
+def convert_to_text(audio_path):
+    """Transcribe the audio file at ``audio_path`` and return the text.
+
+    Before transcribing, files registered more than one hour ago are purged
+    and ``audio_path`` is added to the same registry. Transcription uses the
+    module-level Whisper ``model`` with ``fp16`` disabled.
+    """
+    registry_file = "stored_paths.json"
+    max_age_hours = 1
+    delete_old_files(registry_file, max_age_hours)
+    # Uses store_path_in_json's default registry, which is the same file.
+    store_path_in_json(audio_path)
+    transcription = model.transcribe(audio_path, fp16=False)
+    return transcription["text"]
+import os
+def audio_to_text(youtube_link,audio_path):
+    """Transcribe a YouTube video or a local audio file.
+
+    The YouTube link wins when it is usable. Otherwise ``audio_path`` is
+    transcribed if it names an existing file.
+
+    Args:
+        youtube_link: YouTube URL. ``None``, an empty string, the literal
+            string ``"None"`` or anything of three characters or fewer
+            counts as "no link".
+        audio_path: path to a local audio file, or a falsy value if none.
+
+    Returns:
+        The transcribed text, or ``None`` when neither input is usable.
+    """
+    link = (youtube_link or "").strip()
+    # "None" is accepted as a "not provided" marker. It is four characters
+    # long, so the length check alone would treat it as a link and the
+    # local-file branch would never run.
+    if link != "None" and len(link) > 3:
+        downloaded_name = download_audio(link)
+        # download_audio returns a name relative to the working directory.
+        return convert_to_text(os.path.join(os.getcwd(), downloaded_name))
+    if audio_path and os.path.exists(audio_path):
+        return convert_to_text(audio_path)
+    return None
+import gradio as gr
+import os
+def transcribe_audio(youtube_link, audio_file):
+    """Gradio callback: transcribe the YouTube link or the uploaded file.
+
+    Args:
+        youtube_link: text from the link box; takes priority when non-empty.
+        audio_file: the uploaded file, either a path string or an object
+            exposing the path as ``.name``.
+
+    Returns:
+        The transcription, or a prompt asking for input when nothing usable
+        was supplied.
+    """
+    prompt = "Please provide a YouTube link or upload an audio file."
+    # Start from None so every path below leaves ``result`` defined.
+    result = None
+    if youtube_link:
+        result = audio_to_text(youtube_link, "")
+    elif audio_file:
+        # NOTE(review): some Gradio versions appear to pass a temp-file
+        # object rather than a path string - confirm for the pinned version.
+        audio_path = getattr(audio_file, "name", audio_file)
+        if os.path.exists(audio_path):
+            # Pass an empty link: a placeholder such as "None" is long
+            # enough to be mistaken for a URL by audio_to_text.
+            result = audio_to_text("", audio_path)
+    # Only None means "nothing transcribed"; an empty transcription is kept.
+    return prompt if result is None else result
+# Web UI: a text box for the YouTube link and a file-upload widget feed
+# transcribe_audio, whose return value is shown as plain text.
+# NOTE(review): live=True presumably re-runs the callback on every input
+# change, which would start a download per edit - confirm this is intended.
+iface = gr.Interface(
+    transcribe_audio,
+    [gr.Textbox(), gr.File()],
+    "text",
+    live=True,
+)
+# Start the app server as soon as the module runs.
+iface.launch()