LuckyHappyFish committed on
Commit
a57ce2c
·
1 Parent(s): ab8268b
Files changed (2) hide show
  1. app.py +145 -104
  2. requirements.txt +8 -1
app.py CHANGED
@@ -1,115 +1,156 @@
1
  import gradio as gr
2
- import subprocess
3
- import requests
4
- import re
5
- import traceback
6
-
7
# Loose sanity check for YouTube links: optional scheme, optional "www.",
# then a youtube.com / youtu.be host followed by a non-empty path.
YOUTUBE_URL_PATTERN = re.compile(r'(https?://)?(www\.)?(youtube\.com|youtu\.?be)/.+')
11
-
12
def test_network():
    """Probe https://www.youtube.com and describe the outcome as text.

    Returns:
        str: A success message for HTTP 200, a failure message carrying the
        status code for any other response, or an "Exception occurred"
        message if the request itself raised.
    """
    try:
        status = requests.get("https://www.youtube.com", timeout=10).status_code
    except Exception as exc:
        return f"Exception occurred: {exc}"

    if status != 200:
        return f"Failed to connect to YouTube. Status code: {status}"
    return "Successfully connected to YouTube."
21
-
22
- def test_youtube_transcript(link):
23
- debug_info = ""
24
- transcript_text = ""
25
- thumbnail_url = None
26
-
27
- # Validate YouTube URL
28
- if not YOUTUBE_URL_PATTERN.match(link):
29
- return "", "Invalid YouTube URL provided.", None
30
-
31
- # Add network test result to debug info
32
- test_result = test_network()
33
- debug_info += f"Network test result: {test_result}\n"
34
-
35
- # Include package versions
36
- import gradio
37
- import yt_dlp
38
- import requests
39
- debug_info += f"Package versions:\n"
40
- debug_info += f"gradio: {gradio.__version__}\n"
41
- debug_info += f"yt-dlp: {yt_dlp.version.__version__}\n"
42
- debug_info += f"requests: {requests.__version__}\n"
43
 
 
 
 
44
  try:
45
- # Use yt-dlp as a subprocess to download subtitles
46
- command = [
47
- 'yt-dlp',
48
- '--skip-download',
49
- '--write-subs',
50
- '--sub-lang', 'en',
51
- '--sub-format', 'vtt',
52
- '--output', '%(id)s.%(ext)s',
53
- link
54
- ]
55
-
56
- result = subprocess.run(command, capture_output=True, text=True)
57
-
58
- if result.returncode != 0:
59
- transcript_text = "Failed to download subtitles using yt-dlp."
60
- debug_info += f"yt-dlp error: {result.stderr}\n"
61
- return transcript_text, debug_info, thumbnail_url
62
-
63
- # Extract video ID from URL
64
- video_id_match = re.search(r'v=([A-Za-z0-9_-]{11})', link)
65
- if video_id_match:
66
- video_id = video_id_match.group(1)
67
- debug_info += f"Video ID: {video_id}\n"
68
- thumbnail_url = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
69
- debug_info += f"Thumbnail URL: {thumbnail_url}\n"
70
  else:
71
- video_id = "unknown"
72
- debug_info += "Could not extract Video ID.\n"
73
-
74
- # Read the downloaded subtitle file
75
- subtitle_file = f"{video_id}.en.vtt"
76
- try:
77
- with open(subtitle_file, 'r', encoding='utf-8') as f:
78
- transcript_text = f.read()
79
-
80
- # Process the VTT file to extract plain text
81
- transcript_text = re.sub(r'WEBVTT\n\n', '', transcript_text)
82
- transcript_text = re.sub(r'\d+\n', '', transcript_text)
83
- transcript_text = re.sub(r'\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\n', '', transcript_text)
84
- transcript_text = transcript_text.strip()
85
-
86
- debug_info += "Transcript fetched using yt-dlp subprocess.\n"
87
- except FileNotFoundError:
88
- transcript_text = "Subtitle file not found."
89
- debug_info += "Subtitle file was not found after yt-dlp execution.\n"
90
- except Exception as e:
91
- transcript_text = f"An error occurred while reading subtitles: {e}"
92
- debug_info += f"Error reading subtitles: {e}\n"
93
- debug_info += traceback.format_exc()
94
  except Exception as e:
95
- transcript_text = f"An error occurred with yt-dlp subprocess: {e}"
96
- debug_info += f"Error with yt-dlp subprocess: {e}\n"
97
- debug_info += traceback.format_exc()
98
-
99
- return transcript_text, debug_info, thumbnail_url
100
-
101
# Gradio interface: a single URL textbox feeds test_youtube_transcript, whose
# three return values map onto the transcript, debug log and thumbnail outputs.
demo = gr.Interface(
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video link to fetch the transcript and detailed debug information.",
    fn=test_youtube_transcript,
    inputs=gr.Textbox(
        label="YouTube Video URL",
        placeholder="Enter YouTube video URL here",
    ),
    outputs=[
        gr.Textbox(label="Transcript", lines=20, interactive=False),
        gr.Textbox(label="Debug Information", lines=20, interactive=False),
        gr.Image(label="Thumbnail"),
    ],
)

if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
+ import librosa
3
+ import numpy as np
4
+ import torch
5
+ from diffusers import StableDiffusionPipeline
6
+ import os
7
+ import gradio as gr
8
+ import sys
9
+
10
# Startup diagnostics: report which Gradio install and interpreter are in use.
print(f"Gradio version: {gr.__version__}")
print(f"Gradio location: {gr.__file__}")
print(f"Python executable: {sys.executable}")

# Ensure that the script uses CUDA if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Stable Diffusion model once at import time; half precision is
# only requested when running on CUDA.
model_id = "runwayml/stable-diffusion-v1-5"  # Updated model ID for better accessibility
try:
    stable_diffusion = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)
except Exception as e:
    print(f"Error loading the model: {e}")
    print("Ensure you have the correct model ID and access rights.")
    # Use sys.exit: the bare exit() helper is injected by the site module for
    # interactive sessions and is not guaranteed to exist in every runtime.
    sys.exit(1)
29
+
30
def describe_audio(audio_path):
    """
    Turn an audio file into a short scene description.

    Two features are computed with librosa: the mean of the mel spectrogram
    in dB (relative to the clip's own peak, since ref=np.max) and the mean
    spectral centroid. Each is mapped onto a phrase and the two phrases are
    joined with a space.

    Parameters:
    audio_path (str): Path to the audio file.

    Returns:
    str: The combined description, or "an abstract artistic scene" if any
    step raises.
    """
    try:
        # Load at the file's native sample rate (sr=None).
        samples, sample_rate = librosa.load(audio_path, sr=None)

        mel_power = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=128)
        mean_db = np.mean(librosa.power_to_db(mel_power, ref=np.max))
        mean_centroid = np.mean(
            librosa.feature.spectral_centroid(y=samples, sr=sample_rate)
        )

        # Each table lists (upper bound, phrase) in ascending order; the first
        # bound the value falls below wins, otherwise the default is kept.
        scene = "a thunderstorm with dark clouds and lightning"
        for ceiling, phrase in (
            (-40, "a calm and serene landscape with gentle waves"),
            (-20, "a vibrant forest with rustling leaves"),
        ):
            if mean_db < ceiling:
                scene = phrase
                break

        mood = "in a surreal and dynamic setting"
        for ceiling, phrase in (
            (2000, "under soft, ambient light"),
            (4000, "with vivid and lively colors"),
        ):
            if mean_centroid < ceiling:
                mood = phrase
                break

        return f"{scene} {mood}"
    except Exception as exc:
        print(f"Error processing audio: {exc}")
        return "an abstract artistic scene"
75
+
76
def generate_image(description):
    """
    Run the module-level Stable Diffusion pipeline on a text prompt.

    Parameters:
    description (str): Textual description for image generation.

    Returns:
    The first image produced by the pipeline, or None if generation raised
    (the error is printed).
    """
    try:
        if device != "cuda":
            return stable_diffusion(description).images[0]
        # On CUDA the pipeline call is wrapped in autocast.
        with torch.autocast("cuda"):
            return stable_diffusion(description).images[0]
    except Exception as exc:
        print(f"Error generating image: {exc}")
        return None
96
+
97
def audio_to_image(audio_file):
    """
    Convert an audio file to an artistic image.

    The audio is summarised by describe_audio and the resulting description
    is passed to generate_image.

    Parameters:
    audio_file (str): Path to the uploaded audio file, or None when nothing
    was uploaded.

    Returns:
    The image returned by generate_image.

    Raises:
    gr.Error: If no audio file was provided or image generation failed.
    Errors are raised rather than returned as strings because this function
    feeds an image output component, which would interpret a returned
    string as an image path instead of displaying it as a message.
    """
    if audio_file is None:
        raise gr.Error("No audio file provided.")

    description = describe_audio(audio_file)
    print(f"Generated Description: {description}")

    image = generate_image(description)
    if image is None:
        raise gr.Error("Failed to generate image.")
    return image
118
+
119
# Gradio Interface
title = "🎵 Audio to Artistic Image Converter 🎨"
description_text = """
Upload an audio file, and this app will generate an artistic image based on the sound's characteristics.
"""

# Candidate example clips; only those present on disk are offered in the UI.
example_paths = [
    "example_audio/calm_ocean.wav",
    "example_audio/rustling_leaves.wav",
    "example_audio/thunderstorm.wav",
]

# Verify example files exist (gr.Interface expects one list per example row).
valid_examples = []
for path in example_paths:
    if os.path.isfile(path):
        valid_examples.append([path])
    else:
        print(f"Example file not found: {path}")

if not os.path.exists("example_audio"):
    os.makedirs("example_audio", exist_ok=True)
    print("Please add some example audio files in the 'example_audio' directory.")

interface = gr.Interface(
    fn=audio_to_image,
    # Gradio 4.x (required by this project's requirements) takes a list via
    # `sources`; the singular `source` keyword belongs to the 3.x API.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Image(type="pil"),
    title=title,
    description=description_text,
    examples=valid_examples or None,
    allow_flagging="never",
    theme="default",
)

if __name__ == "__main__":
    interface.launch()
requirements.txt CHANGED
@@ -1,3 +1,10 @@
1
- gradio==4.44.1
2
  yt-dlp==2023.10.7
3
  requests==2.32.3
 
 
 
 
 
 
 
 
 
 
1
  yt-dlp==2023.10.7
2
  requests==2.32.3
3
+ accelerate
4
+ gradio>=4.44.1
5
+ librosa
6
+ numpy
7
+ torch
8
+ diffusers
9
+ accelerate
10
+ psutil