Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,212 +1,71 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from gradio_client import Client
|
3 |
-
import json
|
4 |
-
import logging
|
5 |
-
import openai
|
6 |
import os
|
|
|
|
|
|
|
7 |
import re
|
8 |
-
import html
|
9 |
-
|
10 |
-
# Logging configuration
|
11 |
-
# Send DEBUG-and-above records to a local log file, one timestamped line each.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='youtube_script_extractor.log',
)

# The OpenAI key is read from the environment (None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
15 |
-
|
16 |
-
def parse_api_response(response):
    """Normalize a raw API response into a dictionary.

    A string is decoded as JSON first; a non-empty list is reduced to its
    first element; the resulting value must be a dict.

    Args:
        response: JSON text, a list whose first element is the payload,
            or an already-decoded dict.

    Returns:
        The payload dictionary.

    Raises:
        ValueError: If the JSON cannot be decoded or the payload is not a
            dict. The underlying exception is attached as ``__cause__``.
    """
    try:
        if isinstance(response, str):
            response = json.loads(response)
        if isinstance(response, list) and response:
            response = response[0]
        if not isinstance(response, dict):
            raise ValueError(
                f"예상치 못한 응답 형식입니다. 받은 데이터 타입: {type(response)}"
            )
    except Exception as e:
        # Single failure path: log, then surface every parsing problem as a
        # ValueError while keeping the original exception chained for debugging.
        logging.error(f"API 응답 파싱 실패: {str(e)}")
        raise ValueError(f"API 응답 파싱 실패: {str(e)}") from e
    return response
|
28 |
-
|
29 |
-
def get_youtube_script(url):
    """Fetch title, description, transcript and thumbnails for a video.

    Calls the ``/predict`` endpoint of the ``whispersound/YT_Ts_R`` Gradio
    app through ``gradio_client.Client`` and unpacks the first entry of the
    response's ``data`` list.

    Args:
        url: The YouTube URL forwarded to the remote endpoint.

    Returns:
        A ``(title, description, transcription_text, thumbnails)`` tuple.

    Raises:
        ValueError: If the response has no usable ``data`` entry or the
            transcript text is empty.
        Exception: Any error raised by the client or by response parsing is
            logged with its traceback and re-raised unchanged.
    """
    logging.info(f"스크립트 추출 시작: URL = {url}")
    try:
        # Created inside the try so connection failures are logged as well.
        client = Client("whispersound/YT_Ts_R")
        result = client.predict(youtube_url=url, api_name="/predict")
        parsed_result = parse_api_response(result)

        if not parsed_result.get("data"):
            raise ValueError("API 응답에 유효한 데이터가 없습니다.")

        data = parsed_result["data"][0]
        title = data.get("title", "제목 없음")
        description = data.get("description", "설명 없음")
        transcription_text = data.get("transcriptionAsText", "")
        thumbnails = data.get("thumbnails", [])

        if not transcription_text:
            raise ValueError("추출된 스크립트가 없습니다.")

        logging.info("스크립트 추출 완료")
        return title, description, transcription_text, thumbnails
    except Exception:
        logging.exception("스크립트 추출 중 오류 발생")
        raise
|
53 |
-
|
54 |
-
def call_api(prompt, max_tokens, temperature, top_p):
    """Send a single-turn prompt to the chat model and return its reply text.

    Args:
        prompt: User message content.
        max_tokens: Forwarded as ``max_tokens`` to the completion call.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        Exception: Any error from the API call is logged with its traceback
            and re-raised unchanged.
    """
    try:
        # NOTE(review): `openai.ChatCompletion` is the pre-1.0 SDK interface;
        # confirm the pinned `openai` version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response['choices'][0]['message']['content']
    except Exception:
        logging.exception("LLM API 호출 중 오류 발생")
        raise
|
67 |
-
|
68 |
-
def summarize_text(title, description, text):
|
69 |
-
prompt = f"""
|
70 |
-
[μ νλΈ μμ½ κ·μΉ]
|
71 |
-
1. λλ μ νλΈ μμ μ λ¬Έ ν΄μ€κ°λ‘μ μ§μΉ¨μ λ§κ² μ΄ κΈμ μμ±νλΌ
|
72 |
-
2. μλμ μ λͺ©κ³Ό μ€λͺ
μ μ΄ μ νλΈ μμμ μλ³Έ λ©νλ°μ΄ν°μ΄λ€.
|
73 |
-
3. λ°λμ μ λͺ©κ³Ό μ€λͺ
μΌλ‘ μ£Όμ μ λ¬Έλ§₯, μ² μ(Spelling)μ λ¨Όμ νμ
νκ³ , μλμ λλ³Έμ λ°λμ μ§μΉ¨μ λ§κ² μμΈνκ² μμ½νλΌ
|
74 |
-
- λ°λμ μ£Όμ΄μ§ μ λͺ©, μ€λͺ
μ μλ μ² μ(Spelling)λ₯Ό μμ½μ λ°μνλΌ(μλ¬Έ λλ³Έμλ μ€νμκ° μμ μ μλ€)
|
75 |
-
4. λ°λμ νκΈλ‘ μμ±νλΌ
|
76 |
-
5. λ°λμ 'μ΄ μ νλΈ λλ³Έμ', 'μ΄ μμμ', 'μ΄ μ νλΈλ'λ±μ μκ°μ ννμ μ μΈνλΌ
|
77 |
-
6. μμ½λ¬Έλ§μΌλ‘λ μμμ μ§μ μμ²ν κ²κ³Ό λμΌν μμ€μΌλ‘ λ΄μ©μ μ΄ν΄ν μ μλλ‘ μμΈν μμ±
|
78 |
-
7. κΈμ λ무 μμΆνκ±°λ ν¨μΆνμ§ λ§κ³ , μ€μν λ΄μ©κ³Ό μΈλΆμ¬νμ λͺ¨λ ν¬ν¨
|
79 |
-
8. λ°λμ λλ³Έμ νλ¦κ³Ό λ
Όλ¦¬ ꡬ쑰λ₯Ό μ μ§
|
80 |
-
9. λλ³Έμ λͺ©μ μ΄λ μλλ₯Ό νμ
νκ³ , μ΄λ₯Ό μμ½μ λ°λμ λ°μ
|
81 |
-
10. λ°λμ μκ° μμλ μ¬κ±΄μ μ κ° κ³Όμ μ λͺ
ννκ² λ°μ
|
82 |
-
11. λ±μ₯μΈλ¬Ό, μ₯μ, μ¬κ±΄ λ± μ€μν μμλ₯Ό μ ννκ² μμ±
|
83 |
-
12. λλ³Έμμ μ λ¬νλ κ°μ μ΄λ λΆμκΈ°λ ν¬ν¨
|
84 |
-
13. λ°λμ κΈ°μ μ μ©μ΄λ μ λ¬Έ μ©μ΄κ° μμ κ²½μ°, μ΄λ₯Ό μ ννκ² μ¬μ©
|
85 |
-
14. λ°λμ ν΅μ¬ μΉμ
(μμ£Όμ )λ₯Ό νμ
νμ¬ μΉμ
μ λ§κ² κΈμ μμ½νλΌ(κΈμ μμ κ³ λ €νμ¬ μΉμ
μ κ°μλ₯Ό νλ ₯μ μΌλ‘ μ€μ )
|
86 |
-
15. κ° μΉμ
μ μ λͺ©(μμ£Όμ )μλ λ΄μ©κ³Ό μ΄μΈλ¦¬λ μ μ ν μ΄λͺ¨μ§λ‘ μμ£Όμ λ₯Ό μμνλΌ
|
87 |
-
16. κ° μΉμ
μ λ΄μ©μ Bullet Pointλ₯Ό μ¬μ©νμ¬ κ°λ
μ±μ λμ¬λΌ(λ¬Έμ₯ λ¨μλ‘ κ΅¬λΆ)
|
88 |
-
[μμ]
|
89 |
-
(λ³κ²½μ )
|
90 |
-
- μ νλΈλ₯Ό μ²μ μμνλ μ¬λλ€μ ꡬλ
μ μμ μ‘°νμμ ν° κ΄μ¬μ λκ³ λ§€μΌ μ νλΈ μ€νλμ€λ₯Ό νμΈνκ² λλ€. κ·Έλ¬λ ꡬλ
μκ° 100λͺ
, 1,000λͺ
μ λλ¬νλ κ²λ§μΌλ‘λ μ§μμ μΈ μ±μ₯μ λμμ΄ λμ§ μλλ€. ꡬλ
μ μκ° λμ΄λ νμλ μ νλΈ μ±λ μ΄μμ λν κ°μ μ‘μ§ λͺ»ν΄ ν¬κΈ°νλ κ²½μ°κ° λ§λ€.
|
91 |
-
(λ³κ²½ν)
|
92 |
-
- μ νλΈλ₯Ό μ²μ μμνλ μ¬λλ€μ ꡬλ
μ μμ μ‘°νμμ ν° κ΄μ¬μ λκ³ λ§€μΌ μ νλΈ μ€νλμ€λ₯Ό νμΈνκ² λλ€.
|
93 |
-
- κ·Έλ¬λ ꡬλ
μκ° 100λͺ
, 1,000λͺ
μ λλ¬νλ κ²λ§μΌλ‘λ μ§μμ μΈ μ±μ₯μ λμμ΄ λμ§ μλλ€.
|
94 |
-
- ꡬλ
μ μκ° λμ΄λ οΏ½οΏ½μλ μ νλΈ μ±λ μ΄μμ λν κ°μ μ‘μ§ λͺ»ν΄ ν¬κΈ°νλ κ²½μ°κ° λ§λ€.
|
95 |
-
17. κ° μΉμ
μ λ΄μ©μ λ°λμ μΆ©μ€νκ² μμ±
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
current_sentence = sentence.strip()
|
117 |
-
else:
|
118 |
-
current_sentence += sentence
|
119 |
-
if sentence.endswith(('.', '?', '!')):
|
120 |
-
combined_sentences.append(current_sentence.strip())
|
121 |
-
current_sentence = ""
|
122 |
-
if current_sentence:
|
123 |
-
combined_sentences.append(current_sentence.strip())
|
124 |
-
return combined_sentences
|
125 |
-
|
126 |
-
def display_script(title, script):
    """Render the transcript as a collapsible HTML block.

    The script is split with ``split_sentences`` and the pieces are joined
    with blank lines. Title and transcript are HTML-escaped before
    interpolation because both come from an external response.

    Args:
        title: Heading shown inside the collapsible section.
        script: Raw transcript text.

    Returns:
        An HTML string wrapped in ``<div class="script-box">``.
    """
    import html  # local import keeps this block self-contained

    script_sentences = split_sentences(script)
    formatted_script = "\n\n".join(script_sentences)
    safe_title = html.escape(str(title))
    safe_script = html.escape(formatted_script)
    return f"""<div class="script-box">
    <details>
        <summary>클릭하여 펼치기</summary>
        <div class="output-title">{safe_title}</div>
        <p style="white-space: pre-wrap;">{safe_script}</p>
    </details>
</div>"""
|
136 |
-
|
137 |
-
def display_summary(title, summary):
    """Wrap a summary in the shared ``script-box`` HTML container.

    Args:
        title: Heading text; HTML-escaped before interpolation.
        summary: Body inserted verbatim.

    Returns:
        An HTML string wrapped in ``<div class="script-box">``.
    """
    import html  # local import keeps this block self-contained

    # NOTE(review): summary is interpolated as-is, presumably because callers
    # pass pre-formatted markup — confirm it never carries untrusted HTML.
    safe_title = html.escape(str(title))
    return f"""<div class="script-box">
    <div class="output-title">{safe_title}</div>
    {summary}
</div>"""
|
142 |
-
|
143 |
-
def get_thumbnail_url(thumbnails):
    """Return the URL of the 640x480 thumbnail, or a fallback message.

    Args:
        thumbnails: Iterable of dicts with ``width``, ``height`` and ``url``
            keys; ``None`` is treated as empty.

    Returns:
        The ``url`` of the first entry whose size is exactly 640x480, or a
        human-readable "not found" message when there is none.
    """
    # NOTE(review): the fallback is a message, not a URL — callers that pass
    # the result to an image component should handle that case.
    for thumbnail in thumbnails or []:
        if thumbnail.get("width") == 640 and thumbnail.get("height") == 480:
            return thumbnail.get("url")
    return "640x480 크기의 썸네일을 찾을 수 없습니다."
|
148 |
-
|
149 |
-
def analyze(url):
|
150 |
-
# μ€ν¬λ¦½νΈ μΆμΆ
|
151 |
-
yield "μ€ν¬λ¦½νΈ μΆμΆ μ€...", "μ€ν¬λ¦½νΈ μΆμΆ μ€...", ""
|
152 |
-
title, description, script, thumbnails = get_youtube_script(url)
|
153 |
-
script_content = display_script(title, script)
|
154 |
-
thumbnail_url = get_thumbnail_url(thumbnails)
|
155 |
-
|
156 |
-
# μλ¬Έ μ€ν¬λ¦½νΈ νμ λ° μμ½ μμ
|
157 |
-
yield script_content, "μμ½ μμ± μ€...", thumbnail_url
|
158 |
|
159 |
-
|
160 |
-
summary = summarize_text(title, description, script)
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
{formatted_summary}
|
191 |
-
</div>"""
|
192 |
|
193 |
-
#
|
194 |
-
|
195 |
-
|
196 |
-
# Gradio μΈν°νμ΄μ€
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
inputs=[youtube_url_input],
|
208 |
-
outputs=[script_output, summary_output, thumbnail_output] # thumbnail_output μΆκ°
|
209 |
-
)
|
210 |
-
|
211 |
-
if __name__ == "__main__":
|
212 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
import gradio as gr
|
5 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
# Read the RapidAPI key and host from Hugging Face environment variables
|
8 |
+
# Host name sent in the RapidAPI host header.
AA_HOST = "youtube-transcriptor.p.rapidapi.com"
# API key read from the environment; None when the variable is unset.
AA_KEY = os.environ.get("AA_KEY")
|
10 |
+
|
11 |
+
# Extract the video ID from a YouTube URL
|
12 |
+
# Tried in order; group 1 of the first match is the video ID.
_VIDEO_ID_PATTERNS = (
    re.compile(r"[?&]v=([^#&?\s]+)"),           # watch?v=<id>
    re.compile(r"youtu\.be/([^#&?/\s]+)"),      # youtu.be/<id>
    re.compile(r"youtube\.com/(?:shorts|embed|live)/([^#&?/\s]+)"),
)


def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Handles ``watch?v=<id>`` URLs, ``youtu.be/<id>`` short links, and
    ``/shorts/``, ``/embed/`` and ``/live/`` paths.

    Args:
        youtube_url: URL text; ``None`` or an empty string is accepted.

    Returns:
        The video ID, or ``None`` when no non-empty ID can be found.
    """
    if not youtube_url:
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(youtube_url)
        if match:
            return match.group(1)
    return None
|
16 |
+
|
17 |
+
# Subtitle language priority list
|
18 |
+
# Language codes are tried from top to bottom.
LANGUAGE_PRIORITY = [
    "ko",
    "en",
    "ja",
    "zh",
]
|
19 |
+
|
20 |
+
# Request YouTube subtitles, trying each language in priority order
|
21 |
+
def get_youtube_transcript(youtube_url, timeout=10):
    """Fetch a video's transcript, trying languages in priority order.

    Each language in ``LANGUAGE_PRIORITY`` is requested in turn from the
    RapidAPI transcript endpoint; the first HTTP 200 response is returned
    in full. Non-200 responses move on to the next language.

    Args:
        youtube_url: URL of the video.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``{"language": <code>, "data": <decoded JSON>}`` on success, or
        ``{"error": <message>}`` when the URL has no video ID, the API key
        is missing, a request fails, the body is not valid JSON, or no
        language returns HTTP 200.
    """
    video_id = get_video_id(youtube_url)
    if video_id is None:
        return {"error": "잘못된 유튜브 URL입니다. 비디오 ID를 찾을 수 없습니다."}

    if not AA_KEY:
        # Without a key every request would be rejected; say so explicitly.
        return {"error": "AA_KEY 환경 변수가 설정되지 않았습니다."}

    url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
    headers = {
        "x-rapidapi-key": AA_KEY,
        "x-rapidapi-host": AA_HOST,
    }

    for lang in LANGUAGE_PRIORITY:
        querystring = {"video_id": video_id, "lang": lang}
        try:
            response = requests.get(
                url, headers=headers, params=querystring, timeout=timeout
            )
        except requests.RequestException as e:
            # Network failures follow the same error-dict convention as the
            # other failure paths instead of escaping as an exception.
            return {"error": f"자막 요청 중 오류 발생: {str(e)}"}

        if response.status_code != 200:
            continue

        try:
            data = response.json()
        except ValueError as e:
            # ValueError covers json.JSONDecodeError and the decode errors
            # raised by the JSON backends requests may use.
            return {"error": f"JSON 디코딩 오류 발생: {str(e)}"}

        # Return the whole decoded payload unchanged.
        return {"language": lang, "data": data}

    # No language in the priority list produced an HTTP 200 response.
    return {"error": "우선순위 언어로 자막을 찾을 수 없습니다."}
|
52 |
+
|
53 |
+
# Gradio interface callback
|
54 |
+
def youtube_transcript_interface(youtube_url):
    """Return the transcript lookup result as pretty-printed JSON text.

    Args:
        youtube_url: URL passed through to ``get_youtube_transcript``.

    Returns:
        The result dictionary serialized with two-space indentation and
        non-ASCII characters left unescaped.
    """
    return json.dumps(
        get_youtube_transcript(youtube_url),
        indent=2,
        ensure_ascii=False,
    )
|
60 |
+
|
61 |
+
# Create the Gradio interface
|
62 |
+
# Text-in / text-out UI around youtube_transcript_interface.
interface = gr.Interface(
    fn=youtube_transcript_interface,
    inputs="text",
    outputs="text",
    title="YouTube 자막 추출기",
    description="유튜브 URL을 입력하세요.",
)

# Start the Gradio app.
interface.launch()
|
|
|
|
|
|
|
|
|
|
|
|