File size: 2,508 Bytes
1a4221b
bc45ae2
1ef3458
cf65cfe
315190f
1ef3458
cf65cfe
49a964d
 
 
315190f
 
b3ee479
315190f
 
 
b4d2f5f
 
 
fe178da
828fe8d
315190f
 
 
 
 
427bdc6
cf65cfe
1ef3458
cf65cfe
 
1ef3458
427bdc6
 
 
 
cf65cfe
44d26ea
b3ee479
427bdc6
1477083
 
fe178da
b3ee479
 
fe178da
 
 
 
b3ee479
427bdc6
cf65cfe
 
828fe8d
cf65cfe
828fe8d
cf65cfe
fe178da
cf65cfe
 
 
 
 
828fe8d
cf65cfe
315190f
828fe8d
1a4221b
 
cf65cfe
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import requests
import json
import gradio as gr
import re

# RapidAPI settings: the host is fixed, while the key is read from the
# environment (Hugging Face environment variable) and is None when unset.
RAPIDAPI_HOST = "youtube-transcriptor.p.rapidapi.com"
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")

# Extract the video ID from a YouTube URL.
def get_video_id(youtube_url):
    """Extract the video ID from a YouTube watch URL or a youtu.be short URL.

    Args:
        youtube_url: URL text such as ``https://www.youtube.com/watch?v=ID``
            or ``https://youtu.be/ID``. Surrounding whitespace is ignored.

    Returns:
        The video ID string, or ``None`` when the input is empty or no
        non-empty ID can be found.
    """
    if not youtube_url:
        return None
    url = youtube_url.strip()

    # ``v`` must be a complete query-parameter name (preceded by ``?``/``&``
    # or at the very start), so parameters that merely end in "v" such as
    # ``rev=`` are not mistaken for the video id. ``+`` (not ``*``) rejects
    # an empty id like ``watch?v=``.
    match = re.search(r"(?:^|[?&])v=([^#&?]+)", url)
    if match is None:
        # Short links: the dot is escaped so only the literal host matches,
        # and the id stops at a path separator.
        match = re.search(r"youtu\.be/([^#&?/]+)", url)
    return match.group(1) if match else None

# Subtitle languages to try, in priority order.
LANGUAGE_PRIORITY = [
    "ko",
    "en",
    "ja",
    "zh",
]

# Request the YouTube transcript, trying each language in priority order.
def get_youtube_transcript(youtube_url):
    """Fetch a transcript for a YouTube video through the RapidAPI endpoint.

    The languages in ``LANGUAGE_PRIORITY`` are tried in order; the first
    request answered with HTTP 200 is returned. Non-200 answers move on to
    the next language.

    Args:
        youtube_url: Text from which ``get_video_id`` extracts the video id.

    Returns:
        dict: ``{"language": lang, "data": <decoded JSON body>}`` on success,
        otherwise ``{"error": <message>}``. Errors are reported through the
        returned dict; request and decode failures are not raised.
    """
    video_id = get_video_id(youtube_url)
    # Treat an empty id like a missing one so no request is sent without an id.
    if not video_id:
        return {"error": "์ž˜๋ชป๋œ ์œ ํŠœ๋ธŒ URL์ž…๋‹ˆ๋‹ค. ๋น„๋””์˜ค ID๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}

    # Report a missing key explicitly; otherwise the failure would surface
    # only as the generic "no subtitles found" message below.
    if not RAPIDAPI_KEY:
        return {"error": "RAPIDAPI_KEY environment variable is not set."}

    url = f"https://{RAPIDAPI_HOST}/transcript"
    headers = {
        "x-rapidapi-key": RAPIDAPI_KEY,
        "x-rapidapi-host": RAPIDAPI_HOST,
    }
    timeout_seconds = 10  # bound each call so the UI cannot hang indefinitely

    for lang in LANGUAGE_PRIORITY:
        querystring = {"video_id": video_id, "lang": lang}
        try:
            response = requests.get(
                url,
                headers=headers,
                params=querystring,
                timeout=timeout_seconds,
            )
        except requests.RequestException as exc:
            # Connection problems and timeouts are unlikely to differ per
            # language, so stop and report instead of retrying.
            return {"error": f"Request to the transcript API failed: {exc}"}

        if response.status_code != 200:
            continue

        try:
            data = response.json()
        except ValueError as e:
            # ValueError is the common base of every JSON decode error that
            # ``response.json()`` may raise (json, simplejson, requests).
            return {"error": f"JSON ๋””์ฝ”๋”ฉ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"}

        # Hand back the full decoded response unchanged.
        return {"language": lang, "data": data}

    # No language produced an HTTP 200 response.
    return {"error": "์šฐ์„ ์ˆœ์œ„ ์–ธ์–ด๋กœ ์ž๋ง‰์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}

# Gradio callback definition.
def youtube_transcript_interface(youtube_url):
    """Return the transcript lookup result for ``youtube_url`` as JSON text.

    The dict produced by ``get_youtube_transcript`` is serialized with
    two-space indentation and without escaping non-ASCII characters.
    """
    return json.dumps(
        get_youtube_transcript(youtube_url),
        ensure_ascii=False,
        indent=2,
    )

# Build the Gradio UI: one text input (the URL) and one text output (the JSON).
interface = gr.Interface(
    title="YouTube ์ž๋ง‰ ์ถ”์ถœ๊ธฐ",
    description="์œ ํŠœ๋ธŒ URL์„ ์ž…๋ ฅํ•˜์„ธ์š”.",
    fn=youtube_transcript_interface,
    inputs="text",
    outputs="text",
)

# Launch the Gradio app; this runs as soon as the module is executed.
interface.launch()