Spaces:
Sleeping
Sleeping
File size: 2,508 Bytes
1a4221b bc45ae2 1ef3458 cf65cfe 315190f 1ef3458 cf65cfe 49a964d 315190f b3ee479 315190f b4d2f5f fe178da 828fe8d 315190f 427bdc6 cf65cfe 1ef3458 cf65cfe 1ef3458 427bdc6 cf65cfe 44d26ea b3ee479 427bdc6 1477083 fe178da b3ee479 fe178da b3ee479 427bdc6 cf65cfe 828fe8d cf65cfe 828fe8d cf65cfe fe178da cf65cfe 828fe8d cf65cfe 315190f 828fe8d 1a4221b cf65cfe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import requests
import json
import gradio as gr
import re
# RapidAPI credentials: the key is read from an environment variable (e.g. a Hugging Face Space secret); the host is fixed.
RAPIDAPI_HOST = "youtube-transcriptor.p.rapidapi.com"  # host name of the transcript API
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")  # None when the variable is not set
# Extract the video ID from a YouTube URL.
# Patterns are tried in order; each captures the video ID in group 1.
_VIDEO_ID_PATTERNS = (
    # "v" query parameter; it must start the string or follow "?"/"&" so that
    # parameters merely ending in "v" (e.g. "rev=") are not mistaken for it.
    re.compile(r"(?:^|[?&])v=([^#&?/\s]+)"),
    # youtu.be short links.
    re.compile(r"youtu\.be/([^#&?/\s]+)"),
    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>.
    re.compile(r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([^#&?/\s]+)"),
)


def get_video_id(youtube_url):
    """Return the video ID contained in a YouTube URL, or None.

    Recognised forms are a ``v=<id>`` query parameter, ``youtu.be/<id>``
    short links, and ``youtube.com`` / ``youtube-nocookie.com`` paths of the
    form ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``.

    Args:
        youtube_url: The URL text to inspect. Surrounding whitespace is
            ignored.

    Returns:
        The non-empty ID string, or None when ``youtube_url`` is not a string
        or no non-empty ID can be found.
    """
    if not isinstance(youtube_url, str):
        return None

    candidate = youtube_url.strip()
    for pattern in _VIDEO_ID_PATTERNS:
        found = pattern.search(candidate)
        if found:
            return found.group(1)
    return None
# Subtitle languages to try, in priority order.
LANGUAGE_PRIORITY = [
    "ko",  # Korean
    "en",  # English
    "ja",  # Japanese
    "zh",  # Chinese
]
# Request a YouTube transcript, trying each language in priority order.
def get_youtube_transcript(youtube_url):
# ๋น๋์ค ID ์ถ์ถ
video_id = get_video_id(youtube_url)
if video_id is None:
return {"error": "์๋ชป๋ ์ ํ๋ธ URL์
๋๋ค. ๋น๋์ค ID๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."}
url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
headers = {
"x-rapidapi-key": RAPIDAPI_KEY,
"x-rapidapi-host": RAPIDAPI_HOST
}
# ์ธ์ด ์ฐ์ ์์์ ๋ฐ๋ผ ์์ฐจ์ ์ผ๋ก ์์ฒญ์ ์๋
for lang in LANGUAGE_PRIORITY:
querystring = {"video_id": video_id, "lang": lang}
response = requests.get(url, headers=headers, params=querystring)
# ์ํ ์ฝ๋ ํ์ธ ๋ฐ ์ ์ฒด ์๋ต ๋ฐํ
if response.status_code == 200:
try:
data = response.json()
# ์ ์ฒด ์๋ต ๋ฐ์ดํฐ๋ฅผ ๊ทธ๋๋ก ๋ฐํ
return {"language": lang, "data": data}
except json.JSONDecodeError as e:
return {"error": f"JSON ๋์ฝ๋ฉ ์ค๋ฅ ๋ฐ์: {str(e)}"}
# ๋ชจ๋ ์ธ์ด์์ ์๋ง์ ์ฐพ์ง ๋ชปํ ๊ฒฝ์ฐ
return {"error": "์ฐ์ ์์ ์ธ์ด๋ก ์๋ง์ ์ฐพ์ ์ ์์ต๋๋ค."}
# Callback function used by the Gradio interface.
def youtube_transcript_interface(youtube_url):
    """Return the transcript lookup result for ``youtube_url`` as JSON text.

    The dictionary produced by ``get_youtube_transcript`` is serialised with
    a two-space indent.
    """
    result = get_youtube_transcript(youtube_url)
    # ensure_ascii=False emits non-ASCII characters as-is, not as \u escapes.
    return json.dumps(result, indent=2, ensure_ascii=False)
# Build the Gradio interface.
interface = gr.Interface(
fn=youtube_transcript_interface,
inputs="text",
outputs="text",
title="YouTube ์๋ง ์ถ์ถ๊ธฐ",
description="์ ํ๋ธ URL์ ์
๋ ฅํ์ธ์."
)
# Gradio ์ธํฐํ์ด์ค ์คํ
interface.launch() |