Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
from pytube import YouTube
|
2 |
from google.cloud import speech_v1p1beta1 as speech
|
3 |
-
from pydub import AudioSegment
|
4 |
import io
|
5 |
-
import openai
|
6 |
import os
|
|
|
7 |
import gradio as gr
|
8 |
|
9 |
# 유튜브 비디오 ID 추출 함수
|
@@ -38,25 +38,34 @@ def download_and_convert_audio(youtube_url):
|
|
38 |
|
39 |
return wav_audio_path
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
# Google Speech-to-Text API๋ฅผ ์ฌ์ฉํ์ฌ ์ค๋์ค๋ฅผ ํ
์คํธ๋ก ๋ณํ
|
42 |
def speech_to_text(audio_path):
|
43 |
client = speech.SpeechClient()
|
|
|
44 |
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
for result in response.results:
|
59 |
-
transcript += result.alternatives[0].transcript + " "
|
60 |
|
61 |
return transcript.strip()
|
62 |
|
|
|
1 |
from pytube import YouTube
|
2 |
from google.cloud import speech_v1p1beta1 as speech
|
3 |
+
from pydub import AudioSegment
|
4 |
import io
|
|
|
5 |
import os
|
6 |
+
import openai
|
7 |
import gradio as gr
|
8 |
|
9 |
# 유튜브 비디오 ID 추출 함수
|
|
|
38 |
|
39 |
return wav_audio_path
|
40 |
|
41 |
+
# 오디오를 청크로 나누는 함수
|
42 |
+
def split_audio(audio_path, chunk_length_ms=60000):
    """Load a WAV file and cut it into consecutive fixed-length pieces.

    Args:
        audio_path: Path of the WAV file to load.
        chunk_length_ms: Length of each piece, in milliseconds. The final
            piece is shorter when the audio does not divide evenly.

    Returns:
        A list of audio segments in playback order; empty when the loaded
        audio has zero length.
    """
    recording = AudioSegment.from_wav(audio_path)
    total_length = len(recording)

    pieces = []
    for start in range(0, total_length, chunk_length_ms):
        end = start + chunk_length_ms
        # Slicing past the end is safe: the last piece is simply truncated.
        pieces.append(recording[start:end])
    return pieces
|
46 |
+
|
47 |
# Google Speech-to-Text API๋ฅผ ์ฌ์ฉํ์ฌ ์ค๋์ค๋ฅผ ํ
์คํธ๋ก ๋ณํ
|
48 |
def speech_to_text(audio_path):
    """Transcribe a WAV file to Korean text with Google Cloud Speech-to-Text.

    The file is cut into pieces by ``split_audio`` (using its default piece
    length), each piece is sent to the synchronous ``recognize`` call, and the
    first alternative of every returned result is joined with single spaces.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The combined transcript with surrounding whitespace removed; an empty
        string when nothing was recognized.
    """
    sample_rate_hertz = 16000  # rate declared to the API and enforced below
    client = speech.SpeechClient()

    # The request settings are identical for every chunk, so build them once
    # instead of on each loop iteration.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code="ko-KR",  # recognize Korean
    )

    transcripts = []
    for chunk in split_audio(audio_path):  # split the audio into chunks
        # Force 16 kHz / mono / 16-bit so the exported WAV header agrees with
        # the declared config; the API rejects a WAV payload whose header
        # disagrees with it. These calls leave the chunk unchanged when it is
        # already in that format.
        chunk = (
            chunk.set_frame_rate(sample_rate_hertz)
            .set_channels(1)
            .set_sample_width(2)
        )

        with io.BytesIO() as buffer:
            chunk.export(buffer, format="wav")
            content = buffer.getvalue()

        audio = speech.RecognitionAudio(content=content)
        response = client.recognize(config=config, audio=audio)

        # Keep the first alternative of each result; skip results that carry
        # no alternatives rather than failing with IndexError.
        transcripts.extend(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        )

    return " ".join(transcripts).strip()
|
71 |
|