SUHHHH committed on
Commit
5d64735
·
verified ·
1 Parent(s): 23ac44b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -15
app.py CHANGED
@@ -1,9 +1,9 @@
1
  from pytube import YouTube
2
  from google.cloud import speech_v1p1beta1 as speech
3
- from pydub import AudioSegment # 오디오 변환을 위한 라이브러리
4
  import io
5
- import openai
6
  import os
 
7
  import gradio as gr
8
 
9
  # 유튜브 비디오 ID 추출 함수
@@ -38,25 +38,34 @@ def download_and_convert_audio(youtube_url):
38
 
39
  return wav_audio_path
40
 
 
 
 
 
 
 
41
  # Google Speech-to-Text API๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์˜ค๋””์˜ค๋ฅผ ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜
42
  def speech_to_text(audio_path):
43
  client = speech.SpeechClient()
 
44
 
45
- with io.open(audio_path, "rb") as audio_file:
46
- content = audio_file.read()
 
 
 
 
47
 
48
- audio = speech.RecognitionAudio(content=content)
49
- config = speech.RecognitionConfig(
50
- encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
51
- sample_rate_hertz=16000, # 16000Hz 샘플 레이트
52
- language_code="ko-KR" # 한국어 인식
53
- )
54
 
55
- response = client.recognize(config=config, audio=audio)
56
-
57
- transcript = ""
58
- for result in response.results:
59
- transcript += result.alternatives[0].transcript + " "
60
 
61
  return transcript.strip()
62
 
 
1
  from pytube import YouTube
2
  from google.cloud import speech_v1p1beta1 as speech
3
+ from pydub import AudioSegment
4
  import io
 
5
  import os
6
+ import openai
7
  import gradio as gr
8
 
9
  # 유튜브 비디오 ID 추출 함수
 
38
 
39
  return wav_audio_path
40
 
41
+ # 오디오를 청크로 나누는 함수
42
+ def split_audio(audio_path, chunk_length_ms=60000):
43
+ audio = AudioSegment.from_wav(audio_path)
44
+ chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
45
+ return chunks
46
+
47
  # Google Speech-to-Text API๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์˜ค๋””์˜ค๋ฅผ ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜
48
  def speech_to_text(audio_path):
49
  client = speech.SpeechClient()
50
+ chunks = split_audio(audio_path) # 오디오를 청크로 나눔
51
 
52
+ transcript = ""
53
+ for chunk in chunks:
54
+ with io.BytesIO() as audio_file:
55
+ chunk.export(audio_file, format="wav")
56
+ audio_file.seek(0)
57
+ content = audio_file.read()
58
 
59
+ audio = speech.RecognitionAudio(content=content)
60
+ config = speech.RecognitionConfig(
61
+ encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
62
+ sample_rate_hertz=16000, # 16000Hz 샘플 레이트
63
+ language_code="ko-KR" # 한국어 인식
64
+ )
65
 
66
+ response = client.recognize(config=config, audio=audio)
67
+ for result in response.results:
68
+ transcript += result.alternatives[0].transcript + " "
 
 
69
 
70
  return transcript.strip()
71