kevinwang676 committed on
Commit
276421d
·
verified ·
1 Parent(s): aaec7d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -12
app.py CHANGED
@@ -50,11 +50,35 @@ print("Loading WavLM for content...")
50
  cmodel = WavLMModel.from_pretrained("microsoft/wavlm-large").to(device)
51
 
52
 
53
- from openai import OpenAI
54
 
55
  import ffmpeg
56
 
57
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  html_denoise = """
60
  <html>
@@ -102,17 +126,10 @@ def convert(api_key, text, tgt, voice, save_path):
102
  hps.data.mel_fmax
103
  )
104
  # src
105
- client = OpenAI(api_key=api_key)
106
-
107
- response = client.audio.speech.create(
108
- model="tts-1",
109
- voice=voice,
110
- input=text,
111
- )
112
-
113
- response.stream_to_file("output_openai.mp3")
114
 
115
- src = "output_openai.mp3"
 
 
116
  wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
117
  wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
118
  c = cmodel(wav_src).last_hidden_state.transpose(1, 2).to(device)
 
50
  cmodel = WavLMModel.from_pretrained("microsoft/wavlm-large").to(device)
51
 
52
 
 
53
 
54
  import ffmpeg
55
 
56
+ import random
57
+ import numpy as np
58
+ from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
59
+
60
def pad_buffer(audio, dtype=np.int16):
    """Pad a byte buffer with trailing NUL bytes to a whole number of elements.

    ``np.frombuffer`` refuses a buffer whose length is not a multiple of the
    element size, so the buffer is extended with zero bytes until it is.

    Args:
        audio: Bytes-like object supporting ``len()`` and concatenation with
            ``bytes`` (e.g. ``bytes`` or ``bytearray``).
        dtype: NumPy dtype whose item size defines the alignment. Defaults to
            ``np.int16`` (2 bytes), matching the original behaviour.

    Returns:
        ``audio`` unchanged when its length is already aligned, otherwise a
        new buffer with the minimal number of ``b'\\0'`` bytes appended.
    """
    element_size = np.dtype(dtype).itemsize
    remainder = len(audio) % element_size
    if remainder:
        audio = audio + b'\0' * (element_size - remainder)
    return audio
67
+
68
def generate_voice(text, voice_name):
    """Synthesize speech for *text* with ElevenLabs and save it to ``output.wav``.

    Only the first 250 characters of ``text`` are sent to the service.

    Args:
        text: Text to synthesize.
        voice_name: Voice identifier forwarded to ``generate`` as ``voice``.

    Returns:
        The string ``"output.wav"``, the path the audio was written to.

    Raises:
        gr.Error: If the unauthenticated rate limit is hit, or if any other
            exception occurs while generating or writing the audio.
    """
    try:
        audio = generate(
            text[:250],  # Limit to 250 characters
            voice=voice_name,
            model="eleven_multilingual_v2",
        )
        # NOTE(review): the returned bytes are reinterpreted as 16-bit samples
        # at 44100 Hz; this assumes `generate` yields raw PCM in that format --
        # confirm against the configured ElevenLabs output format.
        # NOTE(review): `write` is not defined in the visible part of the file;
        # presumably `scipy.io.wavfile.write` -- confirm it is imported.
        write("output.wav", 44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))

        return "output.wav"
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error expects a message string; keep the original exception as
        # the cause so the traceback is preserved in the logs.
        raise gr.Error(str(e)) from e
82
 
83
  html_denoise = """
84
  <html>
 
126
  hps.data.mel_fmax
127
  )
128
  # src
 
 
 
 
 
 
 
 
 
129
 
130
+ #response.stream_to_file("output_openai.mp3")
131
+ os.environ["ELEVEN_API_KEY"] = api_key
132
+ src = generate_voice(text, voice)
133
  wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
134
  wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
135
  c = cmodel(wav_src).last_hidden_state.transpose(1, 2).to(device)