cuda

Sleeping

App Files Files Community

cuda / app.py

tomriddle

Update app.py

11da134 almost 2 years ago

raw

history blame

5.36 kB

	import pathlib
	import uuid
	import os
	import gradio as gr
	from tqdm import tqdm
	import requests
	import urllib.request
	import json
	import time
	# File name of the synthesized speech that transcribe_video uploads to D-ID.
	# text_to_speach_api writes its audio to the same "output.mp3" path.
	output_mp3="output.mp3"

	def upload_image(img: str, d_id_key: str):
	    """Upload a local image to D-ID and return its hosted URL.

	    Args:
	        img: Path of the image file on disk.
	        d_id_key: D-ID API key, sent as the HTTP Basic credential.

	    Returns:
	        The value of the "url" field of the JSON response.

	    Raises:
	        RuntimeError: If D-ID answers with a non-success status code.
	    """
	    url = "https://api.d-id.com/images"
	    headers = {
	        "accept": "application/json",
	        "authorization": "Basic "+d_id_key
	    }

	    # The context manager closes the file handle even when the request fails.
	    with open(img, "rb") as image_file:
	        files = {"image": ("hero.jpg", image_file, "image/jpg")}
	        # A timeout keeps a stalled connection from hanging the request forever.
	        response = requests.post(url, files=files, headers=headers, timeout=120)

	    # Surface the API's own error text instead of a bare KeyError on "url".
	    if not response.ok:
	        raise RuntimeError(
	            f"D-ID image upload failed ({response.status_code}): {response.text}"
	        )

	    img_url = response.json()["url"]
	    print(img_url)
	    return img_url

	def upload_audio(audio: str, d_id_key: str):
	    """Upload a local audio file to D-ID and return its hosted URL.

	    Args:
	        audio: Path of the audio file on disk; also sent as the upload's
	            file name.
	        d_id_key: D-ID API key, sent as the HTTP Basic credential.

	    Returns:
	        The value of the "url" field of the JSON response.

	    Raises:
	        RuntimeError: If D-ID answers with a non-success status code.
	    """
	    url = "https://api.d-id.com/audios"
	    headers = {
	        "accept": "application/json",
	        "authorization": "Basic "+d_id_key
	    }

	    # The context manager closes the file handle even when the request fails.
	    with open(audio, "rb") as audio_file:
	        files = {"audio": (audio, audio_file, "audio/mpeg")}
	        # A timeout keeps a stalled connection from hanging the request forever.
	        response = requests.post(url, files=files, headers=headers, timeout=120)

	    # Surface the API's own error text instead of a bare KeyError on "url".
	    if not response.ok:
	        raise RuntimeError(
	            f"D-ID audio upload failed ({response.status_code}): {response.text}"
	        )

	    audio_url = response.json()["url"]
	    print(audio_url)
	    return audio_url

	def get_did_video(process_video_url, d_id_key, poll_interval=1, max_wait=600):
	    """Poll a D-ID talk until its video is ready and return the video URL.

	    Args:
	        process_video_url: Id of the talk; appended to the talks endpoint.
	        d_id_key: D-ID API key, sent as the HTTP Basic credential.
	        poll_interval: Seconds to wait between two status requests.
	        max_wait: Maximum number of seconds to keep polling.

	    Returns:
	        The "result_url" field of the talk once it appears in the response.

	    Raises:
	        RuntimeError: If the API returns a non-success status code, or the
	            talk reports a failed status.
	        TimeoutError: If no "result_url" shows up within ``max_wait`` seconds.
	    """
	    url = "https://api.d-id.com/talks/"+process_video_url
	    headers = {
	        "accept": "application/json",
	        "authorization": "Basic "+d_id_key
	    }
	    deadline = time.monotonic() + max_wait

	    while True:
	        response = requests.get(url, headers=headers, timeout=30)
	        print(response.text)

	        # An HTTP error (bad key, unknown id, ...) will not fix itself by
	        # polling again, so stop instead of looping forever.
	        if not response.ok:
	            raise RuntimeError(
	                f"D-ID talk lookup failed ({response.status_code}): {response.text}"
	            )

	        response_dict = response.json()
	        if "result_url" in response_dict:
	            break

	        # NOTE(review): "error" and "rejected" are the failure states listed
	        # in the D-ID talks API; confirm against the current API reference.
	        if response_dict.get("status") in ("error", "rejected"):
	            raise RuntimeError(f"D-ID could not render the talk: {response.text}")

	        if time.monotonic() >= deadline:
	            raise TimeoutError(
	                f"D-ID talk {process_video_url} was not ready after {max_wait} seconds"
	            )

	        # Only sleep when another poll is actually needed.
	        time.sleep(poll_interval)

	    result_url = response_dict["result_url"]

	    print("From did_video \n\n\n")
	    print(result_url)
	    return result_url

	def text_to_speach_api(text: str, elv_key,voice_id: str, output_path: str = "output.mp3"):
	    """Synthesize speech with ElevenLabs and save the audio to disk.

	    Args:
	        text: Text to be spoken.
	        elv_key: ElevenLabs API key, sent in the "xi-api-key" header.
	        voice_id: Id of the ElevenLabs voice to use.
	        output_path: File the audio bytes are written to.

	    Returns:
	        The path the audio was written to.

	    Raises:
	        ValueError: If ``voice_id`` is empty or None.
	        RuntimeError: If ElevenLabs answers with a non-success status code.
	    """
	    # Fail with a clear message instead of a TypeError on the URL concatenation.
	    if not voice_id:
	        raise ValueError("voice_id is required to build the ElevenLabs request URL")

	    url = "https://api.elevenlabs.io/v1/text-to-speech/"+voice_id+"/stream"
	    headers = {
	        # "/" is not a valid media range; the response body is saved as MP3.
	        "accept": "audio/mpeg",
	        "xi-api-key": elv_key,
	        "Content-Type": "application/json",
	    }
	    data = {
	        "text": text,
	        "voice_settings": {
	            "stability": 0,
	            "similarity_boost": 0
	        }
	    }

	    response = requests.post(url, headers=headers, json=data, timeout=120)

	    if not response.ok:
	        print("Error: ", response.text)
	        # Raising stops the caller from continuing with a stale or missing
	        # audio file left over from an earlier run.
	        raise RuntimeError(
	            f"ElevenLabs text-to-speech failed ({response.status_code}): {response.text}"
	        )

	    with open(output_path, "wb") as f:
	        f.write(response.content)
	    return output_path

	def get_voice_names(path="data.json"):
	    """Return the names of all voices listed in the voices JSON file.

	    Args:
	        path: Location of the JSON file; it must contain a "voices" list
	            whose entries each have a "name" key.

	    Returns:
	        A list with the "name" of every voice, in file order.
	    """
	    # JSON is UTF-8 by specification; do not rely on the platform default.
	    with open(path, encoding="utf-8") as f:
	        data = json.load(f)
	    return [voice["name"] for voice in data["voices"]]



	# Look up a voice id by its display name.
	def get_voice_id(name, path="data.json"):
	    """Return the "voice_id" of the voice called ``name``.

	    Args:
	        name: Voice name to search for (exact match).
	        path: Location of the JSON file; it must contain a "voices" list
	            whose entries have "name" and "voice_id" keys.

	    Returns:
	        The id of the first voice whose name matches, or None if there is
	        no such voice.
	    """
	    # JSON is UTF-8 by specification; do not rely on the platform default.
	    with open(path, encoding="utf-8") as f:
	        data = json.load(f)
	    matches = (voice["voice_id"] for voice in data["voices"] if voice["name"] == name)
	    return next(matches, None)

	#D-id API
	def d_id_api(image_url, d_id_key,audio_url):
	    """Create a D-ID talk from an uploaded image and audio clip.

	    Args:
	        image_url: URL of the source image, sent as "source_url".
	        d_id_key: D-ID API key, sent as the HTTP Basic credential.
	        audio_url: URL of the audio the image should speak.

	    Returns:
	        The "id" field of the JSON response, identifying the created talk.

	    Raises:
	        RuntimeError: If D-ID answers with a non-success status code.
	    """
	    print("D-id API")
	    url = "https://api.d-id.com/talks"
	    payload = {
	        "source_url": image_url,
	        "script": {
	            "type": "audio",
	            "audio_url": audio_url,
	        }
	    }
	    headers = {
	        "accept": "application/json",
	        "content-type": "application/json",
	        "authorization": "Basic "+d_id_key
	    }

	    # A timeout keeps a stalled connection from hanging the request forever.
	    response = requests.post(url, json=payload, headers=headers, timeout=60)
	    print("From D-id API \n\n\n")
	    print(response.text)

	    # Surface the API's own error text instead of a bare KeyError on "id".
	    if not response.ok:
	        raise RuntimeError(
	            f"D-ID talk creation failed ({response.status_code}): {response.text}"
	        )

	    process_video = response.json()["id"]
	    print(process_video)
	    return process_video



	def transcribe_video(d_id_key: str, elv_key: str, full_text: str,voice_name: str,img):
	    """Turn a photo and a text into a talking-head video file.

	    The text is synthesized to speech, the audio and the image are uploaded
	    to D-ID, a talk is created, and the finished video is downloaded.

	    Args:
	        d_id_key: D-ID API key.
	        elv_key: ElevenLabs API key.
	        full_text: Text the photo should speak.
	        voice_name: Name of the voice, resolved to an id via get_voice_id.
	        img: Path of the photo on disk.

	    Returns:
	        The local file name of the downloaded video ('hero.mp4').

	    Raises:
	        ValueError: If a key, the text or the image is missing, or the
	            voice name is unknown.
	    """
	    print(voice_name)

	    # Validate up front so bad input fails fast with a readable message,
	    # before any remote API call is made.
	    if not d_id_key or not elv_key:
	        raise ValueError("Both the D-ID and the ElevenLabs API key are required.")
	    if not full_text or not full_text.strip():
	        raise ValueError("Please enter the text the photo should speak.")
	    if not img:
	        raise ValueError("Please provide a photo of a person.")

	    voice_id = get_voice_id(voice_name)
	    if voice_id is None:
	        raise ValueError(f"Unknown voice: {voice_name!r}")

	    text_to_speach_api(full_text, elv_key, voice_id)
	    audio_url = upload_audio(output_mp3, d_id_key)
	    image_url = upload_image(img, d_id_key)
	    process_video_url = d_id_api(image_url, d_id_key, audio_url)
	    video_url = get_did_video(process_video_url, d_id_key)

	    file_name = 'hero.mp4'
	    urllib.request.urlretrieve(video_url, file_name)
	    return file_name


	# Example rows for the UI; column order matches the Interface inputs:
	# D-ID key, ElevenLabs key, text, voice name, image path.
	_GREETING_TEXT = "Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!"
	_TALKING_PHOTO_TEXT = "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!"

	examples = [
	    ["", "", _GREETING_TEXT, "Arnold", "./images/hero.jpg"],
	    ["", "", _TALKING_PHOTO_TEXT, "Domi", "./images/3.jpg"],
	    ["", "", _TALKING_PHOTO_TEXT, "Domi", "./images/2.jpg"],
	]

	# Input widgets, in the positional order transcribe_video expects.
	_input_components = [
	    gr.Textbox(label="D-Id API Key", placeholder="Paste your D-Id", type='password'),
	    gr.Textbox(label="Elevenlabs API Keys", placeholder="Paste Elevenlabs", type='password'),
	    gr.Textbox(
	        lines=4,
	        label=" Please input the text you wish to generate in order to make the photo speak.",
	        placeholder="English Text here",
	    ),
	    gr.Dropdown(choices=get_voice_names(), label="Select a voice"),
	    gr.Image(label="photo of a Person", type="filepath"),
	]

	demo = gr.Interface(
	    fn=transcribe_video,
	    inputs=_input_components,
	    outputs="video",
	    title="Bring your images to life with the talking animation feature now!",
	    examples=examples,
	    cache_examples=False,
	)

	demo.launch(cache_examples=False)