video-to-music

Running on Zero

App Files Files Community

video-to-music / app.py

fffiloni

Update app.py

20fea69 about 1 year ago

raw

history blame

3.85 kB

	import gradio as gr
	from gradio_client import Client

	fusecap_client = Client("https://noamrot-fusecap-image-captioning.hf.space/")

	def get_caption(image_in):

	fusecap_result = fusecap_client.predict(
	image_in, # str representing input in 'raw_image' Image component
	api_name="/predict"
	)
	print(f"IMAGE CAPTION: {fusecap_result}")
	return fusecap_result

	import re
	import torch
	from transformers import pipeline

	pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")

	agent_maker_sys = f"""
	You are an AI whose job it is to help users create their own chatbots. In particular, you need to respond succintly in a friendly tone, write a system prompt for an LLM, a catchy title for the chatbot, and a very short example user input. Make sure each part is included.
	To do so, user will provide an image description, from which you must write a system prompt corresponding to the character of the person or subject described.
	For example, if a user says, "make a bot that gives advice on how to grow your startup", first do a friendly response, then add the title, system prompt, and example user input. Immediately STOP after the example input. It should be EXACTLY in this format:
	Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound? Feel free to give me feedback!
	Title: Startup Coach
	System prompt: Your job as an LLM is to provide good startup advice. Do not provide extraneous comments on other topics. Be succinct but useful.
	Example input: Risks of setting up a non-profit board
	Here's another example. If a user types, "Make a chatbot that roasts tech ceos", respond:
	Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound? Feel free to give me feedback!
	Title: Tech Roaster
	System prompt: As an LLM, your primary function is to deliver hilarious and biting critiques of technology CEOs. Keep it witty and entertaining, but also make sure your jokes aren't too mean-spirited or factually incorrect.
	Example input: Elon Musk
	"""

	instruction = f"""
	<\|system\|>
	{agent_maker_sys}</s>
	<\|user\|>
	"""

	def infer(image_in):
	gr.Info("Getting image caption from Fuse Cap...")
	user_prompt = get_caption(image_in)
	prompt = f"{instruction.strip()}\n{user_prompt}</s>"
	print(f"PROMPT: {prompt}")
	gr.Info("Building a system according to the image caption ...")
	outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
	print(outputs)

	pattern = r'\<\\|system\\|\>(.*?)\<\\|assistant\\|\>'
	cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)


	return cleaned_text

	title = f"LLM Agent from a Picture",
	description = f"Get a LLM system prompt from a picture so you can use it in <a href='https://huggingface.co/spaces/abidlabs/GPT-Baker'>GPT-Baker</a>."

	css = """
	#col-container{
	margin: 0 auto;
	max-width: 840px;
	text-align: left;
	}
	"""

	with gr.Blocks(css=css) as demo:
	with gr.Column(elem_id="col-container"):
	gr.HTML(f"""
	<h2 style="text-align: center;">LLM Agent from a Picture</h2>
	<p style="text-align: center;">{description}</p>
	""")
	with gr.Row():
	with gr.Column():
	image_in = gr.Image(
	label = "Image reference",
	type = "filepath"
	)
	submit_btn = gr.Button("Make LLM system from my pic !")
	with gr.Column():
	result = gr.Textbox(
	label ="Suggested System"
	)

	submit_btn.click(
	fn = infer,
	inputs = [
	image_in
	],
	outputs =[
	result
	]
	)

	demo.queue().launch()