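"""Gradio chat demo for the Navyabhat/Llava-Phi2 multimodal model.

Accepts text, image, and audio input and generates replies through the
MultiModalPhi2 inference wrapper.
"""
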
import gradio as gr
from PIL import Image
from inference.main import MultiModalPhi2

messages = []  # placeholder for chat history (currently unused)

# Load the multimodal model once at startup; CPU inference keeps the demo
# runnable on hardware without a GPU, at the cost of generation speed.
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
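
# A minimal sketch of calling the wrapper directly, mirroring how run() below
# invokes it (text, audio path, PIL image; any argument may be None). The
# file name is hypothetical:
#     reply = multimodal_phi2("Describe this image", None, Image.open("cat.png"))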

def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    """Append the user's text/image/audio inputs to the chat history."""
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    # Prefer the microphone recording; fall back to an uploaded file.
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Reject the submission if no input of any kind was provided.
        raise gr.Error("Provide text, an image, or audio input")
    return chatbot
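
# Chatbot history entries are (user, bot) pairs: plain strings render as text,
# while one-element tuples like (filepath,) render as inline media, which is
# why images and audio are appended as ((image,), None) above.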

def clear_data():
    """Reset every input widget and empty the chat history."""
    # Returning a dict keyed by components updates each listed output.
    return {prompt: None, image: None, audio_upload: None, audio_mic: None, chatbot: []}

def run(history, text, image, audio_upload, audio_mic):
    """Run multimodal inference and append the model's reply to the history."""
    if text in [None, ""]:
        text = None
    # Prefer the uploaded file over the microphone recording.
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None

    # Debug logging of the resolved inputs.
    print("text", text)
    print("image", image)
    print("audio", audio)

    if image is not None:
        image = Image.open(image)

    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs))
    # Clear every input widget after the model responds.
    return history, None, None, None, None

# Custom styling: CSS-like dicts passed to components via the legacy style=
# kwarg (Gradio 3.x); newer Gradio releases no longer accept this parameter.
interface_style = {
    "box": {
        "backgroundColor": "#f9f9f9",
        "padding": "20px",
        "borderRadius": "10px",
        "boxShadow": "0 0 10px rgba(0, 0, 0, 0.1)",
    },
    "button": {
        "backgroundColor": "#4caf50",
        "color": "#fff",
        "padding": "10px",
        "border": "none",
        "borderRadius": "5px",
        "cursor": "pointer",
    },
    "textbox": {
        "width": "100%",
        "padding": "10px",
        "marginBottom": "10px",
        "boxSizing": "border-box",
    },
    "image": {
        "width": "100%",
        "marginBottom": "10px",
    },
    "audio": {
        "width": "100%",
        "marginBottom": "10px",
    },
    "chatbox": {
        "height": "550px",
        "backgroundColor": "#f0f0f0",
        "borderRadius": "5px",
        "padding": "10px",
        "overflowY": "auto",
    },
}
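
# Note: in current Gradio the supported route for custom styling is CSS on the
# Blocks container rather than per-component style dicts, e.g. (a sketch):
#     demo = gr.Blocks(css=".gradio-container {background-color: #f9f9f9}")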

# Gradio 3.x layout: gr.Box and Audio(source=...) are 3.x APIs (removed in 4.x).
with gr.Blocks() as demo:
    gr.Markdown("## MultiModal Phi2 Model Pretraining and Finetuning from Scratch")
    with gr.Row():
        # Left column: text, image, and audio inputs.
        with gr.Column(scale=4):
            with gr.Box(style=interface_style["box"]):
                with gr.Row():
                    prompt = gr.Textbox(
                        placeholder="Enter Prompt",
                        lines=2,
                        label="Query",
                        value=None,
                        style=interface_style["textbox"],
                    )
                with gr.Row():
                    image = gr.Image(
                        type="filepath", value=None, style=interface_style["image"]
                    )
                with gr.Row():
                    audio_upload = gr.Audio(
                        source="upload", type="filepath", style=interface_style["audio"]
                    )
                    audio_mic = gr.Audio(
                        source="microphone",
                        type="filepath",
                        format="mp3",
                        style=interface_style["audio"],
                    )

        # Right column: chat history plus the submit/clear controls.
        with gr.Column(scale=8):
            with gr.Box(style=interface_style["box"]):
                with gr.Row():
                    chatbot = gr.Chatbot(
                        avatar_images=("🧑", "🤖"),
                        height=550,
                        style=interface_style["chatbox"],
                    )
                with gr.Row():
                    submit = gr.Button(value="Submit", style=interface_style["button"])
                    clear = gr.Button(value="Clear", style=interface_style["button"])

    # First echo the user's inputs into the chat, then (on success) run inference.
    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )
    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()
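
# Local usage: run `python app.py` and open the printed URL (Gradio defaults to
# http://127.0.0.1:7860); on Hugging Face Spaces this file is the entry point.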