Spaces:

JunJiaGuo
/

lang-ground

Running

App Files Files Community

lang-ground / app_langloc.py

JunJiaGuo

Upload folder using huggingface_hub

b9c1592 verified 2 days ago

raw

history blame contribute delete

3.19 kB

	import gradio as gr
	from langground import LangGround, text_palette


	# Single-slot cache shared by the callbacks: the names of the models that
	# are currently loaded plus the object built from them (all None at start).
	state = dict(loc_model=None, llm_model=None, model=None)


	def load_model(loc_model: str, llm_model: str) -> LangGround:
	    """Return a ``LangGround`` for the requested pair of model names.

	    The last instance built is kept in the module-level ``state`` dict
	    together with the names it was built from, so a new ``LangGround`` is
	    constructed only when either name differs from the cached pair.
	    Progress toasts are shown through ``gr.Info`` around a rebuild.
	    """
	    cached_pair = (state["loc_model"], state["llm_model"])
	    if cached_pair == (loc_model, llm_model):
	        # Same configuration as last time: reuse the cached instance.
	        return state["model"]

	    gr.Info("Loading models...", duration=5)
	    fresh = LangGround(loc_model=loc_model, llm_model=llm_model)
	    # Record the instance and its names only after construction succeeded.
	    state.update({"model": fresh, "loc_model": loc_model, "llm_model": llm_model})
	    gr.Info("Models loaded!", duration=2.5)
	    return state["model"]


	def predict(frame, question: str, threshold: float, loc_model: str, llm_model: str):
	    """Validate the inputs, then run localization with the selected models.

	    Args:
	        frame: Image supplied by the UI, or ``None`` when nothing was uploaded.
	        question: Free-text query; ``None``, empty, or whitespace-only text
	            is treated as missing.
	        threshold: Forwarded unchanged to ``localize`` as ``threshold``.
	        loc_model: Localization model name handed to ``load_model``.
	        llm_model: LLM name handed to ``load_model``.

	    Returns:
	        ``("", None, None)`` after showing a warning when an input is
	        missing; otherwise whatever ``model.localize`` returns.
	    """
	    # Test the image against None explicitly instead of relying on its
	    # truthiness (array-like images raise on ``not frame``), and tolerate a
	    # None question so a bare API call cannot crash on ``.strip()``.
	    if frame is None or not (question or "").strip():
	        gr.Warning("Please provide both an image and a question")
	        return "", None, None

	    model = load_model(loc_model, llm_model)
	    return model.localize(frame, question, threshold=threshold)


	# HTML banner for the top of the page (passed to gr.HTML when the UI is built).
	title = """
	<center>

	<h1> 🔍 Language Localization </h1>
	<b> Upload an image and ask questions to find objects in it. </b>

	</center>
	"""


	# Build the Gradio UI and bind it to `demo`.
	# NOTE(review): indentation was lost in this copy of the file, so the exact
	# nesting of the Row/Column contexts below cannot be read from here —
	# confirm the layout against the original source before editing.
	with gr.Blocks() as demo:
	# Page banner.
	gr.HTML(title)

	with gr.Row():

	with gr.Column(scale=1):
	# Input image; type="pil" requests a PIL image in the callback.
	frame_input = gr.Image(type="pil", label="Upload Frame")

	with gr.Column(scale=1):
	with gr.Row():
	with gr.Column(scale=1):

	# Localization backend selector; the value is passed to predict as loc_model.
	loc_model_input = gr.Dropdown(
	choices=["yolo", "owl"],
	value="yolo",
	label="Localization Model",
	)
	with gr.Column(scale=2):

	# LLM selector; the value is passed to predict as llm_model.
	llm_model_input = gr.Dropdown(
	choices=[
	"Qwen/Qwen2.5-7B-Instruct",
	"OpenGVLab/InternVL2_5-8B",
	"OpenGVLab/InternVL2_5-4B",
	"OpenGVLab/InternVL2_5-2B",
	"OpenGVLab/InternVL2_5-1B",
	],
	value="Qwen/Qwen2.5-7B-Instruct",
	label="LLM Model",
	)
	# Threshold in [0, 1] (step 0.1, default 0.4), passed to predict as threshold.
	threshold_input = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Threshold")
	question_input = gr.Textbox(lines=2, placeholder="Enter your question here", label="Question")
	# First output of predict, coloured with text_palette from langground.
	# NOTE(review): gr.Highlightedtext is a legacy spelling of
	# gr.HighlightedText — confirm the alias exists in the pinned Gradio version.
	objs = gr.Highlightedtext(show_legend=False, show_inline_category=False, color_map=text_palette, label="Objects Found")
	submit_btn = gr.Button("Submit")

	# Second and third outputs of predict.
	with gr.Row():
	all_bbox_image = gr.Image(label="Found Objects")
	llm_bbox_image = gr.Image(label="Selected Objects")

	# Clicking Submit calls predict with the five inputs (in this order) and
	# routes its three return values to the outputs (in this order).
	submit_btn.click(
	fn=predict,
	inputs=[frame_input, question_input, threshold_input, loc_model_input, llm_model_input],
	outputs=[objs, all_bbox_image, llm_bbox_image],
	)
	# Sample image/question pairs that populate frame_input and question_input.
	with gr.Row():
	examples = gr.Examples(
	examples=[
	["assets/demo.jpeg", "I'm thirsty"],
	["assets/kitchen.webp", "The food has expired and is no longer safe to eat."],
	["assets/kitchen.webp", "The food is about to expire."],
	],
	inputs=[frame_input, question_input],
	)
	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()