Spaces:

sayakpaul
/

grade_images_with_gemini

Running

App Files Files Community

grade_images_with_gemini / app.py

sayakpaul HF staff

updates

c2cc0af about 1 month ago

raw

history blame contribute delete

2.72 kB

	import gradio as gr
	from google import genai
	from utils import *
	from PIL import Image
	import os


	# Gemini API client. The key is looked up in the GEMINI_API_KEY environment
	# variable; the lookup yields None when the variable is unset.
	client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

	# NOTE(review): `load_verifier_prompt`, `types` and `Grading` are not imported
	# explicitly in the visible code -- presumably they arrive through
	# `from utils import *`; confirm against utils.
	system_instruction = load_verifier_prompt()

	# Request configuration reused for every grading call: JSON output described
	# by a list-of-Grading schema, with a fixed seed (presumably for repeatable
	# grades -- TODO confirm).
	generation_config = types.GenerateContentConfig(
	    seed=1994,
	    response_schema=list[Grading],
	    response_mime_type="application/json",
	    system_instruction=system_instruction,
	)


	def make_inputs(prompt, image):
	    """Collect the content parts for one grading request.

	    Forwards ``prompt`` and ``image`` to ``prepare_inputs`` as keyword
	    arguments and returns the items it yields in a fresh list.
	    """
	    return list(prepare_inputs(prompt=prompt, image=image))


	def format_response(response: dict) -> str:
	    """Render per-criterion grades as a Markdown bullet list.

	    Args:
	        response: Mapping from criterion name to a mapping that has
	            ``"score"`` and ``"explanation"`` entries.

	    Returns:
	        One ``* name: score (explanation: ...)`` line per criterion, in the
	        mapping's iteration order, each terminated by a newline. An empty
	        mapping yields an empty string.

	    Raises:
	        KeyError: If an entry lacks ``"score"`` or ``"explanation"``.
	    """
	    # str.join assembles the text in one pass instead of repeated `+=`.
	    return "".join(
	        f"* {key}: {value['score']} (explanation: {value['explanation']})\n"
	        for key, value in response.items()
	    )


	def grade(prompt, image):
	    """Grade ``image`` against ``prompt`` with Gemini and return Markdown.

	    Args:
	        prompt: Text prompt the image was generated from.
	        image: Image to grade (the UI supplies it as a PIL image).

	    Returns:
	        The first parsed grading, formatted by ``format_response``.

	    Raises:
	        gr.Error: If no image was supplied, or if the model response did
	            not parse into at least one grading.
	    """
	    # "Run" can be pressed before an image is uploaded; tell the user what
	    # is missing instead of failing later with an unrelated error.
	    if image is None:
	        raise gr.Error("Please provide an image to grade.")

	    response = client.models.generate_content(
	        model="gemini-2.0-flash",
	        contents=types.Content(parts=make_inputs(prompt, image), role="user"),
	        config=generation_config,
	    )

	    # `parsed` is empty/None when the model output does not match the
	    # response schema; indexing it blindly would raise an opaque
	    # TypeError/IndexError in the UI.
	    gradings = response.parsed
	    if not gradings:
	        raise gr.Error("Gemini did not return a parsable grading. Please try again.")
	    return format_response(gradings[0])


	# (prompt, image file) pairs offered as ready-made examples in the UI.
	_example_specs = (
	    ("realistic photo a shiny black SUV car with a mountain in the background.", "car.jpg"),
	    ("photo a green and funny creature standing in front a lightweight forest.", "green_creature.jpg"),
	)

	# Images are opened when this module is executed, in the order listed; the
	# file names are relative paths.
	examples = [[text, Image.open(path)] for text, path in _example_specs]

	# Custom stylesheet handed to gr.Blocks below: horizontally centers the
	# element with id "col-container" and caps its width at 520px.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 520px;
	}
	"""

	# Page layout: a single styled column holding the intro text, the prompt row,
	# the image input, the grading output and the cached examples.
	with gr.Blocks(css=css) as demo:
	    with gr.Column(elem_id="col-container"):
	        gr.Markdown(
	            f"""# Grade images with Gemini 2.0 Flash

	Following aspects are considered during grading:

	* Accuracy to Prompt
	* Creativity and Originality
	* Visual Quality and Realism
	* Consistency and Cohesion
	* Emotional or Thematic Resonance

	The [system prompt](./verifier_prompt.txt) comes from the paper: [Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps](https://arxiv.org/abs/2501.09732).
	"""
	        )

	        # Single-line prompt box with the "Run" button beside it.
	        with gr.Row():
	            prompt = gr.Text(
	                placeholder="Enter the prompt that generated the image to be graded.",
	                max_lines=1,
	                label="Prompt",
	                show_label=False,
	                container=False,
	            )
	            run_button = gr.Button("Run", scale=0)

	        # The image is delivered to `grade` as a PIL object.
	        image = gr.Image(
	            label="Image",
	            type="pil",
	            format="png",
	            placeholder="The image to be graded.",
	        )

	        result = gr.Markdown(label="Grading Output")

	        # Example rows run through `grade` as well, with their outputs cached.
	        gr.Examples(
	            examples=examples,
	            inputs=[prompt, image],
	            outputs=[result],
	            fn=grade,
	            cache_examples=True,
	        )

	    # Clicking "Run" and submitting the prompt box both trigger grading.
	    gr.on(
	        triggers=[run_button.click, prompt.submit],
	        inputs=[prompt, image],
	        outputs=[result],
	        fn=grade,
	    )

	demo.launch()