|
import typing_extensions as typing |
|
from PIL import Image |
|
import io |
|
from google.genai import types |
|
|
|
|
|
class Score(typing.TypedDict):
    # One graded criterion: a numeric value plus the text that justifies it.
    # NOTE(review): documented with comments rather than a docstring so that
    # the class's `__doc__` stays unset, in case a consumer introspects this
    # type when deriving a schema.

    # Numeric rating; the valid range is not defined in this module.
    score: float
    # Free-text reasoning accompanying the rating.
    explanation: str
|
|
|
|
|
class Grading(typing.TypedDict):
    # Full grading record: one `Score` per criterion plus an overall entry.
    # Presumably the structured output requested from the verifier model;
    # confirm against the caller that consumes this type.
    # NOTE(review): documented with comments rather than a docstring so that
    # the class's `__doc__` stays unset, in case a consumer introspects this
    # type when deriving a schema. Key order is kept as declared.

    # Per-criterion scores.
    accuracy_to_prompt: Score
    creativity_and_originality: Score
    visual_quality_and_realism: Score
    consistency_and_cohesion: Score
    emotional_or_thematic_resonance: Score

    # Aggregate entry; how it is derived is not defined in this module.
    overall_score: Score
|
|
|
|
|
def convert_to_bytes(image: Image.Image) -> bytes:
    """Encode ``image`` as PNG and return the encoded bytes.

    Args:
        image: The PIL image to serialize.

    Returns:
        The PNG-encoded contents of ``image``.
    """
    # Encode into an in-memory buffer; `getvalue()` copies the data out
    # before the buffer is closed on leaving the `with` block.
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        return png_buffer.getvalue()
|
|
|
|
|
def prepare_inputs(prompt: str, image: Image.Image):
    """Build the API input parts for a prompt and its image.

    Args:
        prompt: Text to send as the first part.
        image: PIL image, sent as PNG bytes in the second part.

    Returns:
        A two-element list: the text part followed by the image part.
    """
    text_part = types.Part.from_text(text=prompt)
    image_part = types.Part.from_bytes(
        data=convert_to_bytes(image),
        mime_type="image/png",
    )
    return [text_part, image_part]
|
|
|
|
|
def load_verifier_prompt(path: str = "verifier_prompt.txt") -> str:
    """Load the system prompt for Gemini when it acts as a verifier to grade images.

    Args:
        path: Location of the prompt file. Defaults to ``verifier_prompt.txt``,
            resolved against the current working directory.

    Returns:
        The file contents with every triple-double-quote sequence removed.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the prompt reads the same on every
    # platform instead of depending on the locale's default encoding.
    with open(path, "r", encoding="utf-8") as prompt_file:
        raw_prompt = prompt_file.read()

    # The prompt file may wrap its text in triple quotes; strip them all.
    return raw_prompt.replace('"""', "")
|
|