from transformers import AutoProcessor, AutoModelForImageTextToText, MllamaForConditionalGeneration
import torch
from threading import Thread
import gradio as gr
import spaces
from PIL import Image
from openai import OpenAI
import os

# OpenAI client used for the grading request; the API key is read from the
# OPENAI_API_KEY environment variable (None if it is not set).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Disabled alternative: load a ZipNN-compressed checkpoint instead.
# from zipnn import zipnn_hf
# zipnn_hf()
# model_id = "royleibov/Llama-3.2-11B-Vision-Instruct-ZipNN-Compressed"

# Checkpoint shared by the processor and the model below.
model_id = "unsloth/Llama-3.2-11B-Vision"

# Loaded once at import time: the processor that prepares image/text inputs
# and the bfloat16 model, with device placement delegated to device_map="auto".
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)


@spaces.GPU
def ocr(input_img):
    """Transcribe the text shown in an image using the vision-language model.

    Args:
        input_img: Image as a numpy array; it is cast to ``uint8`` before
            being wrapped in a PIL image.

    Returns:
        The decoded model output with the first ``len(prompt)`` characters
        removed. If the output contains the marker ``"<ORC>"``, the text is
        cut 5 characters before the marker; otherwise everything after the
        prompt is returned.
    """
    # Convert the numpy array to an RGB PIL image for the processor.
    pil_image = Image.fromarray(input_img.astype("uint8"))

    # NOTE(review): 224x224 is a small resolution for reading text; confirm
    # this downscale is intentional.
    image = pil_image.convert("RGB").resize((224, 224))

    # Named ocr_prompt so it does not shadow the module-level `prompt`.
    ocr_prompt = "<|begin_of_text|><|image|>this script says"
    inputs = processor(image, ocr_prompt, return_tensors="pt").to(model.device)

    output = model.generate(**inputs, max_new_tokens=200)
    decoded = processor.decode(output[0])

    start = len(ocr_prompt)
    # NOTE(review): "<ORC>" may be a typo for "<OCR>"; confirm against real
    # model output before changing it.
    marker_pos = decoded.find("<ORC>")
    if marker_pos == -1:
        # str.find returns -1 when the marker is absent. Using it in the
        # slice arithmetic would silently drop the last six characters, so
        # return the full continuation instead.
        return decoded[start:]

    # Clamp the end so a marker near the start cannot yield a negative index
    # that wraps around to the end of the string.
    end = max(start, marker_pos - 5)
    return decoded[start:end]


# Instruction text for the grader; it fixes the reply layout as a score line
# followed by the reason and suggestions.
prompt = (
    "Review the following essay and score it. The output format is:\n"
    "Score: *<score>*\n"
    "Reason: ...\n"
    "Suggestions: ...\n"
)

def score_with_gpt(text):
    """Ask the OpenAI chat model to grade an essay.

    Args:
        text: The essay text to be graded.

    Returns:
        The message content of the first completion choice.
    """
    # The intro and each criterion go on their own line. Plain concatenation
    # ran them together ("teacher,and", "criteria:1.", "logically.2."), so
    # the grading rubric reached the model as one unbroken string.
    system_prompt = "\n".join([
        "You are a high school English teacher, "
        "and you are grading a student's English essay. You are given a "
        "student's essay, and you need to score it based on the following "
        "criteria:",
        "1. The essay is well-organized and flows logically.",
        "2. The essay is well-written and uses correct grammar and punctuation.",
        "3. The words are spelled correctly.",
        "4. The words and phrases are appropriate for the context.",
        "5. Scale the score from 0 to 100.",
    ])

    messages = [
        {"role": "system", "content": system_prompt},
        # `prompt` is the module-level instruction describing the reply format.
        {"role": "user", "content": f"{prompt}, The essay is: \"{text} \""},
    ]

    response = client.chat.completions.create(
        model="gpt-4o-mini",  # or "gpt-3.5-turbo"
        messages=messages,
        max_tokens=500,
    )
    return response.choices[0].message.content


def score_it(input_img):
    """Transcribe an essay image and grade the transcription.

    Args:
        input_img: The image, passed unchanged to ``ocr``.

    Returns:
        A ``(text, feedback)`` tuple: the text returned by ``ocr`` and the
        result of ``score_with_gpt`` for that text.
    """
    transcript = ocr(input_img)
    feedback = score_with_gpt(transcript)
    return transcript, feedback


# Gradio UI: one image input, and two outputs matching the (text, feedback)
# tuple returned by score_it.
demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    inputs=[gr.Image()],
    outputs=[
        gr.Textbox(label="Text", lines=10),
        gr.Markdown(label="Score", show_label=True),
    ],
    stop_btn="Stop Generation",
)

# Start the app as soon as the module is executed.
demo.launch(debug=True)