"""Gradio app: OCR a photographed English essay and grade it with GPT.

Pipeline: uploaded image -> Llama 3.2 Vision (text transcription) ->
OpenAI chat completion (score, reason, suggestions).
"""

import os
from threading import Thread

import gradio as gr
import spaces
import torch
from openai import OpenAI
from PIL import Image
from transformers import (
    AutoModelForImageTextToText,
    AutoProcessor,
    MllamaForConditionalGeneration,
)

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Alternative (ZipNN-compressed checkpoint), kept for reference:
# from zipnn import zipnn_hf
# zipnn_hf()
# model_id = "royleibov/Llama-3.2-11B-Vision-Instruct-ZipNN-Compressed"
model_id = "unsloth/Llama-3.2-11B-Vision"

model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)

# Completion-style prompt for the base vision model: the model continues the
# sentence with the text it reads in the image.
OCR_PROMPT = "<|begin_of_text|><|image|>this script says"


@spaces.GPU
def ocr(input_img) -> str:
    """Transcribe the text visible in an image with the vision model.

    Args:
        input_img: Image as a numpy array (what ``gr.Image()`` passes by
            default); it is cast to ``uint8`` before conversion.

    Returns:
        The text generated by the model after the OCR prompt, with special
        tokens removed and surrounding whitespace stripped.
    """
    pil_image = Image.fromarray(input_img.astype("uint8"))
    # NOTE(review): 224x224 is kept from the original code, but it ignores the
    # aspect ratio and may make small handwriting illegible — confirm whether
    # the processor's own resizing would be sufficient.
    image = pil_image.convert("RGB").resize((224, 224))

    inputs = processor(
        images=image, text=OCR_PROMPT, return_tensors="pt"
    ).to(model.device)
    output = model.generate(**inputs, max_new_tokens=200)

    # generate() returns prompt tokens followed by the new tokens. Decode only
    # the new ones instead of slicing the decoded string by character counts,
    # which breaks whenever the decoded prompt differs from OCR_PROMPT.
    prompt_len = inputs["input_ids"].shape[-1]
    new_tokens = output[0][prompt_len:]
    return processor.decode(new_tokens, skip_special_tokens=True).strip()


prompt = '''Review the following essay and score it. The output format is:
Score: **
Reason: ...
Suggestions: ...
'''

# Grading rubric sent as the system message. Each criterion sits on its own
# line so the sentences do not run into each other.
_SYSTEM_PROMPT = "\n".join(
    [
        "You are a high school English teacher, "
        "and you are grading a student's English essay. "
        "You are given a student's essay, and you need to score it based on "
        "the following criteria:",
        "1. The essay is well-organized and flows logically.",
        "2. The essay is well-written and uses correct grammar and punctuation.",
        "3. The words are spelled correctly.",
        "4. The words and phrases are appropriate for the context.",
        "5. Scale the score from 0 to 100.",
    ]
)


def score_with_gpt(text: str) -> str:
    """Ask the OpenAI chat model to grade an essay.

    Args:
        text: The essay text to grade.

    Returns:
        The model's reply (requested in the Score / Reason / Suggestions
        format), or an empty string if the API returned no content.
    """
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": f"{prompt}, The essay is: \"{text} \""},
    ]
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # or "gpt-3.5-turbo"
        messages=messages,
        max_tokens=500,
    )
    # message.content is Optional in the OpenAI SDK; never hand None to the UI.
    return response.choices[0].message.content or ""


def score_it(input_img) -> tuple[str, str]:
    """Run OCR on the uploaded image and grade the recognised essay.

    Args:
        input_img: Image supplied by the Gradio ``Image`` component, or
            ``None`` when the user submits without uploading anything.

    Returns:
        A ``(recognised_text, grading_markdown)`` pair.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_img is None:
        raise gr.Error("Please upload an image of your essay first.")

    text = ocr(input_img)
    if not text:
        # Nothing to grade; skip the API call.
        return text, "No text could be recognised in the image."
    return text, score_with_gpt(text)


demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    inputs=[gr.Image()],
    outputs=[
        gr.Textbox(label="Text", lines=10),
        gr.Markdown(label="Score", show_label=True),
    ],
    stop_btn="Stop Generation",
)

demo.launch(debug=True)