import {
  MgpstrForSceneTextRecognition,
  MgpstrProcessor,
  RawImage,
  env,
} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.1.0';

// Since we will download the model from the Hugging Face Hub, we can skip the
// local model check.
env.allowLocalModels = false;

// Reference the elements that we will need.
const status = document.getElementById('status');
const fileUpload = document.getElementById('upload');
const imageContainer = document.getElementById('container');
const example = document.getElementById('example');

// Image from the IIIT-5k dataset, used by the "example" link.
const EXAMPLE_URL = 'https://i.postimg.cc/ZKwLg2Gw/367-14.png';

// Load the model and its processor. The two downloads are independent, so
// they are started together instead of one after the other.
status.textContent = 'Loading model...';
const model_id = 'onnx-community/mgp-str-base';
let model;
let processor;
try {
  [model, processor] = await Promise.all([
    MgpstrForSceneTextRecognition.from_pretrained(model_id),
    MgpstrProcessor.from_pretrained(model_id),
  ]);
} catch (err) {
  // Surface the failure instead of leaving "Loading model..." on screen,
  // then rethrow so the error still reaches the console.
  status.textContent = 'Failed to load model';
  throw err;
}
status.textContent = 'Ready';

// Run recognition on the bundled example image.
example.addEventListener('click', (e) => {
  e.preventDefault();
  detect(EXAMPLE_URL);
});

// Run recognition on an image chosen by the user.
fileUpload.addEventListener('change', (e) => {
  const file = e.target.files[0];
  if (!file) {
    return;
  }

  const reader = new FileReader();

  // Once the file has been read as a data URL, hand it to the recogniser.
  reader.onload = (e2) => detect(e2.target.result);
  reader.onerror = () => {
    console.error(reader.error);
    status.textContent = 'Error: could not read the selected file';
  };
  reader.readAsDataURL(file);
});

/**
 * Recognise the text in an image and show it in the status element.
 *
 * The image is also displayed as the background of the image container.
 * Failures are caught, logged, and reported in the status element, so the
 * returned promise never rejects (the callers are event handlers that do not
 * await it).
 *
 * @param {string} img - URL or data URL of the image to analyse.
 * @returns {Promise<void>}
 */
async function detect(img) {
  imageContainer.innerHTML = '';
  imageContainer.style.backgroundImage = `url(${img})`;

  status.textContent = 'Analysing...';
  try {
    const image = await RawImage.read(img);

    // Preprocess the image.
    const result = await processor(image);

    // Perform inference.
    const outputs = await model(result);

    // Decode the model outputs. NOTE(review): `generated_text` is presumably
    // an array with one string per input image; assigning it to textContent
    // stringifies it, which for a single image yields just that string.
    const generated_text = processor.batch_decode(outputs.logits).generated_text;
    status.textContent = generated_text;
  } catch (err) {
    console.error(err);
    status.textContent = `Error: ${err?.message ?? err}`;
  }
}