radames's picture
Update index.html
5744525 verified
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<style>
@import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
html,
body {
font-family: "Source Sans 3", sans-serif;
}
</style>
<title>Candle Blip Image Captioning Demo</title>
<script src="https://cdn.tailwindcss.com/3.4.3"></script>
<script type="module" src="./code.js"></script>
<script type="module">
const MODELS = {
blip_image_quantized_q4k: {
base_url: "https://huggingface.co/lmz/candle-blip/resolve/main/",
model: "blip-image-captioning-large-q4k.gguf",
config: "config.json",
tokenizer: "tokenizer.json",
quantized: true,
size: "271 MB",
},
blip_image_quantized_q80: {
base_url: "https://huggingface.co/lmz/candle-blip/resolve/main/",
model: "blip-image-captioning-large-q80.gguf",
config: "config.json",
tokenizer: "tokenizer.json",
quantized: true,
size: "505 MB",
},
blip_image_large: {
base_url:
"https://huggingface.co/Salesforce/blip-image-captioning-large/resolve/refs%2Fpr%2F18/",
model: "model.safetensors",
config: "config.json",
tokenizer: "tokenizer.json",
quantized: false,
size: "1.88 GB",
},
};
const blipWorker = new Worker("./blipWorker.js", {
type: "module",
});
const outputStatusEl = document.querySelector("#output-status");
const outputCaptionEl = document.querySelector("#output-caption");
const modelSelectEl = document.querySelector("#model");
const clearBtn = document.querySelector("#clear-btn");
const fileUpload = document.querySelector("#file-upload");
const dropArea = document.querySelector("#drop-area");
const imagesExamples = document.querySelector("#image-select");
const canvas = document.querySelector("#canvas");
const ctxCanvas = canvas.getContext("2d");
let isCaptioning = false;
let currentImageURL = null;
clearBtn.addEventListener("click", () => {
clearImageCanvas();
});
modelSelectEl.addEventListener("change", () => {
if (currentImageURL) {
runInference(currentImageURL);
}
});
//add event listener to file input
fileUpload.addEventListener("input", async (e) => {
const target = e.target;
if (target.files.length > 0) {
const href = URL.createObjectURL(target.files[0]);
clearImageCanvas();
await drawImageCanvas(href);
runInference(href);
}
});
// add event listener to drop-area
dropArea.addEventListener("dragenter", (e) => {
e.preventDefault();
dropArea.classList.add("border-blue-700");
});
dropArea.addEventListener("dragleave", (e) => {
e.preventDefault();
dropArea.classList.remove("border-blue-700");
});
dropArea.addEventListener("dragover", (e) => {
e.preventDefault();
});
dropArea.addEventListener("drop", async (e) => {
e.preventDefault();
dropArea.classList.remove("border-blue-700");
const url = e.dataTransfer.getData("text/uri-list");
const files = e.dataTransfer.files;
if (files.length > 0) {
const href = URL.createObjectURL(files[0]);
clearImageCanvas();
await drawImageCanvas(href);
runInference(href);
} else if (url) {
clearImageCanvas();
await drawImageCanvas(url);
runInference(url);
}
});
imagesExamples.addEventListener("click", async (e) => {
if (isCaptioning) {
return;
}
const target = e.target;
if (target.nodeName === "IMG") {
const href = target.src;
clearImageCanvas();
await drawImageCanvas(href);
runInference(href);
}
});
function clearImageCanvas() {
ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);
isCaptioning = false;
clearBtn.disabled = true;
canvas.parentElement.style.height = "auto";
outputStatusEl.hidden = false;
outputCaptionEl.hidden = true;
outputStatusEl.innerText = "Please select an image";
currentImageURL = null;
}
async function drawImageCanvas(imgURL) {
if (!imgURL) {
throw new Error("No image URL provided");
}
return new Promise((resolve, reject) => {
ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);
ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);
const img = new Image();
img.crossOrigin = "anonymous";
img.onload = () => {
canvas.width = img.width;
canvas.height = img.height;
ctxCanvas.drawImage(img, 0, 0);
canvas.parentElement.style.height = canvas.offsetHeight + "px";
clearBtn.disabled = false;
resolve(img);
};
img.src = imgURL;
currentImageURL = imgURL;
});
}
document.addEventListener("DOMContentLoaded", () => {
for (const [id, model] of Object.entries(MODELS)) {
const option = document.createElement("option");
option.value = id;
option.innerText = `${id} (${model.size})`;
modelSelectEl.appendChild(option);
}
});
async function getImageCaption(
worker,
weightsURL,
tokenizerURL,
configURL,
modelID,
imageURL,
quantized,
updateStatus = null
) {
return new Promise((resolve, reject) => {
worker.postMessage({
weightsURL,
tokenizerURL,
configURL,
modelID,
imageURL,
quantized,
});
function messageHandler(event) {
if ("error" in event.data) {
worker.removeEventListener("message", messageHandler);
reject(new Error(event.data.error));
}
if (event.data.status === "complete") {
worker.removeEventListener("message", messageHandler);
resolve(event.data);
}
if (updateStatus) updateStatus(event.data);
}
worker.addEventListener("message", messageHandler);
});
}
function updateStatus(data) {
if (data.status === "status") {
outputStatusEl.innerText = data.message;
}
}
async function runInference(imageURL) {
if (isCaptioning || !imageURL) {
alert("Please select an image first");
return;
}
outputStatusEl.hidden = false;
outputCaptionEl.hidden = true;
clearBtn.disabled = true;
modelSelectEl.disabled = true;
isCaptioning = true;
const selectedModel = modelSelectEl.value;
const model = MODELS[selectedModel];
const weightsURL = `${model.base_url}${model.model}`;
const tokenizerURL = `${model.base_url}${model.tokenizer}`;
const configURL = `${model.base_url}${model.config}`;
const quantized = model.quantized;
try {
const time = performance.now();
const caption = await getImageCaption(
blipWorker,
weightsURL,
tokenizerURL,
configURL,
selectedModel,
imageURL,
quantized,
updateStatus
);
outputStatusEl.hidden = true;
outputCaptionEl.hidden = false;
const totalTime = ((performance.now() - time)/1000).toFixed(2);
outputCaptionEl.innerHTML = `${
caption.output
}<br/><span class="text-xs">Inference time: ${totalTime} s</span>`;
} catch (err) {
console.error(err);
outputStatusEl.hidden = false;
outputCaptionEl.hidden = true;
outputStatusEl.innerText = err.message;
}
clearBtn.disabled = false;
modelSelectEl.disabled = false;
isCaptioning = false;
}
</script>
</head>
<body class="container max-w-4xl mx-auto p-4">
<main class="grid grid-cols-1 gap-5 relative">
<span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
<div>
<h1 class="text-5xl font-bold">Candle BLIP Image Captioning</h1>
<h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
<p class="max-w-lg">
<a
href="https://huggingface.co/Salesforce/blip-image-captioning-large"
target="_blank"
class="underline hover:text-blue-500 hover:no-underline"
>BLIP Image Captioning
</a>
running in the browser using
<a
href="https://github.com/huggingface/candle/"
target="_blank"
class="underline hover:text-blue-500 hover:no-underline"
>Candle</a
>, a minimalist ML framework for Rust.
</p>
<p class="text-xs max-w-lg py-2">
<b>Note:</b>
The image captioning on the smallest model takes about ~50 seconds, it
will vary depending on your machine and model size.
</p>
</div>
<div>
<label for="model" class="font-medium block">Models Options: </label>
<select
id="model"
class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max"
></select>
</div>
<!-- drag and drop area -->
<div class="grid gap-4 sm:grid-cols-2 py-4">
<div class="relative max-w-lg">
<div
class="absolute w-full bottom-full flex justify-between items-center"
>
<div class="flex gap-2 w-full">
<button
id="clear-btn"
disabled
title="Clear Image"
class="ml-auto text-xs bg-white rounded-md disabled:opacity-50 flex gap-1 items-center"
>
<svg
class=""
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 13 12"
height="1em"
>
<path
d="M1.6.7 12 11.1M12 .7 1.6 11.1"
stroke="#2E3036"
stroke-width="2"
/>
</svg>
</button>
</div>
</div>
<div
id="drop-area"
class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative aspect-video w-full overflow-hidden"
>
<div
class="flex flex-col items-center justify-center space-y-1 text-center"
>
<svg
width="25"
height="25"
viewBox="0 0 25 25"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
fill="#000"
/>
</svg>
<div class="flex text-sm text-gray-600">
<label
for="file-upload"
class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700"
>
<span>Drag and drop y our image here</span>
<span class="block text-xs">or</span>
<span class="block text-xs">Click to upload</span>
</label>
</div>
<input
id="file-upload"
name="file-upload"
type="file"
class="sr-only"
/>
</div>
<canvas
id="canvas"
class="absolute pointer-events-none w-full"
></canvas>
</div>
</div>
<div class="">
<div
class="h-full bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
>
<p
id="output-caption"
class="m-auto text-xl text-center p-2"
hidden
></p>
<span id="output-status" class="m-auto font-light">
Please select an image
</span>
</div>
</div>
</div>
<div>
<div
class="flex gap-3 items-center overflow-x-scroll"
id="image-select"
>
<h3 class="font-medium">Examples:</h3>
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/sf.jpg"
class="cursor-pointer w-24 h-24 object-cover"
/>
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/bike.jpeg"
class="cursor-pointer w-24 h-24 object-cover"
/>
<img
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/000000000077.jpg"
class="cursor-pointer w-24 h-24 object-cover"
/>
</div>
</div>
</main>
</body>
</html>