Update index.js
index.js CHANGED
@@ -31,7 +31,7 @@ let currentQuery = '';
 
 async function initializeSessions() {
   status.textContent = 'Loading model...';
-
+
   ortSessionA = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_A_${QUANT}.onnx`),
     { executionProviders: ["webgpu"] }
@@ -51,14 +51,14 @@ async function initializeSessions() {
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_D_${QUANT}.onnx`),
     {
       executionProviders: ["webgpu"],
-    }
+    };
 
-
-
-
-
-
-
+  ortSessionE = await ort.InferenceSession.create(
+    await getModelFile(ONNX_MODEL, `onnx/QwenVL_E_${QUANT}.onnx`),
+    {
+      executionProviders: ["wasm"],
+    },
+  );
   );
 
 
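Note: each onnxruntime-web InferenceSession can pick its own execution provider, which is why the new QwenVL_E session runs on "wasm" while the other stages stay on "webgpu". A minimal sketch of that split, assuming an `ort` import from onnxruntime-web and placeholder model URLs:

```js
import * as ort from "onnxruntime-web";

// Hypothetical helper: fetch an .onnx file into a byte buffer.
async function fetchModel(url) {
  const res = await fetch(url);
  return new Uint8Array(await res.arrayBuffer());
}

// One session on the WebGPU backend, another on the wasm (CPU) backend.
const gpuSession = await ort.InferenceSession.create(
  await fetchModel("QwenVL_D.onnx"), // placeholder URL
  { executionProviders: ["webgpu"] },
);
const cpuSession = await ort.InferenceSession.create(
  await fetchModel("QwenVL_E.onnx"), // placeholder URL
  { executionProviders: ["wasm"] },
);
```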
@@ -126,11 +126,11 @@ export function float16ToInt64(float16Value) {
 
 
 async function handleQuery(imageUrl, query) {
-  console.log('handleQuery', {imageUrl}, {query});
-
+  console.log('handleQuery', { imageUrl }, { query });
+
   try {
     status.textContent = 'Analyzing...';
-
+
     const result = await imageTextToText(imageUrl, query, (out) => {
       output.textContent = out;
     });
@@ -160,9 +160,9 @@ export async function imageTextToText(
     "float16",
     new Uint16Array(
       config.num_hidden_layers *
-
-
-
+        config.num_key_value_heads *
+        MAX_SEQ_LENGTH *
+        (config.hidden_size / config.num_attention_heads)
     ).fill(0),
     [
       config.num_hidden_layers,
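The Uint16Array above backs a float16 key/value cache whose element count is num_hidden_layers × num_key_value_heads × MAX_SEQ_LENGTH × head_dim, with head_dim = hidden_size / num_attention_heads. A rough worked example of that arithmetic; the config values and MAX_SEQ_LENGTH below are assumptions (they roughly match Qwen2-VL-2B), not values read from this repo:

```js
// Assumed config values -- not read from this repo's config.json.
const config = {
  num_hidden_layers: 28,
  num_key_value_heads: 2,
  hidden_size: 1536,
  num_attention_heads: 12,
};
const MAX_SEQ_LENGTH = 1024; // hypothetical cap

const headDim = config.hidden_size / config.num_attention_heads; // 128
const elements =
  config.num_hidden_layers *
  config.num_key_value_heads *
  MAX_SEQ_LENGTH *
  headDim; // 28 * 2 * 1024 * 128 = 7,340,032 float16 values

// One Uint16 per float16 element, i.e. about 14 MB for this cache tensor.
console.log(elements, elements * 2);
```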
@@ -181,7 +181,7 @@ export async function imageTextToText(
   );
 
   let pos_factor = new Tensor("float16", new Uint16Array([0]), [1]);
-
+
   const tokenizer = await AutoTokenizer.from_pretrained(BASE_MODEL);
   const prompt = `\n<|im_start|>user\n<|vision_start|><|vision_end|>${query}<|im_end|>\n<|im_start|>assistant\n`;
   const token = await tokenizer(prompt, {
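The prompt string follows the Qwen chat template, with <|vision_start|>/<|vision_end|> marking where the image embeddings get spliced in. A small sketch of the tokenization step with transformers.js; the package name, checkpoint id, and the return_tensor option are assumptions here, since BASE_MODEL and the tokenizer options are defined elsewhere in this file:

```js
// Package and checkpoint id are assumptions; BASE_MODEL is defined elsewhere in index.js.
import { AutoTokenizer } from "@huggingface/transformers";

const tokenizer = await AutoTokenizer.from_pretrained("Qwen/Qwen2-VL-2B-Instruct");

const query = "Describe this image.";
const prompt =
  `\n<|im_start|>user\n<|vision_start|><|vision_end|>${query}<|im_end|>\n` +
  `<|im_start|>assistant\n`;

// return_tensor: false asks for plain arrays instead of Tensor objects (assumed option).
const token = await tokenizer(prompt, { return_tensor: false });
console.log(token.input_ids.length, "prompt tokens");
```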
@@ -249,7 +249,7 @@ export async function imageTextToText(
 
   await ortSessionA.release();
   ortSessionA = null;
-
+
   ({ hidden_states, position_ids } = await ortSessionD.run({
     "hidden_states.1": hidden_states,
     image_embed,
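ortSessionA is released as soon as it is no longer needed because onnxruntime-web sessions keep their wasm/WebGPU buffers alive until release() is called; session.run() then takes a feeds object keyed by input names and resolves to outputs keyed by output names. A sketch of that pattern, where the input/output names are placeholders rather than this model's real ones:

```js
// Run one stage: feeds map input names to ort.Tensor values (names are placeholders).
async function runStage(session, hiddenStates, imageEmbed) {
  const results = await session.run({
    "hidden_states.1": hiddenStates,
    image_embed: imageEmbed,
  });
  // Outputs come back keyed by output name.
  const { hidden_states, position_ids } = results;
  return { hidden_states, position_ids };
}

// Free a session's native/GPU resources once it is no longer needed.
async function dispose(session) {
  await session.release();
}
```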
@@ -333,7 +333,7 @@ export async function imageTextToText(
       } else {
         const decoded = tokenizer.decode([...token_id.data]);
         cb(output);
-
+
         output += decoded;
       }
     }
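The cb(output) call is what streams partial text to the UI: each generated token id is decoded and appended to the running output, and the callback receives everything produced so far. A toy sketch of that accumulation loop; decodeIds stands in for tokenizer.decode and is purely illustrative:

```js
// Incremental decode with a progress callback (illustrative only).
function streamTokens(tokenIds, decodeIds, cb) {
  let output = "";
  for (const id of tokenIds) {
    const decoded = decodeIds([id]);
    cb(output);        // report the text generated so far
    output += decoded; // then append the newly decoded piece
  }
  return output;
}

// Usage with a toy "decoder".
const text = streamTokens(
  [1, 2, 3],
  (ids) => ids.map((i) => `tok${i} `).join(""),
  (partial) => console.log("partial:", partial),
);
console.log("final:", text);
```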
@@ -378,7 +378,7 @@ promptInput.addEventListener('keypress', (e) => {
 
 form.addEventListener('submit', (e) => {
   e.preventDefault();
-
+
   if (!currentImage || !currentQuery) {
     status.textContent = 'Please select an image and type a prompt';
   } else {