|
/**
 * Posts an embedding request to `worker` and waits for its final reply.
 *
 * The request object `{ weightsURL, tokenizerURL, configURL, modelID, sentences }`
 * is sent with `worker.postMessage`, then the worker's events are observed:
 *
 * - a `message` whose data contains an `error` property rejects the promise
 *   with `new Error(data.error)`;
 * - a `message` whose data has `status === "complete"` resolves the promise
 *   with that data object;
 * - every `message` (including the two above) is forwarded to `updateStatus`
 *   when one is supplied;
 * - an `error` event on the worker itself rejects the promise, so a worker
 *   that fails without posting a reply does not leave the caller waiting
 *   forever.
 *
 * All listeners registered here are removed once the promise settles.
 *
 * @param {Worker} worker - Object exposing `postMessage`, `addEventListener`
 *   and `removeEventListener`.
 * @param {string} weightsURL - Forwarded verbatim to the worker.
 * @param {string} tokenizerURL - Forwarded verbatim to the worker.
 * @param {string} configURL - Forwarded verbatim to the worker.
 * @param {string} modelID - Forwarded verbatim to the worker.
 * @param {*} sentences - Forwarded verbatim to the worker (presumably the
 *   texts to embed; the worker's contract is not visible here).
 * @param {?function(Object): void} [updateStatus=null] - Optional callback
 *   invoked with the data of every message received from the worker.
 * @returns {Promise<Object>} Resolves with the data of the "complete" message.
 */
export async function getEmbeddings(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  sentences,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    worker.postMessage({
      weightsURL,
      tokenizerURL,
      configURL,
      modelID,
      sentences,
    });

    // Detach both listeners so a settled request no longer reacts to events.
    function removeListeners() {
      worker.removeEventListener("message", messageHandler);
      worker.removeEventListener("error", errorHandler);
    }

    function messageHandler(event) {
      if ("error" in event.data) {
        removeListeners();
        reject(new Error(event.data.error));
      }
      if (event.data.status === "complete") {
        removeListeners();
        resolve(event.data);
      }
      // Status callbacks receive every message, terminal ones included.
      if (updateStatus) updateStatus(event.data);
    }

    // Fired for failures of the worker itself rather than an in-band
    // `{ error }` reply.
    function errorHandler(event) {
      removeListeners();
      reject(new Error((event && event.message) || "Worker error"));
    }

    worker.addEventListener("message", messageHandler);
    worker.addEventListener("error", errorHandler);
  });
}
|
|
|
/**
 * Registry of supported embedding models, keyed by model id.
 *
 * Each entry holds:
 * - `base_url`: Hugging Face "resolve" URL ending in "/"; file names such as
 *   "model.safetensors" are appended to it by `getModelInfo`.
 * - `search_prefix` / `document_prefix`: text prefixes for queries and
 *   documents (presumably prepended to the input before embedding — the
 *   consumer is not visible in this file). The E5 models use "query: " and
 *   "passage: ", each with a trailing space; the other models use none.
 *
 * Some `base_url`s point at a `refs%2Fpr%2FN` revision (URL-encoded
 * "refs/pr/N") instead of "main" — presumably the pull request that provides
 * the files this project needs; verify before changing them.
 */
const MODELS = {
  intfloat_e5_small_v2: {
    base_url: "https://huggingface.co/intfloat/e5-small-v2/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  intfloat_e5_base_v2: {
    base_url: "https://huggingface.co/intfloat/e5-base-v2/resolve/main/",
    search_prefix: "query: ",
    // Trailing space matters: it keeps the prefix identical to the other E5
    // entries ("passage: " rather than "passage:").
    document_prefix: "passage: ",
  },
  intfloat_multilingual_e5_small: {
    base_url:
      "https://huggingface.co/intfloat/multilingual-e5-small/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  sentence_transformers_all_MiniLM_L6_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/refs%2Fpr%2F21/",
    search_prefix: "",
    document_prefix: "",
  },
  sentence_transformers_all_MiniLM_L12_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/refs%2Fpr%2F4/",
    search_prefix: "",
    document_prefix: "",
  },
  gte_tiny: {
    base_url: "https://huggingface.co/TaylorAI/gte-tiny/resolve/refs%2Fpr%2F2/",
    search_prefix: "",
    document_prefix: "",
  },
};
|
/**
 * Builds the download URLs and text prefixes for a registered model.
 *
 * The URLs are the model's `base_url` from `MODELS` with the file names
 * "model.safetensors", "config.json" and "tokenizer.json" appended.
 *
 * @param {string} id - A key of the `MODELS` registry.
 * @returns {{modelURL: string, configURL: string, tokenizerURL: string,
 *   search_prefix: string, document_prefix: string}} URLs and prefixes for
 *   the model.
 * @throws {TypeError} If `id` is not an own key of `MODELS`.
 */
export function getModelInfo(id) {
  // Own-property check: rejects unknown ids with a readable message and keeps
  // inherited keys such as "toString" from producing "undefined..." URLs.
  if (!Object.prototype.hasOwnProperty.call(MODELS, id)) {
    throw new TypeError(
      `Unknown model id: ${String(id)}. Known ids: ${Object.keys(MODELS).join(", ")}`
    );
  }

  const { base_url, search_prefix, document_prefix } = MODELS[id];
  return {
    modelURL: base_url + "model.safetensors",
    configURL: base_url + "config.json",
    tokenizerURL: base_url + "tokenizer.json",
    search_prefix,
    document_prefix,
  };
}
|
|
|
/**
 * Computes the cosine similarity of two numeric vectors: their dot product
 * divided by the product of their Euclidean norms.
 *
 * The dot product pairs each element of `vec1` with the element of `vec2` at
 * the same index. No length or zero-norm validation is performed, so a
 * zero-norm vector yields `NaN` (0 / 0).
 *
 * @param {ArrayLike<number>} vec1 - First vector; must provide `reduce`.
 * @param {ArrayLike<number>} vec2 - Second vector; must provide `reduce`.
 * @returns {number} The cosine similarity, or `NaN` when it is undefined.
 */
export function cosineSimilarity(vec1, vec2) {
  // Shared reducer: running sum of squared components.
  const addSquare = (total, component) => total + component * component;

  const dotProduct = vec1.reduce(
    (total, component, index) => total + component * vec2[index],
    0
  );
  const norm1 = Math.sqrt(vec1.reduce(addSquare, 0));
  const norm2 = Math.sqrt(vec2.reduce(addSquare, 0));

  return dotProduct / (norm1 * norm2);
}
|
/**
 * Fetches the plain-text extract of an English Wikipedia article through the
 * MediaWiki `action=query&prop=extracts` API.
 *
 * The title is percent-encoded before being placed in the query string, so
 * titles containing characters such as `&`, `#`, `+` or `=` are sent intact
 * instead of corrupting the request parameters.
 *
 * Failures are not propagated: any error (network failure, unexpected
 * response shape, missing or empty extract) is logged with `console.error`
 * and the returned promise resolves to `undefined`.
 *
 * @param {string} article - Title of the article to look up.
 * @returns {Promise<string|undefined>} The article extract, or `undefined`
 *   when it could not be retrieved.
 */
export async function getWikiText(article) {
  // Named `endpoint` rather than `URL` to avoid shadowing the global URL class.
  const endpoint = `https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exlimit=1&titles=${encodeURIComponent(
    article
  )}&explaintext=1&exsectionformat=plain&format=json&origin=*`;

  try {
    const response = await fetch(endpoint, {
      method: "GET",
      headers: {
        Accept: "application/json",
      },
    });
    const data = await response.json();

    // `pages` is keyed by page id; only the first entry is inspected.
    const pages = data.query.pages;
    const pageId = Object.keys(pages)[0];
    const extract = pages[pageId].extract;

    if (extract === undefined || extract === "") {
      throw new Error("No article found");
    }
    return extract;
  } catch (error) {
    // Best-effort lookup: report the problem and resolve to undefined.
    console.error("Error:", error);
    return undefined;
  }
}
|
|