// File-viewer residue kept as a comment so the module parses; it is not part of the program.
// Spaces: Paused, Paused. File size: 3,102 Bytes. 872630d. (Line-number gutter 1-107 omitted.)
import { pipeline } from "@xenova/transformers";
/**
 * Minimal in-memory vector store.
 *
 * Embeddings and their documents are kept in two parallel arrays and are
 * ranked against a query with `cosineSimilarity` every time a search runs.
 */
export class SimpleVectorStore {
  constructor() {
    // documents[i] is the payload that belongs to embeddings[i].
    this.documents = [];
    this.embeddings = [];
  }

  /**
   * Appends one embedding/document pair to the store.
   *
   * @param {number[]} embedding - Numeric vector; stored as given and later
   *   passed unchanged to `cosineSimilarity`.
   * @param {*} document - Payload returned by searches; stored as given.
   */
  addDocument(embedding, document) {
    this.embeddings.push(embedding);
    this.documents.push(document);
  }

  /**
   * Scores every stored embedding against the query and returns the best
   * matches, highest score first.
   *
   * @param {number[]} queryEmbedding - Vector to compare against the store.
   * @param {number} [topK] - Maximum number of results. When omitted, every
   *   stored document is returned (it is used directly as the `slice` end).
   * @returns {Promise<Array<{document: *, score: number}>>} Matches sorted by
   *   descending score; an empty array when the store is empty.
   */
  async similaritySearch(queryEmbedding, topK) {
    const ranked = this.embeddings
      .map((embedding, index) => ({
        index,
        score: cosineSimilarity(embedding, queryEmbedding),
      }))
      .sort((a, b) => b.score - a.score);

    return ranked.slice(0, topK).map(({ index, score }) => ({
      document: this.documents[index],
      score,
    }));
  }
}
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * @param {number[]} vecA - First vector; its length drives the dot product.
 * @param {number[]} vecB - Second vector, expected to have the same length as
 *   `vecA`. If it is shorter, the missing components make the result `NaN`.
 * @returns {number} Dot product divided by the product of the magnitudes, or
 *   0 when either vector has zero magnitude.
 */
export function cosineSimilarity(vecA, vecB) {
  let dotProduct = 0;
  let sumSquaresA = 0;
  for (let i = 0; i < vecA.length; i += 1) {
    const a = vecA[i];
    dotProduct += a * vecB[i];
    sumSquaresA += a * a;
  }

  let sumSquaresB = 0;
  for (let i = 0; i < vecB.length; i += 1) {
    sumSquaresB += vecB[i] * vecB[i];
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  // A zero-magnitude vector would give 0/0 = NaN, which breaks numeric sort
  // comparators downstream; treat it as "no similarity" instead.
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
/**
 * Embeds text through a lazily created `pipeline` client and indexes the
 * resulting vectors in a `SimpleVectorStore` for similarity search.
 */
class EmbeddingsWorker {
  /**
   * @param {string} [modelName="Xenova/all-MiniLM-L6-v2"] - Model identifier
   *   handed to `pipeline` when the client is first created.
   */
  constructor(modelName = "Xenova/all-MiniLM-L6-v2") {
    this.modelName = modelName;
    // Populated by loadClient() on first use.
    this.client = null;
    this.vectorStore = new SimpleVectorStore();
  }

  /**
   * Creates the embedding pipeline if it does not exist yet; later calls
   * reuse the instance stored in `this.client`.
   *
   * @returns {Promise<void>}
   */
  async loadClient() {
    if (!this.client) {
      this.client = await pipeline("embeddings", this.modelName);
    }
  }

  /**
   * Embeds each text with mean pooling and normalization enabled.
   *
   * @param {string[]} texts - Inputs to embed; one client call is made per text.
   * @returns {Promise<Array>} The `data` field of each client response, in the
   *   same order as `texts`.
   */
  async _embed(texts) {
    await this.loadClient();
    return Promise.all(
      texts.map(async (text) => {
        const { data } = await this.client(text, {
          pooling: "mean",
          normalize: true,
        });
        return data;
      }),
    );
  }

  /**
   * Embeds the given documents and adds each embedding/document pair to the
   * vector store.
   *
   * @param {string[]} docs - Documents to embed and index.
   * @returns {Promise<void>}
   */
  async addDocumentsToStore(docs) {
    const embeddings = await this._embed(docs);
    embeddings.forEach((embedding, index) => {
      this.vectorStore.addDocument(embedding, docs[index]);
    });
  }

  /**
   * Embeds the query and returns the closest stored documents.
   *
   * @param {string} query - Text to search for.
   * @param {number} [topK] - Forwarded to the store's `similaritySearch`.
   * @returns {Promise<*>} Whatever the store's `similaritySearch` resolves to.
   */
  async searchSimilarDocuments(query, topK) {
    const [queryEmbedding] = await this._embed([query]);
    return this.vectorStore.similaritySearch(queryEmbedding, topK);
  }
}
/**
 * Smoke test for the vector store: loads three unit vectors, logs the cosine
 * similarity of two orthogonal vectors, then logs the top two matches for
 * the first unit vector.
 *
 * @returns {Promise<void>} Settles once the search results have been logged.
 */
async function testVectorStore() {
  const store = new SimpleVectorStore();

  // Mock embeddings (simple vectors for testing)
  const mockEmbeddings = [
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
  ];

  // Add mock embeddings to the store
  mockEmbeddings.forEach((emb, index) => {
    store.addDocument(emb, `Document ${index + 1}`);
  });

  // Test cosine similarity directly
  const cosSimTest = cosineSimilarity([1, 0, 0], [0, 1, 0]);
  console.log('Cosine Similarity Test:', cosSimTest); // Should be 0 for orthogonal vectors

  // similaritySearch is async: without `await` the Promise itself would be
  // logged instead of the ranked results.
  const results = await store.similaritySearch([1, 0, 0], 2);
  console.log('Similarity Search Results:', results);
}
// Run the smoke test as soon as the module is evaluated.
testVectorStore();