Spaces:
Running
Running
import duckdb
import polars as pl
from datasets import load_dataset
from model2vec import StaticModel

# Semantic-search demo: embed every prompt of a Hugging Face dataset with a
# Model2Vec static model, then let DuckDB return the prompt whose embedding
# is closest to the embedding of a free-text query.

# Load a static embedding model from the Hugging Face Hub.
model_name = "minishlab/M2V_multilingual_output"
model = StaticModel.from_pretrained(model_name)

# Make embeddings: one vector per row of the "prompt" column.
ds = load_dataset("fka/awesome-chatgpt-prompts")
df = ds["train"].to_polars()
# Pass a plain list of strings rather than a polars Series so the call does
# not depend on how the encoder batches/slices its input.
embeddings = model.encode(df["prompt"].to_list())
df = df.with_columns(pl.Series(embeddings).alias("embeddings"))

# Embed the search query; a single string yields a single vector.
vector = model.encode("vector search", show_progress_bar=True)

# The cast must use the same fixed length as the stored embeddings, so take
# it from the query vector instead of hard-coding one model's dimension.
# `df` is referenced by name inside the SQL: DuckDB resolves it to the Python
# variable above, which is why this script keeps it at module level.
duckdb.sql(
    query=f"""
    SELECT *
    FROM df
    ORDER BY array_distance(embeddings, {vector.tolist()}::FLOAT[{len(vector)}])
    LIMIT 1
    """
).show()