Spaces:
Runtime error
Runtime error
File size: 1,694 Bytes
c69cba4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import pytest
from typing import Any
from huggingface_hub import snapshot_download
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
snapshot_download(
repo_id='KonradSzafer/index-large-notebooks',
allow_patterns=['*.faiss', '*.pkl'],
repo_type='dataset',
local_dir='indexes/'
)
@pytest.fixture(scope='module')
def embedding_model() -> HuggingFaceInstructEmbeddings:
model_name = 'hkunlp/instructor-large'
embed_instruction = 'Represent the Hugging Face library documentation'
query_instruction = 'Query the most relevant piece of information from the Hugging Face documentation'
return HuggingFaceInstructEmbeddings(
model_name=model_name,
embed_instruction=embed_instruction,
query_instruction=query_instruction,
)
@pytest.fixture(scope='module')
def index_path() -> str:
return 'indexes/'
@pytest.fixture(scope='module')
def index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str):
return FAISS.load_local(index_path, embedding_model)
@pytest.fixture(scope='module')
def query() -> str:
return 'How to use the tokenizer?'
def test_load_index(embedding_model: HuggingFaceInstructEmbeddings, index_path: str):
index = FAISS.load_local(index_path, embedding_model)
assert index is not None, 'Failed to load index'
def test_index_page_content(index, query: str):
query_docs = index.similarity_search(query=query, k=3)
assert isinstance(query_docs[0].page_content, str)
def test_index_metadata(index, query):
query_docs = index.similarity_search(query=query, k=3)
assert isinstance(query_docs[0].metadata['source'], str)
|