Spaces:
Sleeping
Sleeping
import logging | |
from course_search.search_system.embeddings import EmbeddingGenerator | |
from course_search.search_system.vector_store import FAISSManager | |
import pandas as pd | |
import numpy as np | |
logging.basicConfig(level=logging.INFO) | |
logger = logging.getLogger(__name__) | |
def test_pipeline() -> bool:
    """Run an end-to-end smoke test of the course-search pipeline.

    Builds a two-row DataFrame of sample courses, adds embeddings to it with
    ``EmbeddingGenerator.add_embeddings_to_df``, stores the rows through
    ``FAISSManager.upsert_courses``, and finally runs a single search for
    the query "Python programming".

    Returns:
        True if every stage completes and its check passes; False if any
        stage raises. Failures are logged together with their traceback.
    """
    try:
        # Test data
        test_data = pd.DataFrame(
            {
                'title': ['Test Course 1', 'Test Course 2'],
                'description': [
                    'This is a test course about Python',
                    'This is a test course about ML',
                ],
                'url': ['http://test1.com', 'http://test2.com'],
            }
        )

        # Test embedding generation
        logger.info("Testing embedding generation...")
        embedding_gen = EmbeddingGenerator()
        test_data = embedding_gen.add_embeddings_to_df(test_data)
        # A message is required here: a bare AssertionError stringifies to
        # "", which would leave the failure log with no detail at all.
        assert 'embeddings' in test_data.columns, (
            "add_embeddings_to_df did not add an 'embeddings' column"
        )
        logger.info("Embedding generation successful!")

        # Test FAISS storage
        logger.info("Testing FAISS storage...")
        vector_store = FAISSManager()
        vector_store.upsert_courses(test_data)
        logger.info("FAISS storage successful!")

        # Test search
        logger.info("Testing search...")
        query = "Python programming"
        # generate_embeddings is given a one-element Series; take the single
        # resulting embedding for the query.
        query_embedding = embedding_gen.generate_embeddings(pd.Series([query]))[0]
        results = vector_store.search_courses(query_embedding)
        assert len(results) > 0, "search_courses returned no results"
        logger.info("Search successful!")

        return True
    except Exception as e:
        # Top-level boundary of the smoke test: report the failure with its
        # traceback (logger.exception) and signal it through the return value.
        logger.exception("Pipeline test failed: %s", e)
        return False
if __name__ == "__main__":
    # Propagate the smoke-test outcome as the process exit status so that
    # shells and CI can detect a failure (0 = success, 1 = failure).
    raise SystemExit(0 if test_pipeline() else 1)