# course-search-av/tests/test_complete_pipeline.py
# Rohil Bansal
# commit 2ed2129
import logging
from course_search.search_system.embeddings import EmbeddingGenerator
from course_search.search_system.vector_store import FAISSManager
import pandas as pd
import numpy as np
# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def test_pipeline():
    """Exercise embedding generation, FAISS storage and search end to end.

    Builds a two-row DataFrame of sample courses, adds embeddings to it via
    ``EmbeddingGenerator.add_embeddings_to_df``, upserts the rows through
    ``FAISSManager.upsert_courses``, then embeds a text query and searches
    the store with ``FAISSManager.search_courses``.

    Returns:
        True when every stage completes and both assertions hold.

    Raises:
        AssertionError: If the ``embeddings`` column is missing after
            embedding generation, or the search returns no results.
        Exception: Any other error raised by a pipeline stage. Failures are
            logged with their traceback and re-raised rather than swallowed,
            so a test runner actually reports the test as failed.
    """
    try:
        # Test data
        test_data = pd.DataFrame(
            {
                'title': ['Test Course 1', 'Test Course 2'],
                'description': [
                    'This is a test course about Python',
                    'This is a test course about ML',
                ],
                'url': ['http://test1.com', 'http://test2.com'],
            }
        )

        # Test embedding generation
        logger.info("Testing embedding generation...")
        embedding_gen = EmbeddingGenerator()
        test_data = embedding_gen.add_embeddings_to_df(test_data)
        assert 'embeddings' in test_data.columns, (
            "add_embeddings_to_df did not add an 'embeddings' column"
        )
        logger.info("Embedding generation successful!")

        # Test FAISS storage
        logger.info("Testing FAISS storage...")
        vector_store = FAISSManager()
        vector_store.upsert_courses(test_data)
        logger.info("FAISS storage successful!")

        # Test search
        logger.info("Testing search...")
        query = "Python programming"
        # generate_embeddings is given a one-element Series; take the single
        # embedding that corresponds to the query.
        query_embedding = embedding_gen.generate_embeddings(pd.Series([query]))[0]
        results = vector_store.search_courses(query_embedding)
        # len() rather than truthiness: the result container type is not
        # visible here and may not define an unambiguous bool().
        assert len(results) > 0, "search_courses returned no results"
        logger.info("Search successful!")
    except Exception as e:
        # Keep the traceback in the log, then propagate so the failure is
        # visible to pytest / the calling process instead of being hidden
        # behind a False return value.
        logger.exception("Pipeline test failed: %s", e)
        raise
    return True
if __name__ == "__main__":
    # Report the outcome through the process exit status (0 = passed,
    # 1 = failed) so shells and CI jobs can detect a failing run.
    raise SystemExit(0 if test_pipeline() else 1)