"""Smoke test for the course-search pipeline.

Exercises embedding generation, FAISS storage and search end to end on a
two-row in-memory dataset.
"""

import logging

import numpy as np
import pandas as pd

from course_search.search_system.embeddings import EmbeddingGenerator
from course_search.search_system.vector_store import FAISSManager

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_pipeline():
    """Run the embedding -> storage -> search pipeline on sample data.

    Steps, in order:
      1. Build a two-course DataFrame with ``title``, ``description`` and
         ``url`` columns.
      2. Pass it through ``EmbeddingGenerator.add_embeddings_to_df`` and
         check that the result has an ``embeddings`` column.
      3. Store the result with ``FAISSManager.upsert_courses``.
      4. Embed the query "Python programming" and check that
         ``FAISSManager.search_courses`` returns a non-empty result.

    Returns:
        bool: ``True`` when every step succeeds; ``False`` when any step
        raises (including a failed assertion). The failure is logged with
        its traceback.

    Note:
        Exceptions are caught rather than propagated, so a caller must
        inspect the return value to detect a failure.
    """
    try:
        # Test data
        test_data = pd.DataFrame({
            'title': ['Test Course 1', 'Test Course 2'],
            'description': [
                'This is a test course about Python',
                'This is a test course about ML',
            ],
            'url': ['http://test1.com', 'http://test2.com'],
        })

        # Test embedding generation
        logger.info("Testing embedding generation...")
        embedding_gen = EmbeddingGenerator()
        test_data = embedding_gen.add_embeddings_to_df(test_data)
        assert 'embeddings' in test_data.columns, \
            "add_embeddings_to_df did not produce an 'embeddings' column"
        logger.info("Embedding generation successful!")

        # Test FAISS storage
        logger.info("Testing FAISS storage...")
        vector_store = FAISSManager()
        vector_store.upsert_courses(test_data)
        logger.info("FAISS storage successful!")

        # Test search
        logger.info("Testing search...")
        query = "Python programming"
        # generate_embeddings is given a one-element Series; take the
        # first (only) entry as the query embedding.
        query_embedding = embedding_gen.generate_embeddings(
            pd.Series([query])
        )[0]
        results = vector_store.search_courses(query_embedding)
        assert len(results) > 0, "search_courses returned no results"
        logger.info("Search successful!")

        return True
    except Exception as e:
        # logger.exception keeps the traceback, which str(e) alone loses
        # (an AssertionError without a message stringifies to "").
        logger.exception("Pipeline test failed: %s", e)
        return False


if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and CI
    # can detect a failed run.
    raise SystemExit(0 if test_pipeline() else 1)