Spaces:

realrohilbansal
/

course-search-av

Sleeping

course-search-av / tests /test_complete_pipeline.py

Rohil Bansal

commit

2ed2129 4 months ago

1.6 kB

	import logging
	from course_search.search_system.embeddings import EmbeddingGenerator
	from course_search.search_system.vector_store import FAISSManager
	import pandas as pd
	import numpy as np

	# Module-level logger named after this module; used for all progress messages below.
	logger = logging.getLogger(__name__)
	# Configure the root logger so INFO-level (and higher) records are emitted.
	logging.basicConfig(level=logging.INFO)

	def test_pipeline():
	    """Exercise embedding generation, FAISS storage and search end to end.

	    Builds a two-row course DataFrame, adds embeddings to it with
	    ``EmbeddingGenerator``, upserts it into a ``FAISSManager`` store, and
	    runs one query against the store.

	    Returns:
	        True when every step completes and both assertions hold.

	    Raises:
	        AssertionError: If the ``embeddings`` column is missing or the
	            search returns no results.
	        Exception: Any error raised by the pipeline components; it is
	            logged with its traceback and then re-raised.
	    """
	    try:
	        # Minimal in-memory sample with title/description/url columns.
	        test_data = pd.DataFrame({
	            'title': ['Test Course 1', 'Test Course 2'],
	            'description': [
	                'This is a test course about Python',
	                'This is a test course about ML',
	            ],
	            'url': ['http://test1.com', 'http://test2.com'],
	        })

	        # Test embedding generation
	        logger.info("Testing embedding generation...")
	        embedding_gen = EmbeddingGenerator()
	        test_data = embedding_gen.add_embeddings_to_df(test_data)
	        assert 'embeddings' in test_data.columns, \
	            "add_embeddings_to_df did not produce an 'embeddings' column"
	        logger.info("Embedding generation successful!")

	        # Test FAISS storage
	        logger.info("Testing FAISS storage...")
	        vector_store = FAISSManager()
	        vector_store.upsert_courses(test_data)
	        logger.info("FAISS storage successful!")

	        # Test search
	        logger.info("Testing search...")
	        query = "Python programming"
	        # generate_embeddings is given a one-element Series; its first item
	        # is used as the query embedding.
	        query_embedding = embedding_gen.generate_embeddings(pd.Series([query]))[0]
	        results = vector_store.search_courses(query_embedding)
	        assert len(results) > 0, "search returned no results for the test query"
	        logger.info("Search successful!")
	    except Exception as exc:
	        # Log with traceback, then re-raise: swallowing the error here would
	        # make the test report success to the test runner even when an
	        # assertion or a pipeline step failed.
	        logger.exception("Pipeline test failed: %s", exc)
	        raise

	    return True

	if __name__ == "__main__":
	    import sys

	    # Propagate the outcome to the shell: exit status 0 only when the
	    # pipeline test reports success, 1 otherwise.
	    sys.exit(0 if test_pipeline() else 1)