File size: 1,597 Bytes
2ed2129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import logging
from course_search.search_system.embeddings import EmbeddingGenerator
from course_search.search_system.vector_store import FAISSManager
import pandas as pd
import numpy as np

# Configure the root logger at INFO level so the progress messages emitted
# below are shown when this file is run as a script. (basicConfig does nothing
# if the root logger already has handlers installed.)
logging.basicConfig(level=logging.INFO)
# Module-scoped logger used by test_pipeline for progress and failure reports.
logger = logging.getLogger(__name__)

def test_pipeline() -> bool:
    """Run an end-to-end smoke test of embedding, storage and search.

    Builds a two-row course DataFrame, adds embeddings to it with
    ``EmbeddingGenerator``, upserts the result into a ``FAISSManager`` and
    runs a single query against that store.

    Returns:
        True if every stage ran and its check passed, False otherwise.
        Failures are logged (with traceback) instead of being raised, so
        callers must inspect the return value to detect them.
    """
    try:
        # Minimal fixture: two courses with the columns used by the pipeline.
        test_data = pd.DataFrame({
            'title': ['Test Course 1', 'Test Course 2'],
            'description': ['This is a test course about Python', 'This is a test course about ML'],
            'url': ['http://test1.com', 'http://test2.com'],
        })

        # Stage 1: embedding generation.
        logger.info("Testing embedding generation...")
        embedding_gen = EmbeddingGenerator()
        test_data = embedding_gen.add_embeddings_to_df(test_data)
        # A message is attached so the failure log says which check tripped;
        # a bare AssertionError stringifies to an empty message.
        assert 'embeddings' in test_data.columns, (
            "add_embeddings_to_df did not add an 'embeddings' column"
        )
        logger.info("Embedding generation successful!")

        # Stage 2: FAISS storage.
        logger.info("Testing FAISS storage...")
        vector_store = FAISSManager()
        vector_store.upsert_courses(test_data)
        logger.info("FAISS storage successful!")

        # Stage 3: search using the embedding of a single query string.
        logger.info("Testing search...")
        query = "Python programming"
        query_embedding = embedding_gen.generate_embeddings(pd.Series([query]))[0]
        results = vector_store.search_courses(query_embedding)
        # len() is used instead of truthiness because the container type of
        # `results` is not visible here and may not define a plain bool().
        assert len(results) > 0, "search_courses returned no results"
        logger.info("Search successful!")

        return True

    except Exception as e:
        # Top-level boundary of the smoke test: report and signal failure via
        # the return value. logger.exception logs at ERROR level and appends
        # the traceback, which the previous logger.error call discarded.
        logger.exception("Pipeline test failed: %s", e)
        return False

if __name__ == "__main__":
    # Propagate the outcome as the process exit status (0 = success,
    # 1 = failure) so shells and CI can detect a failed run; previously the
    # boolean result was discarded and the script always exited with 0.
    raise SystemExit(0 if test_pipeline() else 1)