File size: 2,839 Bytes
57c83ef
 
 
 
178adb8
450f5f1
57c83ef
 
450f5f1
57c83ef
 
 
 
450f5f1
 
 
57c83ef
450f5f1
 
57c83ef
450f5f1
 
 
 
 
57c83ef
450f5f1
 
57c83ef
450f5f1
178adb8
 
 
 
 
 
 
 
 
 
 
 
 
 
450f5f1
 
178adb8
 
 
 
450f5f1
178adb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Author: Du Mingzhe (dumingzhex@gmail.com)
# Date: 2024/03/09

from openai import OpenAI
from pinecone import Pinecone
from datetime import datetime

class LLMClient:
    """Small wrapper around the OpenAI chat-completions endpoint.

    Attributes are set in ``__init__``:
        model_name: model identifier forwarded on every completion request.
        llm_client: ``OpenAI`` client built from the supplied API key.
    """

    def __init__(self, api_key, model_name) -> None:
        super().__init__()
        self.llm_client = OpenAI(api_key=api_key)
        self.model_name = model_name

    def response_generate(self, prompt, history):
        """Request a streamed chat completion for the given session history.

        The outgoing message list is a system message (persona plus the
        current local time) followed by one ``role``/``content`` pair per
        entry of ``history``.

        Args:
            prompt: Not read by this method.
                NOTE(review): presumably the caller has already appended the
                latest user prompt to ``history`` -- confirm.
            history: Iterable of mappings that each provide ``"role"`` and
                ``"content"`` keys; any other keys are dropped.

        Returns:
            Whatever ``chat.completions.create(..., stream=True)`` returns.
        """
        # Local wall-clock time, formatted day/month/year.
        timestamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

        system_message = {
            "role": "system",
            "content": f"1) You're Du Mingzhe, a computer science researcher. 2) Don't claim you are created by OpenAI. 3) Current time is {timestamp}.",
        }

        # Keep only the two fields that are forwarded to the API.
        past_turns = [
            {"role": turn["role"], "content": turn["content"]}
            for turn in history
        ]

        return self.llm_client.chat.completions.create(
            model=self.model_name,
            messages=[system_message, *past_turns],
            stream=True,
        )
    
class EmbeddingModel:
    """Turns text into an embedding vector via the OpenAI embeddings endpoint.

    Attributes are set in ``__init__``:
        embedding_token: API key used to build the OpenAI client.
        model_name: embedding model identifier sent with every request.
        embedding_client: ``OpenAI`` client built from ``embedding_token``.
    """

    def __init__(self, embedding_token, model_name) -> None:
        self.model_name = model_name
        self.embedding_token = embedding_token
        self.embedding_client = OpenAI(api_key=embedding_token)

    def get_embedding(self, text):
        """Return the embedding of ``text``.

        Args:
            text: Value forwarded unchanged as the ``input`` of the request.

        Returns:
            The ``embedding`` attribute of the first item in the response's
            ``data`` list.
        """
        result = self.embedding_client.embeddings.create(
            model=self.model_name,
            input=text,
        )
        # Only the first entry of the response is used.
        first_item = result.data[0]
        return first_item.embedding
    
class PersonalIndexClient(object):
    """Embeds text and stores/searches it in a named Pinecone index.

    Attributes are set in ``__init__``:
        index_token: API key used to build the Pinecone client.
        embedding_token: API key handed to ``EmbeddingModel``.
        index_name: name of the index opened on the Pinecone client.
        embedding_client: ``EmbeddingModel`` used to embed text.
        index_client: ``Pinecone`` client built from ``index_token``.
        index: handle returned by ``index_client.Index(index_name)``.
    """

    def __init__(self, index_token, embedding_token, embedding_model_name, index_name) -> None:
        self.index_token = index_token
        self.embedding_token = embedding_token
        self.index_name = index_name

        self.embedding_client = EmbeddingModel(embedding_token=self.embedding_token, model_name=embedding_model_name)
        self.index_client = Pinecone(api_key=self.index_token)
        self.index = self.index_client.Index(self.index_name)

    def create(self, data, namespace='default'):
        """Embed every record in ``data`` and upsert them in a single call.

        Args:
            data: Iterable of mappings, each providing ``"id"``,
                ``"content"`` (the value that gets embedded) and
                ``"metadata"`` keys.
            namespace: Namespace forwarded to ``index.upsert``.
        """
        # One embedding request is issued per record.
        instances = [
            {
                "id": instance["id"],
                "values": self.embedding_client.get_embedding(instance['content']),
                "metadata": instance['metadata'],
            }
            for instance in data
        ]

        self.index.upsert(
            vectors=instances,
            namespace=namespace,
        )

    def query(self, data, top_k=3, filter=None, namespace='default'):
        """Embed ``data`` and search the index for its nearest matches.

        Args:
            data: Value to embed and use as the query vector.
            top_k: Number of matches requested from the index.
            filter: Metadata filter forwarded to ``index.query``. When
                omitted (or ``None``) a new empty dict is sent. The name
                shadows the builtin but is kept so that keyword callers
                keep working.
            namespace: Namespace forwarded to ``index.query``.

        Returns:
            Whatever ``index.query`` returns; both vector values and
            metadata are requested.
        """
        # A fresh dict per call: a shared ``{}`` default would carry any
        # mutation made downstream over into every later query.
        query_filter = {} if filter is None else filter

        results = self.index.query(
            namespace=namespace,
            vector=self.embedding_client.get_embedding(data),
            top_k=top_k,
            include_values=True,
            include_metadata=True,
            filter=query_filter,
        )
        return results