File size: 2,676 Bytes
fe87ae1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6443dbc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

import os
import gradio as gr
import json
import os

# get the root path of the project
# Directory containing this file; used as the FAISS index location below.
current_file_path = os.path.dirname(os.path.abspath(__file__))
root_path = os.path.abspath(current_file_path)  # NOTE(review): current_file_path is already absolute, so this is a no-op — kept for safety

class RepoSearch:
    """Semantic search over a FAISS index of GitHub repository metadata.

    Each indexed document's ``page_content`` is a JSON string expected to
    contain at least ``name``, ``description`` and ``html_url`` keys
    (see ``search``).
    """

    def __init__(self):

        # db_path = os.path.join(root_path, "database", "faiss_index")
        db_path = root_path

        # SECURITY: an API key was hard-coded here. It is now read from the
        # environment first, falling back to the original literal so existing
        # deployments keep working unchanged. The committed key is leaked and
        # should be rotated; set OPENAI_API_KEY / OPENAI_BASE_URL instead.
        embeddings = OpenAIEmbeddings(
            api_key=os.environ.get(
                "OPENAI_API_KEY",
                "sk-Mo5K9m2hKXjV1DeGeBAIzXLZFxxiOTvSwUoemKmfMXdmE9Bs",
            ),
            base_url=os.environ.get("OPENAI_BASE_URL", "https://api.wlai.vip/v1"),
            model="text-embedding-3-small",
        )

        assert os.path.exists(db_path), f"Database not found: {db_path}"
        # allow_dangerous_deserialization: FAISS indexes are pickled on disk —
        # only load an index you built yourself.
        self.vector_db = FAISS.load_local(db_path, embeddings,
                                          allow_dangerous_deserialization=True)

    def search(self, query, k=10):
        '''
            Return the top-k matching repositories as one markdown string:
            name + description + html_url + topics

            Descriptions longer than 300 characters are truncated with "...";
            missing/null descriptions render as an empty string.
        '''
        # " technology" is appended to bias the embedding toward tech repos.
        results = self.vector_db.similarity_search(query + " technology", k=k)

        lines = []
        for i, doc in enumerate(results):
            content = json.loads(doc.page_content)
            # Handles both a null description and a missing key.
            desc = content.get("description") or ""
            if len(desc) >= 300:
                desc = desc[:300] + "..."
            lines.append(f"\t**{i+1}. {content['name']}** || **Description:** {desc} || **Url:** {content['html_url']} \n")

        return "".join(lines)


def main():
    """Build and launch the Gradio chat UI backed by RepoSearch."""
    repo_search = RepoSearch()

    def respond(
        prompt: str,
        history,
    ):
        # A fresh conversation starts with the system message.
        if not history:
            history = [{"role": "system", "content": "You are a friendly chatbot"}]
        history.append({"role": "user", "content": prompt})

        # Echo the user's turn immediately, then follow with the answer.
        yield history

        answer = {
            "role": "assistant",
            "content": repo_search.search(prompt),
        }
        yield history + [answer]

    with gr.Blocks() as demo:
        gr.Markdown("## Semantic github search (基于语义的 github 仓库搜索) 🌐")
        chatbot = gr.Chatbot(
            label="Agent",
            type="messages",
            avatar_images=(
                None,
                "https://img1.baidu.com/it/u=2193901176,1740242983&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=500",
            ),
            height="65vh",
        )
        prompt = gr.Textbox(max_lines=2, label="Chat Message")
        # First handler streams the reply; second clears the input box.
        prompt.submit(respond, [prompt, chatbot], [chatbot])
        prompt.submit(lambda: "", None, [prompt])

    demo.launch(share=True)


# Script entry point: start the Gradio app when run directly.
if __name__ == "__main__":
    main()