Slach ysharma HF staff committed on
Commit
2164147
·
0 Parent(s):

Duplicate from ysharma/LangchainBot-space-creator

Browse files

Co-authored-by: yuvraj sharma <ysharma@users.noreply.huggingface.co>

Files changed (6) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +173 -0
  4. requirements.txt +5 -0
  5. template/app_og.py +80 -0
  6. template/requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: LangchainBot space creator
3
+ emoji: 🌌🔨
4
+ colorFrom: red
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.10.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: ysharma/LangchainBot-space-creator
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.llms import OpenAI
2
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
3
+ from langchain.docstore.document import Document
4
+ import requests
5
+ import pathlib
6
+ import subprocess
7
+ import tempfile
8
+ import os
9
+ import gradio as gr
10
+ import pickle
11
+ from huggingface_hub import HfApi, upload_folder
12
+ from huggingface_hub import whoami, list_models
13
+
14
+ # using a vector space for our search
15
+ from langchain.embeddings.openai import OpenAIEmbeddings
16
+ from langchain.vectorstores.faiss import FAISS
17
+ from langchain.text_splitter import CharacterTextSplitter
18
+
19
+
20
#Code for extracting the markdown files from a Repo
#To get markdowns from github for any/your repo
def get_github_docs(repo_link):
    """Clone a GitHub repository and yield one Document per Markdown file.

    Args:
        repo_link: Either "owner/name" or a full GitHub URL ending in
            "owner/name". A trailing slash or ".git" suffix is tolerated.

    Yields:
        Document objects whose ``page_content`` is the text of a ``*.md`` or
        ``*.mdx`` file and whose ``source`` metadata is a GitHub URL pinned
        to the commit that was cloned.

    Raises:
        ValueError: if an owner and repository name cannot be extracted
            from ``repo_link``.
        subprocess.CalledProcessError: if ``git clone`` or
            ``git rev-parse`` exits with a non-zero status.
    """
    parts = repo_link.strip().rstrip('/').split('/')
    if len(parts) < 2 or not parts[-2] or not parts[-1]:
        raise ValueError(
            f"Expected a GitHub link of the form 'owner/name', got: {repo_link!r}"
        )
    repo_owner, repo_name = parts[-2], parts[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    with tempfile.TemporaryDirectory() as d:
        # Argument lists instead of a shell string: repo_link comes straight
        # from a text box, so it must never be interpreted by a shell.
        subprocess.check_call(
            ["git", "clone", f"https://github.com/{repo_owner}/{repo_name}.git", "."],
            cwd=d,
        )
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            try:
                with open(markdown_file, "r", encoding="utf-8") as f:
                    text = f.read()
            except (OSError, UnicodeDecodeError):
                # Best effort: skip broken symlinks, directories that happen
                # to be named *.md, and files that are not valid UTF-8.
                print(f"Could not open file: {markdown_file}")
                continue
            # as_posix() keeps forward slashes in the URL on every platform.
            relative_path = markdown_file.relative_to(repo_path).as_posix()
            github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
            yield Document(page_content=text, metadata={"source": github_url})
48
+
49
#Code for creating a new space for the user
def create_space(repo_link, hf_token):
    """Create (or reuse) the public Gradio Space that will host the chatbot.

    Args:
        repo_link: GitHub link or "owner/name"; only the last path segment
            (the repository name) is used. A trailing slash or ".git"
            suffix is tolerated.
        hf_token: Hugging Face token used to authenticate the request.

    Returns:
        Whatever ``HfApi.create_repo`` returns for the Space
        ``LangChain_<repo_name>Bot``.
    """
    repo_name = repo_link.strip().rstrip('/').split('/')[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    api = HfApi(token=hf_token)
    # No namespace is given in repo_id (example result: ysharma/LangChain_GradioBot).
    # exist_ok=True makes a second run for the same repository reuse the Space.
    return api.create_repo(
        repo_id=f'LangChain_{repo_name}Bot',
        exist_ok=True,
        repo_type="space",
        space_sdk="gradio",
        private=False,
    )
59
+
60
#Code for creating the search index
#Saving search index to disk
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over a repository's Markdown and pickle it to disk.

    Args:
        repo_link: GitHub link passed on to ``get_github_docs``.
        openai_api_key: Key handed to ``OpenAIEmbeddings``.

    Returns:
        The path of the written file, always ``"search_index.pickle"`` in the
        current working directory.

    Raises:
        ValueError: if the repository produced no text chunks to index.
    """
    splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    source_chunks = []
    for source in get_github_docs(repo_link):
        # Every chunk keeps the metadata (source URL) of the file it came from.
        source_chunks.extend(
            Document(page_content=chunk, metadata=source.metadata)
            for chunk in splitter.split_text(source.page_content)
        )

    if not source_chunks:
        # Fail with a readable message instead of passing an empty corpus on
        # (presumably the vector store cannot build an index from nothing).
        raise ValueError(f"No Markdown content found to index in: {repo_link!r}")

    search_index = FAISS.from_documents(source_chunks, OpenAIEmbeddings(openai_api_key=openai_api_key))

    #saving FAISS search index to disk
    with open("search_index.pickle", "wb") as f:
        pickle.dump(search_index, f)
    return "search_index.pickle"
76
+
77
def upload_files_to_space(repo_link, hf_token):
    """Render the chatbot template and upload it with the index to the Space.

    Uploads three files to ``<user>/LangChain_<repo_name>Bot``: the rendered
    ``app.py``, ``search_index.pickle`` and the template ``requirements.txt``.

    Args:
        repo_link: GitHub link or "owner/name"; only the last path segment
            (the repository name) is used. A trailing slash or ".git"
            suffix is tolerated.
        hf_token: Hugging Face token used for ``whoami`` and the uploads.

    Returns:
        An HTML snippet that links to the new Space.
    """
    repo_name = repo_link.strip().rstrip('/').split('/')[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    api = HfApi(token=hf_token)
    user_name = whoami(token=hf_token)['name']
    space_name = f"{user_name}/LangChain_{repo_name}Bot"

    #Replacing the repo name in app.py
    with open("template/app_og.py", "r", encoding="utf-8") as f:
        app = f.read()
    app = app.replace("$RepoName", repo_name)

    # (local path, path inside the Space), uploaded in this order
    uploads = [
        ("template/app.py", "app.py"),
        ("search_index.pickle", "search_index.pickle"),
        ("template/requirements.txt", "requirements.txt"),
    ]
    try:
        #Saving the new app.py file to disk
        with open("template/app.py", "w", encoding="utf-8") as f:
            f.write(app)
        for local_path, path_in_repo in uploads:
            api.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=path_in_repo,
                repo_id=space_name,
                token=hf_token,
                repo_type="space",
            )
    finally:
        # The rendered app.py is a throwaway; remove it even when an upload fails.
        if os.path.exists("template/app.py"):
            os.remove("template/app.py")
    # The index is deleted only after a successful upload, so a failed attempt
    # (e.g. a bad token) can be retried without rebuilding it.
    os.remove("search_index.pickle")

    repo_url = f"https://huggingface.co/spaces/{user_name}/LangChain_{repo_name}Bot"
    return "<p style='color: orange; text-align: center; font-size: 24px; background-color: lightgray;'>🎉Congratulations🎉 Chatbot created successfully! Access it here : <a href="+ repo_url + " target='_blank'>" + space_name + "</a></p>"
119
+
120
+
121
def driver(repo_link, hf_token):
    """Create the user's Space, upload the chatbot files and return the result HTML.

    The search index is not built here; the upload step reads the
    ``search_index.pickle`` file that is already on disk.
    """
    # Step 1: make sure the target Space exists.
    create_space(repo_link, hf_token)
    # Step 2: push the app, the index and the requirements to it.
    result_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {result_html}")
    return result_html
130
+
131
def set_state():
    """Return two updates that each switch a component to visible."""
    first_update = gr.update(visible=True)
    second_update = gr.update(visible=True)
    return first_update, second_update
133
+
134
#Gradio code for Repo as input and search index as output file
# Page header shown above the controls.
_HEADER_HTML = """<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
QandA Chatbot Creator for Github Repos - Automation done using LangChain, Gradio, and Spaces
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Generate a top-notch <b>Q&A Chatbot</b> for your Github Repo, using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a>.
Paste your Github repository link, enter your OpenAI API key, and the app will create a FAISS embedding vector space for you.
Next, input your Huggingface Token and press the final button.<br><br>
Your new chatbot will be ready under your Huggingface profile, accessible via the displayed link.
<center><a href="https://huggingface.co/spaces/ysharma/LangchainBot-space-creator?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></center>
</p>
</div>"""

# NOTE(review): the nesting below is reconstructed from a flattened listing;
# confirm that the File output sits beside the input column.
with gr.Blocks() as demo:
    gr.HTML(_HEADER_HTML)

    # Step 1: repository + OpenAI key -> search index file
    with gr.Row():
        with gr.Column():
            repo_link = gr.Textbox(label="Enter Github repo name")
            openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
            btn_faiss = gr.Button("Create Search index")
        search_index_file = gr.File(label='Search index vector')

    # Step 2: hidden until the first button has been pressed
    with gr.Row():
        hf_token_in = gr.Textbox(type='password', label="Enter hf-token name", visible=False)
        btn_create_space = gr.Button("Create Your Chatbot", visible=False)
    html_out = gr.HTML()

    # The first button has two listeners: build the index, and reveal step 2.
    btn_faiss.click(
        fn=create_search_index,
        inputs=[repo_link, openai_api_key],
        outputs=search_index_file,
    )
    btn_faiss.click(fn=set_state, inputs=[], outputs=[hf_token_in, btn_create_space])
    btn_create_space.click(fn=driver, inputs=[repo_link, hf_token_in], outputs=html_out)

demo.queue()
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain==0.0.55
2
+ requests
3
+ openai
4
+ transformers
5
+ faiss-cpu
template/app_og.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.llms import OpenAI
2
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
3
+ from langchain.docstore.document import Document
4
+ import requests
5
+ import pathlib
6
+ import subprocess
7
+ import tempfile
8
+ import os
9
+ import gradio as gr
10
+ import pickle
11
+
12
+ # using a vector space for our search
13
+ from langchain.embeddings.openai import OpenAIEmbeddings
14
+ from langchain.vectorstores.faiss import FAISS
15
+ from langchain.text_splitter import CharacterTextSplitter
16
+
17
#loading FAISS search index from disk
# NOTE(review): unpickling executes code embedded in the file, so this is only
# safe while search_index.pickle comes from a trusted source.
with open("search_index.pickle", "rb") as index_file:
    search_index = pickle.load(index_file)
20
+
21
#Get GPT3 response using Langchain
def print_answer(question, openai): #openai_embeddings
    """Answer ``question`` from the search index and return it as HTML.

    Runs the QA-with-sources chain over the 4 most similar indexed chunks.
    When the last line of the model output is a source list (a line starting
    with "SOURCE..."), that line is replaced by "Click Link" anchors.

    Args:
        question: The user's question.
        openai: The LLM object passed to ``load_qa_with_sources_chain``.

    Returns:
        The answer text, followed by one anchor per cited source. If the
        output has no source line the answer text is returned unchanged.
    """
    from html import escape  # local import: only needed for the link markup

    chain = load_qa_with_sources_chain(openai) #(OpenAI(temperature=0))
    response = chain(
        {
            "input_documents": search_index.similarity_search(question, k=4),
            "question": question,
        },
        return_only_outputs=True,
    )["output_text"]

    lines = response.strip().split('\n')
    last_tokens = lines[-1].split()
    # Only a line the model actually labelled as sources is parsed for URLs;
    # otherwise the last word of a plain answer would be turned into a link.
    if last_tokens and last_tokens[0].upper().startswith("SOURCE"):
        answer = '\n'.join(lines[:-1]).strip()
        # Sources are usually comma separated; keep commas out of the hrefs.
        urls = [token.strip(',') for token in last_tokens[1:]]
        urls = [url for url in urls if url]
    else:
        answer = response.strip()
        urls = []

    if not urls:
        return answer
    if len(urls) == 1:
        links = ' <a href="' + escape(urls[0], quote=True) + '" target="_blank"><u>Click Link</u></a>'
    else:
        links = ', '.join(
            ' <a href="' + escape(url, quote=True) + '" target="_blank"><u>Click Link' + str(i) + '</u></a>'
            for i, url in enumerate(urls, start=1)
        )
    return answer + links
39
+
40
+
41
def chat(message, history, openai_api_key):
    """Handle one chat turn and return the updated history twice.

    The history is returned once for the chatbot display and once for the
    session state. The message is lower-cased before it is answered and
    before it is stored.
    """
    llm = OpenAI(temperature=0, openai_api_key=openai_api_key )
    if not history:
        # First turn of the session: start a fresh history.
        history = []
    message = message.lower()
    answer = print_answer(message, llm)
    history.append((message, answer))
    return history, history
50
+
51
+
52
# Page header; every $RepoName placeholder is substituted when the Space is generated.
_HEADER_HTML = """<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
$RepoName QandA - LangChain Bot
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Hi, I'm a Q and A $RepoName expert bot, start by typing in your OpenAI API key, questions/issues you are facing in your $RepoName implementations and then press enter.<br>
<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>
Built using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a> for the $RepoName Repo
</p>
</div>"""

# NOTE(review): the nesting below is reconstructed from a flattened listing;
# confirm that only the two text boxes belong to the row.
with gr.Blocks() as demo:
    gr.HTML(_HEADER_HTML)
    with gr.Row():
        question = gr.Textbox(
            label='Type in your questions about $RepoName here and press Enter!',
            placeholder='What questions do you want to ask about the $RepoName library?',
        )
        openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
    state = gr.State()
    chatbot = gr.Chatbot()
    # Pressing Enter in the question box runs one chat turn.
    question.submit(
        fn=chat,
        inputs=[question, state, openai_api_key],
        outputs=[chatbot, state],
    )

if __name__ == "__main__":
    demo.launch()
template/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain==0.0.55
2
+ requests
3
+ openai
4
+ transformers
5
+ huggingface_hub
6
+ faiss-cpu