Spaces:
Runtime error
Runtime error
Duplicate from ysharma/LangchainBot-space-creator
Browse filesCo-authored-by: yuvraj sharma <ysharma@users.noreply.huggingface.co>
- .gitattributes +34 -0
- README.md +14 -0
- app.py +173 -0
- requirements.txt +5 -0
- template/app_og.py +80 -0
- template/requirements.txt +6 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: LangchainBot space creator
|
3 |
+
emoji: 🌌🔨
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.10.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
duplicated_from: ysharma/LangchainBot-space-creator
|
12 |
+
---
|
13 |
+
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.llms import OpenAI
|
2 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
3 |
+
from langchain.docstore.document import Document
|
4 |
+
import requests
|
5 |
+
import pathlib
|
6 |
+
import subprocess
|
7 |
+
import tempfile
|
8 |
+
import os
|
9 |
+
import gradio as gr
|
10 |
+
import pickle
|
11 |
+
from huggingface_hub import HfApi, upload_folder
|
12 |
+
from huggingface_hub import whoami, list_models
|
13 |
+
|
14 |
+
# using a vector space for our search
|
15 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
16 |
+
from langchain.vectorstores.faiss import FAISS
|
17 |
+
from langchain.text_splitter import CharacterTextSplitter
|
18 |
+
|
19 |
+
|
20 |
+
#Code for extracting the markdown files from a Repo
|
21 |
+
#To get markdowns from github for any/your repo
|
22 |
+
def get_github_docs(repo_link):
    """Yield a ``Document`` for every Markdown file of a GitHub repository.

    The repository is cloned into a temporary directory which stays alive
    while the generator is suspended and is removed once the generator is
    exhausted or closed. Each document's ``source`` metadata is a GitHub blob
    URL pinned to the commit that was cloned.

    Args:
        repo_link: Repository URL or ``owner/name`` string; the last two
            ``/``-separated components are used as owner and name. A trailing
            slash or a ``.git`` suffix is tolerated.

    Yields:
        Document: ``page_content`` is the file text, ``metadata`` is
        ``{"source": <blob url>}``.

    Raises:
        ValueError: If an owner and a repository name cannot be taken from
            ``repo_link``.
        subprocess.CalledProcessError: If ``git clone`` or ``git rev-parse``
            exits with a non-zero status.
    """
    # Dropping empty components makes "owner/name/" behave like "owner/name".
    parts = [part for part in repo_link.strip().split("/") if part]
    if len(parts) < 2:
        raise ValueError(
            f"Expected a GitHub link like 'owner/name', got: {repo_link!r}"
        )
    repo_owner, repo_name = parts[-2], parts[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    clone_url = f"https://github.com/{repo_owner}/{repo_name}.git"

    with tempfile.TemporaryDirectory() as d:
        # repo_link is user input: pass an argument list (no shell) so it can
        # never be interpreted as shell syntax. Only HEAD is needed, so a
        # shallow clone is enough.
        subprocess.check_call(
            ["git", "clone", "--depth", "1", clone_url, "."],
            cwd=d,
        )
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            # Keep the try body to the read itself so that exceptions raised
            # around the yield are not mistaken for a missing file.
            try:
                with open(markdown_file, "r", encoding="utf-8") as f:
                    text = f.read()
            except FileNotFoundError:
                # e.g. a dangling symlink inside the repository
                print(f"Could not open file: {markdown_file}")
                continue
            relative_path = markdown_file.relative_to(repo_path)
            github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
            yield Document(page_content=text, metadata={"source": github_url})
|
48 |
+
|
49 |
+
#Code for creating a new space for the user
|
50 |
+
def create_space(repo_link, hf_token):
    """Create the public Gradio Space that will host the generated chatbot.

    The Space is named ``LangChain_<repo>Bot`` where ``<repo>`` is the last
    ``/``-separated component of ``repo_link``. ``exist_ok=True`` is passed,
    so an already existing Space of that name is not treated as an error.

    Args:
        repo_link: GitHub repository link or ``owner/name`` string.
        hf_token: Hugging Face token used to authenticate the API client.

    Returns:
        None.
    """
    bot_repo = repo_link.rsplit('/', 1)[-1]
    hub = HfApi(token=hf_token)
    # No namespace is given in repo_id; presumably the Hub resolves it to the
    # account owning hf_token (e.g. ysharma/LangChain_GradioBot) - verify
    # against the huggingface_hub documentation.
    hub.create_repo(
        repo_id=f'LangChain_{bot_repo}Bot',
        repo_type="space",
        space_sdk="gradio",
        private=False,
        exist_ok=True,
    )
|
59 |
+
|
60 |
+
#Code for creating the search index
|
61 |
+
#Saving search index to disk
|
62 |
+
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over a repository's Markdown files and pickle it.

    Every document produced by ``get_github_docs`` is split on spaces into
    chunks (``chunk_size=1024``, no overlap); each chunk keeps the metadata
    of the document it came from. The chunks are embedded with OpenAI
    embeddings and the resulting index is pickled to ``search_index.pickle``
    in the current working directory.

    Args:
        repo_link: GitHub repository link forwarded to ``get_github_docs``.
        openai_api_key: API key handed to ``OpenAIEmbeddings``.

    Returns:
        str: The path of the pickle file, ``"search_index.pickle"``.
    """
    text_splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    chunked_docs = [
        Document(page_content=piece, metadata=doc.metadata)
        for doc in get_github_docs(repo_link)
        for piece in text_splitter.split_text(doc.page_content)
    ]

    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    index = FAISS.from_documents(chunked_docs, embeddings)

    # Persist the index so it can later be uploaded to the new Space.
    index_path = "search_index.pickle"
    with open(index_path, "wb") as out:
        pickle.dump(index, out)
    return index_path
|
76 |
+
|
77 |
+
def upload_files_to_space(repo_link, hf_token):
    """Upload the chatbot app, search index and requirements to the Space.

    The app template ``template/app_og.py`` is rendered by replacing every
    ``$RepoName`` with the repository name and uploaded as ``app.py``. The
    rendered source is uploaded straight from memory, so no intermediate
    file is left behind (or shared between requests) if an upload fails.
    ``search_index.pickle`` is expected in the current working directory
    (``create_search_index`` writes it there); it is deleted only after all
    uploads succeeded, so a failed attempt can be retried without rebuilding
    the index.

    Args:
        repo_link: GitHub repository link; its last ``/``-separated component
            is used as the repository name.
        hf_token: Hugging Face token used for ``whoami`` and for the uploads.

    Returns:
        str: An HTML snippet with a link to the Space.
    """
    repo_name = repo_link.split('/')[-1]
    api = HfApi(token=hf_token)
    user_name = whoami(token=hf_token)['name']
    space_name = f"{user_name}/LangChain_{repo_name}Bot"

    # Render the app template for this repository.
    with open("template/app_og.py", "r", encoding="utf-8") as f:
        app_source = f.read().replace("$RepoName", repo_name)

    # (content or local path, destination path inside the Space)
    uploads = (
        (app_source.encode("utf-8"), "app.py"),
        ("search_index.pickle", "search_index.pickle"),
        ("template/requirements.txt", "requirements.txt"),
    )
    for path_or_fileobj, path_in_repo in uploads:
        api.upload_file(
            path_or_fileobj=path_or_fileobj,
            path_in_repo=path_in_repo,
            repo_id=space_name,
            token=hf_token,
            repo_type="space",
        )

    # The index now lives in the Space; drop the local copy.
    os.remove("search_index.pickle")

    repo_url = f"https://huggingface.co/spaces/{space_name}"
    return (
        "<p style='color: orange; text-align: center; font-size: 24px; background-color: lightgray;'>"
        "🎉Congratulations🎉 Chatbot created successfully! Access it here : "
        f"<a href={repo_url} target='_blank'>{space_name}</a></p>"
    )
|
119 |
+
|
120 |
+
|
121 |
+
def driver(repo_link, hf_token):
    """Create the Space for a repository and upload the chatbot files to it.

    Args:
        repo_link: GitHub repository link.
        hf_token: Hugging Face token used for both steps.

    Returns:
        str: The HTML snippet returned by ``upload_files_to_space``.
    """
    # Step 1: make sure the target Space exists.
    create_space(repo_link, hf_token)
    # Step 2: push the files and collect the confirmation markup.
    confirmation_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {confirmation_html}")
    return confirmation_html
|
130 |
+
|
131 |
+
def set_state():
    """Return two Gradio updates that make their target components visible.

    Returns:
        tuple: Two ``gr.update(visible=True)`` values, one per output
        component wired to this callback.
    """
    show_first = gr.update(visible=True)
    show_second = gr.update(visible=True)
    return show_first, show_second
|
133 |
+
|
134 |
+
#Gradio code for Repo as input and search index as output file
|
135 |
+
with gr.Blocks() as demo:
|
136 |
+
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
|
137 |
+
<div
|
138 |
+
style="
|
139 |
+
display: inline-flex;
|
140 |
+
align-items: center;
|
141 |
+
gap: 0.8rem;
|
142 |
+
font-size: 1.75rem;
|
143 |
+
"
|
144 |
+
>
|
145 |
+
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
|
146 |
+
QandA Chatbot Creator for Github Repos - Automation done using LangChain, Gradio, and Spaces
|
147 |
+
</h1>
|
148 |
+
</div>
|
149 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
150 |
+
Generate a top-notch <b>Q&A Chatbot</b> for your Github Repo, using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a>.
|
151 |
+
Paste your Github repository link, enter your OpenAI API key, and the app will create a FAISS embedding vector space for you.
|
152 |
+
Next, input your Huggingface Token and press the final button.<br><br>
|
153 |
+
Your new chatbot will be ready under your Huggingface profile, accessible via the displayed link.
|
154 |
+
<center><a href="https://huggingface.co/spaces/ysharma/LangchainBot-space-creator?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></center>
|
155 |
+
</p>
|
156 |
+
</div>""")
|
157 |
+
with gr.Row() :
|
158 |
+
with gr.Column():
|
159 |
+
repo_link = gr.Textbox(label="Enter Github repo name")
|
160 |
+
openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
|
161 |
+
btn_faiss = gr.Button("Create Search index")
|
162 |
+
search_index_file = gr.File(label= 'Search index vector')
|
163 |
+
with gr.Row():
|
164 |
+
hf_token_in = gr.Textbox(type='password', label="Enter hf-token name", visible=False)
|
165 |
+
btn_create_space = gr.Button("Create Your Chatbot", visible=False)
|
166 |
+
html_out = gr.HTML()
|
167 |
+
|
168 |
+
btn_faiss.click(create_search_index, [repo_link, openai_api_key],search_index_file )
|
169 |
+
btn_faiss.click(fn=set_state, inputs=[] , outputs=[hf_token_in, btn_create_space])
|
170 |
+
btn_create_space.click(driver, [repo_link, hf_token_in], html_out)
|
171 |
+
|
172 |
+
demo.queue()
|
173 |
+
demo.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain==0.0.55
|
2 |
+
requests
|
3 |
+
openai
|
4 |
+
transformers
|
5 |
+
faiss-cpu
|
template/app_og.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.llms import OpenAI
|
2 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
3 |
+
from langchain.docstore.document import Document
|
4 |
+
import requests
|
5 |
+
import pathlib
|
6 |
+
import subprocess
|
7 |
+
import tempfile
|
8 |
+
import os
|
9 |
+
import gradio as gr
|
10 |
+
import pickle
|
11 |
+
|
12 |
+
# using a vector space for our search
|
13 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
14 |
+
from langchain.vectorstores.faiss import FAISS
|
15 |
+
from langchain.text_splitter import CharacterTextSplitter
|
16 |
+
|
17 |
+
#loading FAISS search index from disk
|
18 |
+
with open("search_index.pickle", "rb") as f:
|
19 |
+
search_index = pickle.load(f)
|
20 |
+
|
21 |
+
#Get GPT3 response using Langchain
|
22 |
+
def print_answer(question, openai): #openai_embeddings
    """Answer a question from the search index and format it as HTML.

    The four most similar documents are retrieved from the module-level
    ``search_index`` and passed, together with the question, to a
    question-answering-with-sources chain. The first line of the chain
    output is used as the answer; the whitespace-separated tokens of the
    last line are turned into links.

    Args:
        question: The question text.
        openai: The LLM object handed to ``load_qa_with_sources_chain``.

    Returns:
        str: The answer, followed by ``Click Link`` anchors when the output
        has a separate last line to take sources from.
    """
    chain = load_qa_with_sources_chain(openai)
    response = chain(
        {
            "input_documents": search_index.similarity_search(question, k=4),
            "question": question,
        },
        return_only_outputs=True,
    )["output_text"]

    # Stripping first keeps a leading newline from yielding an empty answer
    # and a trailing newline from yielding an empty sources line.
    lines = response.strip().split('\n')
    answer = lines[0]

    # Without a separate last line there is nothing to link to; linking the
    # answer's own last word would produce a bogus URL.
    if len(lines) < 2:
        return answer

    # Sources may be comma separated; a trailing comma must not end up
    # inside the href.
    tokens = [token.rstrip(',') for token in lines[-1].split()]
    tokens = [token for token in tokens if token]
    if not tokens:
        return answer

    if len(tokens) > 2:
        # The first token is treated as a label (presumably "SOURCES:") and
        # skipped; the remaining tokens become numbered links.
        links = ', '.join(
            f' <a href="{url}" target="_blank"><u>Click Link{i}</u></a>'
            for i, url in enumerate(tokens[1:], start=1)
        )
        return answer + links

    return answer + f' <a href="{tokens[-1]}" target="_blank"><u>Click Link</u></a>'
|
39 |
+
|
40 |
+
|
41 |
+
def chat(message, history, openai_api_key):
    """Answer one chat message and append the exchange to the history.

    Args:
        message: The user's question; it is lower-cased before being answered
            and stored.
        history: List of ``(message, response)`` tuples so far, or a falsy
            value when the conversation is new.
        openai_api_key: API key used to construct the ``OpenAI`` LLM.

    Returns:
        tuple: The updated history twice (one value per wired output).
    """
    llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
    turns = history if history else []
    prompt = message.lower()
    turns.append((prompt, print_answer(prompt, llm)))
    return turns, turns
|
50 |
+
|
51 |
+
|
52 |
+
with gr.Blocks() as demo:
|
53 |
+
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
|
54 |
+
<div
|
55 |
+
style="
|
56 |
+
display: inline-flex;
|
57 |
+
align-items: center;
|
58 |
+
gap: 0.8rem;
|
59 |
+
font-size: 1.75rem;
|
60 |
+
"
|
61 |
+
>
|
62 |
+
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
|
63 |
+
$RepoName QandA - LangChain Bot
|
64 |
+
</h1>
|
65 |
+
</div>
|
66 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
67 |
+
Hi, I'm a Q and A $RepoName expert bot, start by typing in your OpenAI API key, questions/issues you are facing in your $RepoName implementations and then press enter.<br>
|
68 |
+
<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>
|
69 |
+
Built using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a> for the $RepoName Repo
|
70 |
+
</p>
|
71 |
+
</div>""")
|
72 |
+
with gr.Row():
|
73 |
+
question = gr.Textbox(label = 'Type in your questions about $RepoName here and press Enter!', placeholder = 'What questions do you want to ask about the $RepoName library?')
|
74 |
+
openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
|
75 |
+
state = gr.State()
|
76 |
+
chatbot = gr.Chatbot()
|
77 |
+
question.submit(chat, [question, state, openai_api_key], [chatbot, state])
|
78 |
+
|
79 |
+
if __name__ == "__main__":
|
80 |
+
demo.launch()
|
template/requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain==0.0.55
|
2 |
+
requests
|
3 |
+
openai
|
4 |
+
transformers
|
5 |
+
huggingface_hub
|
6 |
+
faiss-cpu
|