# Page-header residue from the Hugging Face file viewer (not part of the program):
# Slach's picture
# Duplicate from ysharma/LangchainBot-space-creator
# 2164147
from langchain.llms import OpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.docstore.document import Document
import requests
import pathlib
import subprocess
import tempfile
import os
import gradio as gr
import pickle
from huggingface_hub import HfApi, upload_folder
from huggingface_hub import whoami, list_models
# using a vector space for our search
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.text_splitter import CharacterTextSplitter
# Code for extracting the Markdown files from a repo.
# Fetches the Markdown docs from GitHub for any repo, including your own.
def get_github_docs(repo_link):
    """Clone a GitHub repository and yield its Markdown files as Documents.

    Args:
        repo_link: Either ``owner/repo`` or a URL whose last two path
            segments are ``owner/repo``. Surrounding whitespace, a trailing
            slash and a ``.git`` suffix are tolerated.

    Yields:
        One ``Document`` per ``*.md`` / ``*.mdx`` file found anywhere in the
        clone. ``page_content`` is the file text and ``metadata["source"]``
        is a GitHub blob URL pinned to the cloned commit.

    Raises:
        ValueError: If an owner and a repository name cannot be extracted
            from ``repo_link``.
        subprocess.CalledProcessError: If a ``git`` command fails.
    """
    # Normalise first so "owner/repo/", "owner/repo.git" and full URLs all
    # resolve to the same owner/name pair.
    cleaned = (repo_link or "").strip().rstrip("/")
    if cleaned.endswith(".git"):
        cleaned = cleaned[: -len(".git")]
    parts = cleaned.split("/")
    if len(parts) < 2 or not parts[-2] or not parts[-1]:
        raise ValueError(
            f"Expected a GitHub repo as 'owner/name' or a full URL, got: {repo_link!r}"
        )
    repo_owner, repo_name = parts[-2], parts[-1]

    # The generator keeps the temporary clone alive for as long as it is
    # being consumed; the directory is removed once iteration finishes.
    with tempfile.TemporaryDirectory() as d:
        # Argument lists (no shell) so user-supplied text is never parsed by
        # a shell. A shallow clone is enough: only HEAD's files are read.
        subprocess.check_call(
            [
                "git",
                "clone",
                "--depth",
                "1",
                f"https://github.com/{repo_owner}/{repo_name}.git",
                ".",
            ],
            cwd=d,
        )
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            try:
                # Decode explicitly as UTF-8 and replace undecodable bytes so
                # one odd file cannot abort the whole crawl.
                with open(markdown_file, "r", encoding="utf-8", errors="replace") as f:
                    content = f.read()
            except OSError:
                # Covers broken symlinks (FileNotFoundError) as well as
                # directories whose names happen to end in .md/.mdx.
                print(f"Could not open file: {markdown_file}")
                continue
            relative_path = markdown_file.relative_to(repo_path).as_posix()
            github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
            yield Document(page_content=content, metadata={"source": github_url})
#Code for creating a new space for the user
def create_space(repo_link, hf_token):
    """Create (or reuse) the public Gradio Space that will host the chatbot.

    The Space is named ``LangChain_<repo>Bot``, where ``<repo>`` is the last
    ``/``-separated segment of ``repo_link`` (e.g. ``LangChain_GradioBot``).
    No namespace is passed; ``upload_files_to_space`` later addresses the
    Space as ``<user>/LangChain_<repo>Bot``, so the Hub is presumably
    expected to place it under the token owner's account.

    Args:
        repo_link: GitHub repo given as ``owner/repo`` or a URL ending in it.
        hf_token: Hugging Face access token used to authenticate the call.

    Returns:
        Whatever ``HfApi.create_repo`` returns for the Space (its repo URL).
        Earlier versions discarded this value; existing callers that ignore
        the return value are unaffected.
    """
    repo_name = repo_link.split('/')[-1]
    api = HfApi(token=hf_token)
    return api.create_repo(
        repo_id=f'LangChain_{repo_name}Bot',
        exist_ok=True,  # re-running for the same repo must not fail
        repo_type="space",
        space_sdk="gradio",
        private=False,
    )
#Code for creating the search index
#Saving search index to disk
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over a repo's Markdown docs and pickle it to disk.

    Every document yielded by ``get_github_docs`` is split on spaces into
    chunks (chunk_size=1024, no overlap); each chunk keeps its parent's
    metadata. The chunks are embedded with OpenAI embeddings and stored in a
    FAISS index, which is pickled to ``search_index.pickle`` in the current
    working directory.

    Args:
        repo_link: GitHub repo reference forwarded to ``get_github_docs``.
        openai_api_key: API key handed to ``OpenAIEmbeddings``.

    Returns:
        The path of the pickle file that was written.
    """
    text_splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    chunked_docs = [
        Document(page_content=piece, metadata=doc.metadata)
        for doc in get_github_docs(repo_link)
        for piece in text_splitter.split_text(doc.page_content)
    ]

    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    index = FAISS.from_documents(chunked_docs, embeddings)

    # Persist the index so it can be offered for download and uploaded later.
    index_path = "search_index.pickle"
    with open(index_path, "wb") as out:
        pickle.dump(index, out)
    return index_path
def upload_files_to_space(repo_link, hf_token):
    """Upload the chatbot's files to the user's Space and return a result banner.

    Three files are pushed to ``<user>/LangChain_<repo>Bot``:

    * ``app.py`` - ``template/app_og.py`` with every ``$RepoName``
      placeholder replaced by the repo name,
    * ``search_index.pickle`` - the index expected in the working directory,
    * ``requirements.txt`` - copied from ``template/requirements.txt``.

    Args:
        repo_link: GitHub repo given as ``owner/repo`` or a URL ending in it;
            only the last ``/``-separated segment is used.
        hf_token: Hugging Face access token; also used to look up the user
            name via ``whoami``.

    Returns:
        An HTML snippet linking to the Space.
    """
    import html  # local import: only needed to escape the banner text

    repo_name = repo_link.split('/')[-1]
    api = HfApi(token=hf_token)
    user_name = whoami(token=hf_token)['name']
    # Build the Space id once so every upload and the final link agree.
    space_name = f"{user_name}/LangChain_{repo_name}Bot"
    repo_url = f"https://huggingface.co/spaces/{space_name}"

    # Render app.py in memory and upload the bytes directly: nothing is
    # written into template/, so a failed upload leaves no stray file behind.
    with open("template/app_og.py", "r", encoding="utf-8") as f:
        app_source = f.read().replace("$RepoName", repo_name)

    uploads = (
        (app_source.encode("utf-8"), "app.py"),
        ("search_index.pickle", "search_index.pickle"),
        ("template/requirements.txt", "requirements.txt"),
    )
    for payload, path_in_repo in uploads:
        api.upload_file(
            path_or_fileobj=payload,
            path_in_repo=path_in_repo,
            repo_id=space_name,
            token=hf_token,
            repo_type="space",
        )

    # Remove the local index only after every upload succeeded, so a failed
    # attempt (e.g. a mistyped token) can be retried without re-indexing.
    os.remove("search_index.pickle")

    # repo_name comes from user input: quote the href and escape both values
    # so they cannot break out of the markup.
    safe_url = html.escape(repo_url, quote=True)
    safe_name = html.escape(space_name)
    return (
        "<p style='color: orange; text-align: center; font-size: 24px; background-color: lightgray;'>"
        "🎉Congratulations🎉 Chatbot created successfully! Access it here : "
        f"<a href='{safe_url}' target='_blank'>{safe_name}</a></p>"
    )
def driver(repo_link, hf_token):
    """Create the user's Space, upload the chatbot files, and return the result HTML.

    Args:
        repo_link: GitHub repo reference passed through to both steps.
        hf_token: Hugging Face access token passed through to both steps.

    Returns:
        The HTML snippet produced by ``upload_files_to_space``.
    """
    # The search index is not built here; this function only creates the
    # Space and then fills it.
    create_space(repo_link, hf_token)
    result_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {result_html}")
    return result_html
def set_state():
    """Return two Gradio updates that each switch a component to visible.

    Wired below to the token textbox and the "Create Your Chatbot" button.
    """
    reveal_token_box = gr.update(visible=True)
    reveal_create_button = gr.update(visible=True)
    return reveal_token_box, reveal_create_button
#Gradio code for Repo as input and search index as output file
# Header banner shown at the top of the page (title, instructions, duplicate badge).
_header_html = """<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
QandA Chatbot Creator for Github Repos - Automation done using LangChain, Gradio, and Spaces
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Generate a top-notch <b>Q&A Chatbot</b> for your Github Repo, using <a href="https://langchain.readthedocs.io/en/latest/" target="_blank">LangChain</a> and <a href="https://github.com/gradio-app/gradio" target="_blank">Gradio</a>.
Paste your Github repository link, enter your OpenAI API key, and the app will create a FAISS embedding vector space for you.
Next, input your Huggingface Token and press the final button.<br><br>
Your new chatbot will be ready under your Huggingface profile, accessible via the displayed link.
<center><a href="https://huggingface.co/spaces/ysharma/LangchainBot-space-creator?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></center>
</p>
</div>"""

# Two-step UI: (1) build the search index, (2) create the Space and upload.
# NOTE(review): the source's indentation was lost, so the Row/Column nesting
# below is a reconstruction - confirm against the intended layout.
with gr.Blocks() as demo:
    gr.HTML(_header_html)

    # Step 1: repo + OpenAI key on the left, resulting index file on the right.
    with gr.Row():
        with gr.Column():
            repo_link = gr.Textbox(label="Enter Github repo name")
            openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")
            btn_faiss = gr.Button("Create Search index")
        search_index_file = gr.File(label='Search index vector')

    # Step 2: created hidden; revealed by set_state when step 1 is clicked.
    with gr.Row():
        hf_token_in = gr.Textbox(type='password', label="Enter hf-token name", visible=False)
        btn_create_space = gr.Button("Create Your Chatbot", visible=False)
    html_out = gr.HTML()

    # The first button has two handlers: build the index, and reveal step 2.
    btn_faiss.click(
        fn=create_search_index,
        inputs=[repo_link, openai_api_key],
        outputs=search_index_file,
    )
    btn_faiss.click(fn=set_state, inputs=[], outputs=[hf_token_in, btn_create_space])
    btn_create_space.click(fn=driver, inputs=[repo_link, hf_token_in], outputs=html_out)

demo.queue()
demo.launch(debug=True)