Spaces:
Runtime error
Runtime error
from langchain.llms import OpenAI | |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain | |
from langchain.docstore.document import Document | |
import requests | |
import pathlib | |
import subprocess | |
import tempfile | |
import os | |
import gradio as gr | |
import pickle | |
from huggingface_hub import HfApi, upload_folder | |
from huggingface_hub import whoami, list_models | |
# using a vector space for our search | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores.faiss import FAISS | |
from langchain.text_splitter import CharacterTextSplitter | |
# Code for extracting the Markdown files from a repo
# Fetches the Markdown documents from any (e.g. your own) GitHub repo
def get_github_docs(repo_link):
    """Clone a GitHub repository and yield its Markdown files as Documents.

    This is a generator: nothing (including validation and the clone) runs
    until the first item is requested.

    Args:
        repo_link: Repository URL or ``owner/name`` string. Only the last two
            non-empty ``/``-separated segments are used; surrounding
            whitespace, a trailing slash and a trailing ``.git`` are ignored.

    Yields:
        Document: one per ``*.md`` / ``*.mdx`` file found in the clone, with
        the file text as ``page_content`` and a GitHub blob URL pinned to the
        cloned commit stored under the ``"source"`` metadata key.

    Raises:
        ValueError: if ``repo_link`` does not contain both an owner and a
            repository name.
        subprocess.CalledProcessError: if ``git clone`` or ``git rev-parse``
            exits with a non-zero status.
    """
    segments = [part for part in repo_link.strip().split("/") if part]
    if segments and segments[-1].endswith(".git"):
        # Accept clone-style links; the suffix is re-added for the clone URL.
        segments[-1] = segments[-1][: -len(".git")]
    if len(segments) < 2 or not segments[-1]:
        raise ValueError(
            f"Expected a GitHub link like 'owner/repo', got: {repo_link!r}"
        )
    repo_owner, repo_name = segments[-2], segments[-1]
    clone_url = f"https://github.com/{repo_owner}/{repo_name}.git"

    with tempfile.TemporaryDirectory() as d:
        # Argument lists (no shell) so user-supplied text in the link can
        # never be interpreted as shell syntax.
        subprocess.check_call(["git", "clone", clone_url, "."], cwd=d)
        git_sha = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        markdown_files = list(repo_path.rglob("*.md")) + list(
            repo_path.rglob("*.mdx")
        )
        for markdown_file in markdown_files:
            try:
                text = markdown_file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # Best effort: skip unreadable entries (broken symlinks,
                # directories named *.md, non-UTF-8 files) and keep going.
                print(f"Could not open file: {markdown_file}")
                continue
            # as_posix() keeps the URL path '/'-separated on every platform.
            relative_path = markdown_file.relative_to(repo_path).as_posix()
            github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
            # The file is already closed here, so no handle stays open while
            # the consumer processes the document.
            yield Document(page_content=text, metadata={"source": github_url})
#Code for creating a new space for the user | |
def create_space(repo_link, hf_token):
    """Create the public Gradio Space that will host the generated chatbot.

    Args:
        repo_link: GitHub repository link; its last ``/``-separated segment is
            used to name the Space ``LangChain_<repo>Bot``.
        hf_token: Hugging Face access token used to authenticate the request.

    Returns:
        The value returned by ``HfApi.create_repo`` for the Space (previously
        this was computed and then discarded).
    """
    print("***********INSIDE CREATE SPACE***************")
    repo_name = repo_link.split('/')[-1]
    api = HfApi(token=hf_token)
    # exist_ok=True makes the call idempotent, so retrying after a failed
    # upload does not abort because the Space was already created.
    return api.create_repo(
        repo_id=f'LangChain_{repo_name}Bot',  # example - ysharma/LangChain_GradioBot
        repo_type="space",
        space_sdk="gradio",
        private=False,
        exist_ok=True,
    )
#Code for creating the search index | |
#Saving search index to disk | |
def create_search_index(repo_link, openai_api_key):
    """Build a FAISS index over a repo's Markdown docs and pickle it to disk.

    Args:
        repo_link: GitHub repository link forwarded to ``get_github_docs``.
        openai_api_key: API key handed to ``OpenAIEmbeddings``.

    Returns:
        The path of the pickle file written in the current working directory
        (``"search_index.pickle"``).
    """
    print("***********INSIDE CREATE SEARCH INDEX***************")
    index_path = "search_index.pickle"

    docs = get_github_docs(repo_link)  # e.g. "gradio-app", "gradio"
    text_splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
    # Every chunk keeps the metadata of the document it was cut from, so the
    # source URL stays attached to each piece.
    chunks = [
        Document(page_content=piece, metadata=doc.metadata)
        for doc in docs
        for piece in text_splitter.split_text(doc.page_content)
    ]

    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    index = FAISS.from_documents(chunks, embeddings)

    # Persist the FAISS search index to disk.
    with open(index_path, "wb") as out:
        pickle.dump(index, out)
    return index_path
def upload_files_to_space(repo_link, hf_token):
    """Upload the chatbot app, search index and requirements to the Space.

    Renders ``template/app_og.py`` by replacing the ``$RepoName`` placeholder,
    uploads the rendered app, ``search_index.pickle`` and
    ``template/requirements.txt`` to the Space ``LangChain_<repo>Bot`` of the
    token's user, then deletes the local generated files.

    Args:
        repo_link: GitHub repository link; its last ``/``-separated segment is
            the repo name used in the Space name and in the app template.
        hf_token: Hugging Face access token used for ``whoami`` and uploads.

    Returns:
        An HTML snippet with a link to the Space.

    Raises:
        FileNotFoundError: if ``search_index.pickle`` has not been created yet
            or the app template is missing.
    """
    print("***********INSIDE UPLOAD FILES TO SPACE***************")
    index_path = "search_index.pickle"
    generated_app_path = "template/app.py"

    # Fail before anything is uploaded if the index was never built.
    if not os.path.isfile(index_path):
        raise FileNotFoundError(
            f"{index_path} not found; create the search index before creating the chatbot."
        )

    repo_name = repo_link.split('/')[-1]

    # Replace the repo name placeholder in the app template.
    with open("template/app_og.py", "r", encoding="utf-8") as f:
        app = f.read()
    app = app.replace("$RepoName", repo_name)

    # This function needs its own client; the one in create_space is local to it.
    api = HfApi(token=hf_token)
    user_name = whoami(token=hf_token)['name']
    # Fully qualified id so every upload targets the Space in the token
    # owner's namespace.
    space_name = f"{user_name}/LangChain_{repo_name}Bot"

    # Save the rendered app.py to disk for upload.
    with open(generated_app_path, "w", encoding="utf-8") as f:
        f.write(app)
    try:
        uploads = (
            (generated_app_path, "app.py"),
            (index_path, "search_index.pickle"),
            ("template/requirements.txt", "requirements.txt"),
        )
        for local_path, path_in_repo in uploads:
            api.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=path_in_repo,
                repo_id=space_name,
                token=hf_token,
                repo_type="space",
            )
    finally:
        # The rendered app is cheap to regenerate; never leave it behind.
        os.remove(generated_app_path)

    # Only drop the index after a successful upload, so a failed attempt can
    # be retried without re-embedding the whole repository.
    os.remove(index_path)

    repo_url = f"https://huggingface.co/spaces/{space_name}"
    return f"Successfully created the Chatbot at: <a href={repo_url} target='_blank'>{space_name}</a>"
def driver(repo_link, hf_token):
    """Create the chatbot Space for a repo and fill it with the app files.

    The search index is not built here; this only calls ``create_space`` and
    then ``upload_files_to_space``.

    Args:
        repo_link: GitHub repository link passed to both steps.
        hf_token: Hugging Face access token passed to both steps.

    Returns:
        The HTML snippet returned by ``upload_files_to_space``.
    """
    print("***********INSIDE DRIVER***************")
    # Step 1: create a new Space.
    create_space(repo_link, hf_token)
    # Step 2: upload the files to the new Space.
    result_html = upload_files_to_space(repo_link, hf_token)
    print(f"html tag is : {result_html}")
    return result_html
#Gradio code for Repo as input and search index as output file | |
with gr.Blocks() as demo:
    # Row 1: the three text inputs (repo link plus the two secrets).
    with gr.Row():
        repo_link = gr.Textbox(label="Enter Github repo name")
        hf_token_in = gr.Textbox(type='password', label="Enter hf-token name")
        openai_api_key = gr.Textbox(type='password', label="Enter your OpenAI API key here")

    # Row 2: one button per workflow step.
    with gr.Row():
        btn_faiss = gr.Button("Create Search index")
        btn_create_space = gr.Button("Create YOur Chatbot")

    # Output widgets: link to the created Space and the pickled index file.
    html_out = gr.HTML()
    search_index_file = gr.File()

    # Wire each button to its handler.
    btn_faiss.click(
        fn=create_search_index,
        inputs=[repo_link, openai_api_key],
        outputs=search_index_file,
    )
    btn_create_space.click(
        fn=driver,
        inputs=[repo_link, hf_token_in],
        outputs=html_out,
    )

demo.queue()
demo.launch(debug=True)