Spaces:
Sleeping
Sleeping
import os | |
import pathlib | |
import gradio as gr | |
import pandas as pd | |
import yaml | |
from autorag.evaluator import Evaluator | |
from src.runner import GradioStreamRunner | |
# Directory that contains this script; bundled resources are resolved from it.
root_dir = os.path.dirname(os.path.realpath(__file__))

# Paths to example files
config_dir = os.path.join(root_dir, "config")

# Non-GPU Examples
non_gpu = os.path.join(config_dir, "non_gpu")
simple_openai = os.path.join(non_gpu, "simple_openai.yaml")
simple_openai_korean = os.path.join(non_gpu, "simple_openai_korean.yaml")
compact_openai = os.path.join(non_gpu, "compact_openai.yaml")
compact_openai_korean = os.path.join(non_gpu, "compact_openai_korean.yaml")
half_openai = os.path.join(non_gpu, "half_openai.yaml")
half_openai_korean = os.path.join(non_gpu, "half_openai_korean.yaml")
full_openai = os.path.join(non_gpu, "full_no_rerank_openai.yaml")
non_gpu_examples_list = [
    simple_openai,
    simple_openai_korean,
    compact_openai,
    compact_openai_korean,
    half_openai,
    half_openai_korean,
    full_openai,
]
# Each path is wrapped in its own single-item list (one row per example).
non_gpu_examples = [[path] for path in non_gpu_examples_list]

# GPU Examples
gpu = os.path.join(config_dir, "gpu")
compact_openai_gpu = os.path.join(gpu, "compact_openai.yaml")
compact_openai_korean_gpu = os.path.join(gpu, "compact_openai_korean.yaml")
half_openai_gpu = os.path.join(gpu, "half_openai.yaml")
half_openai_korean_gpu = os.path.join(gpu, "half_openai_korean.yaml")
full_openai_gpu = os.path.join(gpu, "full_no_rerank_openai.yaml")
gpu_examples_list = [
    compact_openai_gpu,
    compact_openai_korean_gpu,
    half_openai_gpu,
    half_openai_korean_gpu,
    full_openai_gpu,
]
gpu_examples = [[path] for path in gpu_examples_list]

# GPU + API
gpu_api = os.path.join(config_dir, "gpu_api")
compact_openai_gpu_api = os.path.join(gpu_api, "compact_openai.yaml")
compact_openai_korean_gpu_api = os.path.join(gpu_api, "compact_openai_korean.yaml")
half_openai_gpu_api = os.path.join(gpu_api, "half_openai.yaml")
half_openai_korean_gpu_api = os.path.join(gpu_api, "half_openai_korean.yaml")
full_openai_gpu_api = os.path.join(gpu_api, "full_no_rerank_openai.yaml")
gpu_api_examples_list = [
    compact_openai_gpu_api,
    compact_openai_korean_gpu_api,
    half_openai_gpu_api,
    half_openai_korean_gpu_api,
    full_openai_gpu_api,
]
gpu_api_examples = [[path] for path in gpu_api_examples_list]

# Sample QA / corpus datasets shipped next to this script.
sample_data_dir = os.path.join(root_dir, "sample_data")
example_qa_parquet = os.path.join(sample_data_dir, "qa_data_sample.parquet")
example_corpus_parquet = os.path.join(sample_data_dir, "corpus_data_sample.parquet")
def display_yaml(file):
    """Return the uploaded YAML file re-serialized as block-style YAML text.

    Args:
        file: Uploaded file object whose path is available as ``file.name``,
            or ``None`` when nothing has been uploaded.

    Returns:
        ``"No file uploaded"`` when ``file`` is ``None``; otherwise the parsed
        document dumped back to a string.
    """
    if file is None:
        return "No file uploaded"
    # Decode explicitly as UTF-8 instead of relying on the platform default;
    # the *_korean.yaml examples presumably contain non-ASCII text.
    with open(file.name, "r", encoding="utf-8") as f:
        content = yaml.safe_load(f)
    # allow_unicode keeps non-ASCII characters readable instead of emitting
    # backslash escape sequences in the preview.
    return yaml.dump(content, default_flow_style=False, allow_unicode=True)
def display_parquet(file):
    """Load an uploaded parquet file for preview.

    Args:
        file: Uploaded file object whose path is available as ``file.name``,
            or ``None`` when nothing has been uploaded.

    Returns:
        An empty ``DataFrame`` when ``file`` is ``None``; otherwise the frame
        read from ``file.name``.
    """
    return pd.DataFrame() if file is None else pd.read_parquet(file.name)
def check_files(yaml_file, qa_file, corpus_file):
    """Build a visibility update that is shown only when all uploads exist.

    Args:
        yaml_file: Current value of the YAML upload, or ``None``.
        qa_file: Current value of the QA parquet upload, or ``None``.
        corpus_file: Current value of the corpus parquet upload, or ``None``.

    Returns:
        ``gr.update(visible=True)`` when none of the three values is ``None``,
        otherwise ``gr.update(visible=False)``.
    """
    uploads = (yaml_file, qa_file, corpus_file)
    all_present = all(upload is not None for upload in uploads)
    return gr.update(visible=all_present)
def run_trial(file, yaml_file, qa_file, corpus_file):
    """Run an AutoRAG trial and return a completion message.

    The project directory is a ``project`` folder placed next to the uploaded
    file given by ``file.name``.

    Args:
        file: Uploaded file object; only ``file.name`` is read, to locate the
            project directory.
        yaml_file: Config passed to ``Evaluator.start_trial``.
        qa_file: QA data passed to ``Evaluator``.
        corpus_file: Corpus data passed to ``Evaluator``.

    Returns:
        A fixed status message shown once the trial call has returned.
    """
    upload_dir = pathlib.PurePath(file.name).parent
    project_dir = os.path.join(upload_dir, "project")

    evaluator = Evaluator(qa_file, corpus_file, project_dir=project_dir)
    evaluator.start_trial(yaml_file, skip_validation=True)

    return "❗Trial Completed❗ Go to Chat Tab to start the conversation"
def set_environment_variable(api_name, api_key):
    """Store ``api_key`` in ``os.environ`` under ``api_name``.

    Surrounding whitespace is stripped from both values before use, so a key
    pasted with a trailing newline or space is not stored corrupted, and
    whitespace-only input is treated as missing.

    Args:
        api_name: Name of the environment variable to set.
        api_key: Value to store under that name.

    Returns:
        A status string: success, the error raised while setting the variable,
        or a notice that the name or key is missing.
    """
    name = api_name.strip() if api_name else ""
    key = api_key.strip() if api_key else ""
    if not name or not key:
        return "API Name or Key is missing"
    try:
        os.environ[name] = key
    except (ValueError, OSError) as e:
        # Raised for names/values the OS rejects (e.g. '=' in the name or an
        # embedded NUL byte); report it instead of crashing the handler.
        return f"Error setting environment variable: {e}"
    return "✅ Setting Complete ✅"
def stream_default(file, history):
    """Stream the default-pipeline answer into the chat history.

    A runner is built from ``extracted_sample.yaml`` in ``config_dir`` and the
    ``project`` folder next to ``file.name``. An empty assistant message is
    appended to ``history``, and the message just before it is used as the
    question.

    Args:
        file: Uploaded file object; only ``file.name`` is read.
        history: List of ``{"role", "content"}`` message dicts; mutated in
            place.

    Yields:
        ``history`` after each streamed item, with the last message's content
        replaced by the first element of that item.
    """
    # Default YAML Runner
    upload_dir = pathlib.PurePath(file.name).parent
    runner = GradioStreamRunner.from_yaml(
        os.path.join(config_dir, "extracted_sample.yaml"),
        os.path.join(upload_dir, "project"),
    )

    # Placeholder assistant turn that the stream fills in.
    history.append({"role": "assistant", "content": ""})
    question = history[-2]["content"]

    # Stream responses for the chatbox
    for chunk in runner.stream_run(question):
        # The content is overwritten rather than appended to, so each item
        # presumably carries the full text so far -- TODO confirm.
        history[-1]["content"] = chunk[0]
        yield history
def stream_optimized(file, history):
    """Stream the trial-pipeline answer into the chat history.

    A runner is built from trial folder ``project/0`` located next to
    ``file.name``. An empty assistant message is appended to ``history``, and
    the message just before it is used as the question.

    Args:
        file: Uploaded file object; only ``file.name`` is read.
        history: List of ``{"role", "content"}`` message dicts; mutated in
            place.

    Yields:
        ``history`` after each streamed item, with the last message's content
        replaced by the first element of that item.
    """
    # Custom YAML Runner
    upload_dir = pathlib.PurePath(file.name).parent
    runner = GradioStreamRunner.from_trial_folder(
        os.path.join(upload_dir, "project", "0")
    )

    # Placeholder assistant turn that the stream fills in.
    history.append({"role": "assistant", "content": ""})
    question = history[-2]["content"]

    for chunk in runner.stream_run(question):
        history[-1]["content"] = chunk[0]
        yield history
def user(user_message, history: list):
    """Append the user's message to a copy of the chat history.

    Args:
        user_message: Text the user submitted.
        history: Existing list of message dicts; not mutated.

    Returns:
        A 2-tuple: an empty string and a new history list ending with the
        user's message.
    """
    new_turn = {"role": "user", "content": user_message}
    updated_history = history + [new_turn]
    return "", updated_history
# Three-tab interface: environment variables, file upload / trial run, chat.
# NOTE(review): widget nesting was reconstructed from a flattened listing --
# verify the layout against the running app.
with gr.Blocks(theme="earneleh/paris") as demo:
    gr.Markdown("# AutoRAG Trial & Debugging Interface")

    with gr.Tabs() as tabs:
        # ------------------------------------------------------------------
        # Tab 1: set an environment variable (e.g. an API key).
        # ------------------------------------------------------------------
        with gr.Tab("Environment Variables"):
            gr.Markdown("## Environment Variables")

            with gr.Row():  # Arrange horizontally
                with gr.Column(scale=3):
                    api_name = gr.Textbox(
                        label="Environment Variable Name",
                        type="text",
                        placeholder="Enter your Environment Variable Name",
                    )
                    gr.Examples(examples=[["OPENAI_API_KEY"]], inputs=api_name)
                with gr.Column(scale=7):
                    api_key = gr.Textbox(
                        label="API Key",
                        type="password",
                        placeholder="Enter your API Key",
                    )

            set_env_button = gr.Button("Set Environment Variable")
            env_output = gr.Textbox(label="Status", interactive=False)

            # Pressing Enter in the key box and clicking the button do the same.
            for _listener in (api_key.submit, set_env_button.click):
                _listener(
                    set_environment_variable,
                    inputs=[api_name, api_key],
                    outputs=env_output,
                )

        # ------------------------------------------------------------------
        # Tab 2: upload YAML / QA / corpus files and run a trial.
        # ------------------------------------------------------------------
        with gr.Tab("File Upload"):
            with gr.Row() as file_upload_row:
                with gr.Column(scale=3):
                    yaml_file = gr.File(
                        label="Upload YAML File",
                        file_count="single",
                    )
                    make_yaml_button = gr.Button(
                        "Make Your Own YAML File",
                        link="https://tally.so/r/mBQY5N",
                    )

                with gr.Column(scale=7):
                    yaml_content = gr.Textbox(label="YAML File Content")
                    gr.Markdown("Here is the Sample YAML File. Just click the file ❗")

                    gr.Markdown("### Non-GPU Examples")
                    gr.Examples(examples=non_gpu_examples, inputs=yaml_file)

                    with gr.Row():
                        # Section for GPU examples
                        with gr.Column():
                            gr.Markdown("### GPU Examples")
                            gr.Markdown(
                                "**⚠️ Warning**: Here are the YAML files containing "
                                "the modules that use the **local model**."
                            )
                            gr.Markdown(
                                "Note that if you Run_Trial in a non-GPU environment, "
                                "**it can take a very long time**."
                            )
                            gr.Examples(examples=gpu_examples, inputs=yaml_file)
                            make_gpu = gr.Button(
                                "Use AutoRAG GPU Feature",
                                link="https://tally.so/r/3j7rP6",
                            )

                        # Section for GPU + API examples
                        with gr.Column():
                            gr.Markdown("### GPU + API Examples")
                            gr.Markdown(
                                "**⚠️ Warning**: Here are the YAML files containing "
                                "the modules that use the **local model** and "
                                "**API Based Model**."
                            )
                            gr.Markdown(
                                "You need to set **JINA_API_KEY**, **COHERE_API_KEY**, "
                                "**MXBAI_API_KEY** and **VOYAGE_API_KEY** as "
                                "environment variables to use this feature. "
                            )
                            gr.Examples(examples=gpu_api_examples, inputs=yaml_file)
                            gpu_api_button = gr.Button(
                                "Use AutoRAG API KEY Feature",
                                link="https://tally.so/r/waD1Ab",
                            )

            with gr.Row() as qa_upload_row:
                with gr.Column(scale=3):
                    qa_file = gr.File(
                        label="Upload qa.parquet File",
                        file_count="single",
                    )
                    # Add button for QA
                    make_qa_button = gr.Button(
                        "Make Your Own QA Data",
                        link="https://huggingface.co/spaces/AutoRAG/AutoRAG-data-creation",
                    )

                with gr.Column(scale=7):
                    qa_content = gr.Dataframe(label="QA Parquet File Content")
                    gr.Markdown("Here is the Sample QA File. Just click the file ❗")
                    gr.Examples(examples=[[example_qa_parquet]], inputs=qa_file)

            with gr.Row() as corpus_upload_row:
                with gr.Column(scale=3):
                    corpus_file = gr.File(
                        label="Upload corpus.parquet File",
                        file_count="single",
                    )
                    make_corpus_button = gr.Button(
                        "Make Your Own Corpus Data",
                        link="https://huggingface.co/spaces/AutoRAG/AutoRAG-data-creation",
                    )

                with gr.Column(scale=7):
                    corpus_content = gr.Dataframe(label="Corpus Parquet File Content")
                    gr.Markdown("Here is the Sample Corpus File. Just click the file ❗")
                    gr.Examples(examples=[[example_corpus_parquet]], inputs=corpus_file)

            # Both start hidden; the button is revealed by check_files and the
            # output box by the first click handler below.
            run_trial_button = gr.Button("Run Trial", visible=False)
            trial_output = gr.Textbox(label="Trial Output", visible=False)

            # Preview each upload as soon as it changes.
            for _uploader, _renderer, _preview in (
                (yaml_file, display_yaml, yaml_content),
                (qa_file, display_parquet, qa_content),
                (corpus_file, display_parquet, corpus_content),
            ):
                _uploader.change(_renderer, inputs=_uploader, outputs=_preview)

            # Re-evaluate the "Run Trial" button visibility on every upload change.
            for _uploader in (yaml_file, qa_file, corpus_file):
                _uploader.change(
                    check_files,
                    inputs=[yaml_file, qa_file, corpus_file],
                    outputs=run_trial_button,
                )

            # First click handler: hide the three upload rows, show the output box.
            run_trial_button.click(
                lambda: (
                    gr.update(visible=False),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    gr.update(visible=True),
                ),
                outputs=[
                    file_upload_row,
                    qa_upload_row,
                    corpus_upload_row,
                    trial_output,
                ],
            )
            # Second click handler: run the trial. The YAML upload is passed
            # twice: once to locate the project directory, once as the config.
            run_trial_button.click(
                run_trial,
                inputs=[yaml_file, yaml_file, qa_file, corpus_file],
                outputs=trial_output,
            )

        # ------------------------------------------------------------------
        # Tab 3: side-by-side chat (default pipeline vs. trial result).
        # ------------------------------------------------------------------
        # New Chat Tab
        with gr.Tab("Chat") as chat_tab:
            gr.Markdown("### Compare Chat Models")

            question_input = gr.Textbox(
                label="Your Question", placeholder="Type your question here..."
            )
            # Hidden copy of the question; its change event drives the right-hand chat.
            pseudo_input = gr.Textbox(label="havertz", visible=False)

            with gr.Row():
                # Left Chatbox (Default YAML)
                with gr.Column():
                    gr.Markdown("#### Naive RAG Chat")
                    default_chatbox = gr.Chatbot(
                        label="Naive RAG Conversation", type="messages"
                    )

                # Right Chatbox (Custom YAML)
                with gr.Column():
                    gr.Markdown("#### Optimized RAG Chat")
                    custom_chatbox = gr.Chatbot(
                        label="Optimized RAG Conversation", type="messages"
                    )

            # Left chat: copy the question to the hidden box, add the user turn
            # (which clears the input), then stream the default answer.
            _question_copied = question_input.submit(
                lambda x: x, inputs=[question_input], outputs=[pseudo_input]
            )
            _default_turn_added = _question_copied.then(
                user,
                [question_input, default_chatbox],
                outputs=[question_input, default_chatbox],
                queue=False,
            )
            _default_turn_added.then(
                stream_default,
                inputs=[yaml_file, default_chatbox],
                outputs=[default_chatbox],
            )

            # Right chat: triggered by the hidden copy changing.
            # NOTE(review): submitting the same question twice in a row may not
            # fire this change event -- confirm against Gradio's semantics.
            _custom_turn_added = pseudo_input.change(
                user,
                [pseudo_input, custom_chatbox],
                outputs=[question_input, custom_chatbox],
                queue=False,
            )
            _custom_turn_added.then(
                stream_optimized,
                inputs=[yaml_file, custom_chatbox],
                outputs=[custom_chatbox],
            )

            deploy_button = gr.Button("Deploy", link="https://tally.so/r/3XM7y4")

if __name__ == "__main__":
    # Run the interface
    demo.launch(share=False, debug=True)