"""Gradio demo UI for Yourbench: mock document upload and question generation."""

import os
import random
import shutil
import time
import uuid
from datetime import datetime

import gradio as gr
import pandas as pd


def _new_session_id():
    """Return a session ID of the form ``YYYYmmdd_HHMMSS_<8 hex chars>``."""
    return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"


def save_uploaded_files(files, session_id):
    """Copy uploaded files into ``telemetry_files/<session_id>``.

    Args:
        files: Iterable of uploads. Each entry may be ``None`` (skipped), an
            object exposing the source path as ``.name``, or a plain path
            string. ``None`` for the whole argument is treated as empty.
        session_id: Name of the sub-directory created under
            ``telemetry_files``.

    Returns:
        List of destination paths, in upload order.
    """
    save_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    for file in files or []:
        if file is None:
            continue
        # NOTE(review): depending on the Gradio version/config, uploads arrive
        # either as file-like wrappers (path in `.name`) or as path strings;
        # accept both rather than crash on a missing attribute.
        source_path = getattr(file, "name", file)
        # basename() keeps the copy inside save_dir regardless of source path.
        save_path = os.path.join(save_dir, os.path.basename(source_path))
        shutil.copy2(source_path, save_path)
        saved_paths.append(save_path)
    return saved_paths


def mock_process_documents(
    files,
    chunk_size,
    num_questions,
    question_types,
    complexity_types,
    difficulty,
    selected_models,
    session_id=None,
):
    """Simulate document processing and return placeholder Q&A rows.

    Sleeps for five seconds, saves the uploads under the session directory,
    then builds ``num_questions`` rows by sampling randomly from the selected
    question types, complexity types and models.

    Args:
        files: Uploaded files, forwarded to ``save_uploaded_files``.
        chunk_size: Accepted for interface parity; unused by the mock.
        num_questions: Number of rows to generate (coerced to ``int``).
        question_types: Non-empty list of selected question type names.
        complexity_types: Non-empty list of selected complexity names.
        difficulty: Numeric difficulty, echoed into every row.
        selected_models: Non-empty list of selected model names.
        session_id: Directory name for saved uploads. A fresh ID is generated
            when omitted, which preserves the original call signature.

    Returns:
        ``pandas.DataFrame`` with columns ``question_type``, ``complexity``,
        ``question``, ``answer``, ``model`` and ``difficulty``.
    """
    time.sleep(5)  # Simulate 5 seconds of processing

    if session_id is None:
        session_id = _new_session_id()
    saved_files = save_uploaded_files(files, session_id)
    processed = ", ".join(saved_files)

    data = []
    # Sliders may deliver floats; range() only accepts integers.
    for index in range(int(num_questions)):
        number = index + 1
        question_type = random.choice(question_types)
        complexity = random.choice(complexity_types)
        model = random.choice(selected_models)

        question = (
            f"[{complexity}] Sample {question_type} question {number} "
            f"(Difficulty: {difficulty:.1f}, Model: {model})"
        )
        answer = (
            f"This is a sample answer for question {number}. "
            f"Files processed: {processed}"
        )
        data.append({
            "question_type": question_type,
            "complexity": complexity,
            "question": question,
            "answer": answer,
            "model": model,
            "difficulty": difficulty,
        })

    return pd.DataFrame(data)


def generate_csv_file(df, session_id):
    """Write ``df`` to ``telemetry_files/<session_id>/results.csv``.

    Args:
        df: Results DataFrame.
        session_id: Name of the session directory (created if missing).

    Returns:
        Path of the written CSV, or ``None`` when ``df`` is empty (nothing is
        written in that case).
    """
    if df.empty:
        return None

    session_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(session_dir, exist_ok=True)

    csv_path = os.path.join(session_dir, "results.csv")
    df.to_csv(csv_path, index=False)
    return csv_path


def process_files(
    input_files,
    chunk_size,
    num_questions,
    question_types_dict,
    complexity_types_dict,
    difficulty_level,
    model_selection_dict,
):
    """Validate the UI inputs, run the mock pipeline and export a CSV.

    Args:
        input_files: Uploaded files from the file component.
        chunk_size: Chunk size slider value.
        num_questions: Number-of-questions slider value.
        question_types_dict: Selected question types (list of strings,
            despite the name).
        complexity_types_dict: Selected complexity types (list of strings).
        difficulty_level: Difficulty slider value.
        model_selection_dict: Selected models (list of strings).

    Returns:
        Tuple ``(results_df, status_message, csv_path)``. On a validation
        error the DataFrame is empty and ``csv_path`` is ``None``.
    """
    if not input_files:
        return pd.DataFrame(), "Error: No files uploaded", None

    # Checkbox groups already deliver plain lists of the selected values.
    question_types = question_types_dict
    complexity_types = complexity_types_dict
    selected_models = model_selection_dict

    if not question_types or not complexity_types or not selected_models:
        return (
            pd.DataFrame(),
            "Error: Please select at least one option from each category",
            None,
        )

    # One ID for the whole run so the uploads and results.csv share a
    # directory instead of being scattered across two sessions.
    session_id = _new_session_id()

    start_time = time.time()
    results_df = mock_process_documents(
        input_files,
        chunk_size,
        num_questions,
        question_types,
        complexity_types,
        difficulty_level,
        selected_models,
        session_id=session_id,
    )
    processing_time = time.time() - start_time

    csv_path = generate_csv_file(results_df, session_id)

    return (
        results_df,
        f"Processing completed in {processing_time:.2f} seconds",
        csv_path,
    )


# Create custom theme
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
).set(
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_950",
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    block_label_text_weight="600",
    block_title_text_weight="600",
    input_background_fill="white",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_200",
    input_border_color_dark="*neutral_700",
)

# Create the Gradio interface
with gr.Blocks(
    title="Yourbench - Dynamic Question Generation",
    theme=theme,
    css="""
    .gradio-container {max-width: 1400px !important; margin-left: auto; margin-right: auto}
    .contain { display: flex; flex-direction: column; }
    .contain > * { flex: 1}
    .gap { margin-top: 1rem !important }
    footer {display: none !important}
    .citation-box {
        background-color: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin-top: 2rem;
        font-family: monospace;
    }
    .citation-box pre {
        margin: 0;
        white-space: pre-wrap;
    }
    .main-panel { min-height: 600px }
    .output-panel { min-height: 400px }
    .checkbox-group { max-height: 200px; overflow-y: auto }
    .model-select { max-height: 150px }
    .download-btn { margin-top: 1rem !important }
    """
) as demo:
    # Header with description
    gr.Markdown("""
    # 📚 Yourbench: Dynamic Question Generation Tool

    Generate high-quality questions and answers from your documents using state-of-the-art language models.
    This tool helps create diverse question types with varying complexity levels, perfect for educational
    assessment and content understanding.
    """)

    with gr.Row():
        # Left column for configuration
        with gr.Column(scale=2, elem_classes="main-panel"):
            # Document Upload Section
            with gr.Group():
                gr.Markdown("### 📄 Document Upload")
                input_files = gr.File(
                    label="Upload Documents (PDF/TXT)",
                    file_types=[".txt", ".pdf"],
                    file_count="multiple",
                    elem_id="file_upload",
                    scale=2
                )

            # Core Parameters Section
            with gr.Group():
                gr.Markdown("### ⚙️ Core Parameters")
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size",
                        info="Number of tokens per chunk",
                        elem_id="chunk_size"
                    )
                    num_questions = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Number of Questions",
                        info="How many questions to generate",
                        elem_id="num_questions"
                    )
                    difficulty_level = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=3,
                        step=0.1,
                        label="Average Difficulty",
                        info="1: Easy, 5: Very Hard",
                        elem_id="difficulty"
                    )

            with gr.Row():
                # Question Types Section
                with gr.Column():
                    gr.Markdown("### 🎯 Question Types")
                    question_types_dict = gr.CheckboxGroup(
                        choices=[
                            "Analytical",
                            "Application Based",
                            "Conceptual",
                            "Counterfactual",
                            "Factual",
                            "Open Ended",
                            "True False",
                            "False Premise",
                            "Clarification",
                            "Edge Case"
                        ],
                        value=["Analytical", "Factual", "Conceptual", "Application Based"],
                        label="Select Types",
                        elem_id="question_types",
                        elem_classes="checkbox-group"
                    )

                # Complexity and Models Section
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### 🔄 Complexity")
                        complexity_types_dict = gr.CheckboxGroup(
                            choices=["Single Shot", "Multi Hop"],
                            value=["Single Shot", "Multi Hop"],
                            label="Select Complexity",
                            elem_id="complexity_types"
                        )

                    with gr.Group():
                        gr.Markdown("### 🤖 Models")
                        model_selection_dict = gr.CheckboxGroup(
                            choices=[
                                "Mistral Large",
                                "Llama-3 70B",
                                "GPT-4",
                                "Claude 3.5 Sonnet",
                                "Gemini Pro"
                            ],
                            value=["Mistral Large", "GPT-4", "Claude 3.5 Sonnet"],
                            label="Select Models",
                            elem_id="models",
                            elem_classes="model-select"
                        )

            process_btn = gr.Button(
                "🚀 Generate Questions",
                variant="primary",
                size="lg",
                elem_id="generate_btn"
            )

        # Right column for outputs
        with gr.Column(scale=3, elem_classes="output-panel"):
            with gr.Group():
                gr.Markdown("### 📊 Generated Questions")
                output_status = gr.Textbox(
                    label="Status",
                    elem_id="status"
                )
                output_table = gr.Dataframe(
                    headers=["question_type", "complexity", "question", "answer", "model", "difficulty"],
                    label="Questions and Answers",
                    elem_id="results_table",
                    wrap=True
                )
                csv_output = gr.File(
                    label="Download Results",
                    elem_id="csv_download",
                    elem_classes="download-btn",
                    interactive=False
                )

    # Instructions Section
    with gr.Accordion("📝 Instructions", open=False):
        gr.Markdown("""
        1. **Upload Documents**: Support for .txt and .pdf files
        2. **Configure Parameters**:
           - Set chunk size for document processing
           - Choose number of questions to generate
           - Adjust difficulty level (1: Easy to 5: Very Hard)
        3. **Select Question Types**: Choose from various question categories
        4. **Set Complexity**: Single-shot or multi-hop reasoning
        5. **Choose Models**: Select AI models for ensemble generation
        6. Click '🚀 Generate Questions' to start
        7. Download results as CSV for further use
        """)

    # Citation Section
    gr.Markdown("""
    ### 📚 Citation
    If you find this work helpful in your research or applications, please cite:
    """)

    with gr.Group(elem_classes="citation-box"):
        gr.Markdown("""```bibtex
        @misc{yourbench2024,
            title={Yourbench: A Dynamic Question Generation Framework for Document Understanding},
            author={Your Team},
            year={2024},
            publisher={GitHub},
            journal={GitHub repository},
            howpublished={\\url{https://github.com/yourbench/yourbench}},
        }
        ```""")

    # API Information
    gr.Markdown("""
    ### 🔌 API Usage
    This tool can be used programmatically through its API. Here's how to interact with it:

    ```python
    import gradio_client

    client = gradio_client.Client("YOUR_SPACE_URL")
    result = client.predict(
        ["document.pdf"],  # Input files
        500,  # Chunk size
        5,  # Number of questions
        ["Analytical", "Factual"],  # Question types
        ["Single Shot"],  # Complexity types
        3.0,  # Difficulty level
        ["GPT-4", "Claude 3.5 Sonnet"],  # Models
        api_name="/predict"
    )
    ```

    Replace `YOUR_SPACE_URL` with the actual deployment URL. The API endpoint accepts the same parameters
    as the web interface and returns a tuple containing the results DataFrame, status message, and CSV file path.
    """)

    # Event handler
    process_btn.click(
        process_files,
        inputs=[
            input_files,
            chunk_size,
            num_questions,
            question_types_dict,
            complexity_types_dict,
            difficulty_level,
            model_selection_dict
        ],
        outputs=[output_table, output_status, csv_output],
        # Expose the endpoint under the name the API usage text above
        # documents ("/predict").
        api_name="predict"
    )

if __name__ == "__main__":
    demo.launch(share=True)