import gradio as gr
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForQuestionAnswering,
    TrainingArguments,
    Trainer,
    pipeline,
)

# Load the dataset
dataset = load_dataset("karthikmns/eval_testing_mns")

# Load a pre-trained model and tokenizer
model_name = "distilbert-base-uncased-distilled-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Preprocess the dataset.
# NOTE: extractive QA needs question/context pairs plus answer-span labels
# (start_positions / end_positions); tokenizing a bare "text" column gives
# the Trainer nothing to compute a loss from. The function below assumes a
# SQuAD-style schema ("question", "context", "answers" columns); rename the
# fields to match your dataset.
def preprocess_function(examples):
    inputs = tokenizer(
        examples["question"],
        examples["context"],
        truncation="only_second",  # truncate the context, never the question
        max_length=384,
        padding="max_length",
        return_offsets_mapping=True,
    )
    offset_mapping = inputs.pop("offset_mapping")
    start_positions, end_positions = [], []
    for i, offsets in enumerate(offset_mapping):
        answer = examples["answers"][i]
        start_char = answer["answer_start"][0]
        end_char = start_char + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)
        # Locate the context span within the tokenized input
        context_start = sequence_ids.index(1)
        context_end = len(sequence_ids) - 1 - sequence_ids[::-1].index(1)
        if offsets[context_start][0] > start_char or offsets[context_end][1] < end_char:
            # The answer was truncated away; label the span as (0, 0)
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Walk inward to the tokens that cover the answer characters
            idx = context_start
            while idx <= context_end and offsets[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)
            idx = context_end
            while idx >= context_start and offsets[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)
    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

tokenized_datasets = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Create the Trainer. This assumes the dataset ships "train" and "validation"
# splits; if yours has only "train", carve one out with
# dataset["train"].train_test_split() first.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)

# Fine-tune the model
trainer.train()

# Save the model and the tokenizer: the pipeline below needs both in the
# same directory to load the checkpoint from disk.
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

# Create a question-answering pipeline from the fine-tuned checkpoint
qa_pipeline = pipeline("question-answering", model="./fine_tuned_model")

# Build a single reference text to answer questions against. dataset is a
# DatasetDict, so a split must be selected first (dataset["text"] raises a
# KeyError). The pipeline truncates long inputs, so a textbook-sized corpus
# would need a retrieval step in front of this.
context_text = " ".join(dataset["train"]["context"])

# Define the Gradio interface function
def answer_question(question):
    result = qa_pipeline(question=question, context=context_text)
    return result["answer"]

# Create and launch the Gradio interface
iface = gr.Interface(
    fn=answer_question,
    inputs="text",
    outputs="text",
    title="Textbook Q&A",
    description="Ask a question about your textbook!",
)
iface.launch()
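
# Once the app is running, it can also be queried programmatically. A minimal
# sketch using the gradio_client package (run from a separate process, since
# launch() blocks; the URL and question here are placeholders):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("What is photosynthesis?", api_name="/predict"))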