import re
import torch
import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the model and tokenizer from the Hugging Face Hub repository
model_repo_id = "Ajay12345678980/QA_GPT_J"  # Replace with your model repository ID

# Initialize the model and tokenizer
device = "cuda" if torch.cuda.is_available() else "cpu"
model = GPT2LMHeadModel.from_pretrained(model_repo_id).to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_repo_id)

# Define the prediction function
def generate_answer(question):
    # Tokenize the question and move the tensors to the same device as the model
    input_ids = tokenizer.encode(question, return_tensors="pt").to(device)
    attention_mask = torch.ones_like(input_ids).to(device)
    pad_token_id = tokenizer.eos_token_id

    # Generate a continuation of up to 100 new tokens
    output = model.generate(
        input_ids,
        max_new_tokens=100,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
    )

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract the text that follows the "Answer" label in the decoded output.
    # NOTE: the end-of-answer marker is missing in the original source (an empty
    # search string always matches at index 0 and would truncate the answer),
    # so an empty marker is treated here as "no end marker".
    end_marker = ""  # placeholder; set to the model's actual end-of-answer marker if it has one
    start_index = decoded_output.find("Answer")
    end_index = decoded_output.find(end_marker) if end_marker else -1

    if start_index != -1:
        if end_index != -1:
            answer_text = decoded_output[start_index + len("Answer"):end_index].strip()
        else:
            answer_text = decoded_output[start_index + len("Answer"):].strip()
        return answer_text
    else:
        return "Sorry, I couldn't generate an answer."

# Gradio interface setup
interface = gr.Interface(
    fn=generate_answer,
    inputs="text",
    outputs="text",
    title="GPT-2 Text Generation",
    description="Enter a question and see what the model generates!",
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()
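
# A minimal sketch of querying the running app from another process with
# gradio_client. Assumptions not in the original script: the app is served at
# the default local URL printed by interface.launch(), and the default
# "/predict" endpoint that gr.Interface exposes; install the client with
# `pip install gradio_client`.
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")   # URL printed by interface.launch()
#     answer = client.predict(
#         "What is overfitting?",  # example question, not from the original app
#         api_name="/predict",     # default endpoint name for a gr.Interface
#     )
#     print(answer)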