# Inference with Gradio
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned model and tokenizer
model_path = 'brunosan/GPT2-impactscience'
tokenizer_path = 'brunosan/GPT2-impactscience'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)
model = GPT2LMHeadModel.from_pretrained(model_path).to(device)

# Define the generation function
def generate_text(prompt):
    # Remove trailing whitespace, if any, so it does not skew tokenization
    prompt = prompt.rstrip()
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    # An explicit attention mask avoids the "attention mask is not set" warning
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=device)
    outputs = model.generate(input_ids=input_ids,
                             attention_mask=attention_mask,
                             max_length=100,
                             num_beams=9,
                             no_repeat_ngram_size=2,  # block repeated bigrams
                             temperature=1.0,
                             do_sample=True,          # sample within beams
                             top_p=0.95,
                             top_k=50)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Create a Gradio interface
# (gr.inputs / gr.outputs were removed in Gradio 3+; use the components directly)
input_text = gr.Textbox(lines=2, label="Enter the starting text")
output_text = gr.Textbox(label="Generated Text")

interface = gr.Interface(fn=generate_text,
                         inputs=input_text,
                         outputs=output_text,
                         title="GPT-2 Impact Science Text Generator",
                         description="Generate text using a GPT-2 model fine-tuned on the Impact Science book.")

if __name__ == "__main__":
    interface.launch()
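
# Usage note (not in the original script): by default launch() serves the app
# locally at http://127.0.0.1:7860. Passing share=True asks Gradio to open a
# temporary public tunnel URL, which is handy when the script runs on a remote
# GPU machine, e.g.:
#
#     interface.launch(share=True)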