import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer from Hugging Face
model_name = "Rehman1603/airline_guidenece"  # Replace with your Hugging Face model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

# Put the model in inference mode
model.eval()

# Define the Alpaca prompt format: the user's instruction fills the
# "### Instruction:" slot, and the model completes the "### Response:" section.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
"""


def chat_with_model(instruction):
    # Format the input with the Alpaca prompt
    formatted_input = alpaca_prompt.format(instruction)

    # Tokenize the input and move it to the same device as the model
    # (works on CPU as well as GPU, since the model was loaded with device_map="auto")
    inputs = tokenizer(formatted_input, return_tensors="pt").to(model.device)

    # Generate the response (no gradients needed at inference time)
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Extract the generated text that follows "### Response:"
    response_start = decoded_output.find("### Response:") + len("### Response:")
    response_text = decoded_output[response_start:].strip()

    return response_text


# Create a Gradio interface
interface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your instruction here..."),
    outputs="text",
    title="Airline Guidance Chatbot",
    description="Ask questions about airline guidance and get responses from the model.",
)

# Launch the Gradio app
interface.launch()
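
# Optional sanity check: a minimal sketch of querying the running app from a
# separate Python session with gradio_client, assuming the default local URL
# (http://127.0.0.1:7860) and the default "/predict" endpoint that gr.Interface
# exposes; the question below is a hypothetical example.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")  # adjust the URL/port if needed
#   answer = client.predict(
#       "What is the checked baggage allowance?",
#       api_name="/predict",
#   )
#   print(answer)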