import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, TextStreamer
import streamlit as st

# Initialize Streamlit UI
st.title("Legal Query Chatbot")
st.write("Ask questions related to Indian traffic laws and get AI-generated responses.")

# Path to the LoRA fine-tuned adapter and quantization flag
model_path = "lora_model"
load_in_4bit = True

# Load the model; fp16 keeps the non-quantized layers compact when 4-bit quantization is enabled
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    load_in_4bit=load_in_4bit,
    device_map="auto",
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Enable inference mode
model.eval()

# Streamlit input for the user prompt
user_input = st.text_input(
    "Enter your legal query:",
    "What are the penalties for breaking a red light in India?",
)

if user_input:
    # Wrap the query in the chat format the model was fine-tuned on
    messages = [{"role": "user", "content": user_input}]

    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda" if torch.cuda.is_available() else "cpu")

    # Streamlit progress indicator
    with st.spinner("Generating response..."):
        # TextStreamer prints tokens to the console as they are generated
        text_streamer = TextStreamer(tokenizer, skip_prompt=True)

        # Generate the response
        output = model.generate(
            input_ids=inputs,
            streamer=text_streamer,
            max_new_tokens=128,
            use_cache=True,
            do_sample=True,  # sampling must be enabled for temperature/min_p to take effect
            temperature=1.5,
            min_p=0.1,
        )

    st.success("Generation Complete!")
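
# --- Optional: stream the answer into the Streamlit page itself ---
# A minimal sketch, not part of the original app: TextStreamer above only prints to the
# terminal, so the generated answer never appears in the browser. transformers'
# TextIteratorStreamer yields decoded chunks that can be rendered into a placeholder.
# The helper name `stream_to_page` is illustrative; it could be called in place of the
# model.generate(...) block above if in-page streaming is wanted.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_to_page(model, tokenizer, input_ids, max_new_tokens=128):
    # Streamer that yields decoded text chunks instead of printing them
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Run generation in a background thread so the UI loop can consume tokens as they arrive
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        do_sample=True,
        temperature=1.5,
        min_p=0.1,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Append each chunk to a Streamlit placeholder as it is produced
    placeholder = st.empty()
    response = ""
    for chunk in streamer:
        response += chunk
        placeholder.markdown(response)
    thread.join()
    return response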