# Streamlit front end for a LoRA fine-tuned chat model that answers questions
# about Indian traffic rules.
#
# Flow: load the model once (cached across reruns), read the user's question,
# build a chat prompt from it, and stream the generated answer into the page.

import threading

import streamlit as st
from unsloth import FastLanguageModel
from transformers import TextIteratorStreamer, TextStreamer

# Page Configuration
st.set_page_config(page_title="AI Traffic Law Advisor", layout="wide")

# Directory holding the LoRA model that is loaded at start-up.
MODEL_PATH = "./lora_model/lora_model"

# Generation settings.
MAX_NEW_TOKENS = 1048
TEMPERATURE = 0.7


@st.cache_resource(show_spinner=False)
def load_model():
    """Load the model and tokenizer from ``MODEL_PATH`` and enable inference mode.

    Wrapped in ``st.cache_resource`` so the load happens once instead of on
    every Streamlit rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model, tokenizer = FastLanguageModel.from_pretrained(
        MODEL_PATH,
        device_map="auto",
    )
    # Called for its in-place effect: the return value is deliberately not
    # used, so the loaded model object is what gets returned either way.
    FastLanguageModel.for_inference(model)
    return model, tokenizer


model, tokenizer = load_model()

st.title("AI Traffic Law Advisor")
user_query = st.text_area(
    "Enter your legal question about traffic rules in India:", ""
)

if st.button("Get Advice"):
    if user_query.strip():
        messages = [{"role": "user", "content": user_query}]

        # Tokenize the chat-formatted prompt and move it to the model's device.
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)

        # TextStreamer prints to the server's stdout, which never reaches the
        # browser. TextIteratorStreamer exposes the decoded text as an
        # iterator so it can be rendered in the page as it is produced.
        streamer = TextIteratorStreamer(
            tokenizer,
            skip_prompt=True,
            skip_special_tokens=True,
        )
        errors = []

        def _generate():
            """Run generation in a worker thread, feeding ``streamer``."""
            try:
                model.generate(
                    input_ids=inputs,
                    streamer=streamer,
                    max_new_tokens=MAX_NEW_TOKENS,
                    # Sampling must be on for ``temperature`` to take effect.
                    do_sample=True,
                    temperature=TEMPERATURE,
                )
            except Exception as exc:  # thread boundary: report in the UI below
                errors.append(exc)
                # Close the stream so the reading loop does not block forever.
                streamer.end()

        st.markdown("**AI Response:**")
        placeholder = st.empty()
        worker = threading.Thread(target=_generate, daemon=True)

        with st.spinner("Generating response..."):
            worker.start()
            answer = ""
            for piece in streamer:
                answer += piece
                # Re-render the partial answer each time new text arrives.
                placeholder.markdown(answer)
            worker.join()

        if errors:
            st.error(f"Generation failed: {errors[0]}")
    else:
        st.warning("Please enter a query.")