ayush0504 committed on
Commit 48ceee6 · verified · 1 Parent(s): 042e364

Update app.py

Files changed (1)
  1. app.py +45 -41
app.py CHANGED
@@ -1,58 +1,62 @@
+import streamlit as st
 import torch
 from peft import AutoPeftModelForCausalLM
 from transformers import AutoTokenizer, TextStreamer
-import streamlit as st
-
-# Initialize Streamlit UI
-st.title("Legal Query Chatbot")
-st.write("Ask questions related to Indian traffic laws and get AI-generated responses.")
 
 # Load LoRA fine-tuned model and tokenizer
-model_path = "lora_model"
-load_in_4bit = True
+model_path = "lora_model"  # Your model folder path
+load_in_4bit = True  # Whether to load in 4-bit precision
 
 # Load the model
-model = AutoPeftModelForCausalLM.from_pretrained(
-    model_path,
-    torch_dtype=torch.float16 if not load_in_4bit else torch.float32,
-    load_in_4bit=load_in_4bit,
-    device_map="auto"
-)
+@st.cache_resource
+def load_model():
+    model = AutoPeftModelForCausalLM.from_pretrained(
+        model_path,
+        torch_dtype=torch.float16 if not load_in_4bit else torch.float32,
+        load_in_4bit=load_in_4bit,
+        device_map="auto"
+    )
+    model.eval()
+    return model
 
 # Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-# Enable inference mode
-model.eval()
+@st.cache_resource
+def load_tokenizer():
+    return AutoTokenizer.from_pretrained(model_path)
 
-# Streamlit input for user prompt
-user_input = st.text_input("Enter your legal query:", "What are the penalties for breaking a red light in India?")
+model = load_model()
+tokenizer = load_tokenizer()
 
-if user_input:
-    # Prepare the prompt
-    messages = [{"role": "user", "content": user_input}]
-
-    # Tokenize input
+def generate_response(question):
+    messages = [{"role": "user", "content": question}]
     inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
         add_generation_prompt=True,
         return_tensors="pt"
     ).to("cuda" if torch.cuda.is_available() else "cpu")
-
-    # Streamlit progress indicator
-    with st.spinner("Generating response..."):
-        # Use a text streamer for efficient streaming output
-        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
-
-        # Generate response
-        output = model.generate(
-            input_ids=inputs,
-            streamer=text_streamer,
-            max_new_tokens=128,
-            use_cache=True,
-            temperature=1.5,
-            min_p=0.1
-        )
-
-    st.success("Generation Complete!")
+
+    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+    output = model.generate(
+        input_ids=inputs,
+        streamer=text_streamer,
+        max_new_tokens=1048,
+        use_cache=True,
+        temperature=0.7,
+        min_p=0.1
+    )
+
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+
+# Streamlit UI
+st.title("Indian Penal Code AI Assistant")
+
+question = st.text_area("Ask a legal question:")
+if st.button("Generate Response"):
+    if question.strip():
+        with st.spinner("Generating response..."):
+            answer = generate_response(question)
+        st.subheader("Answer:")
+        st.write(answer)
+    else:
+        st.warning("Please enter a question.")
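The core of this commit is that model and tokenizer loading are wrapped in Streamlit's `@st.cache_resource`, so the LoRA model is loaded once per server process instead of on every rerun of the script. A minimal sketch of that caching pattern, separate from this repo's model (the `load_heavy_resource` name and the dictionary stand-in are illustrative, not part of app.py):

```python
import streamlit as st

@st.cache_resource  # cache the returned object across reruns and sessions
def load_heavy_resource():
    # Stand-in for an expensive load (e.g. a large model); this body
    # runs only the first time the function is called.
    return {"status": "loaded"}

resource = load_heavy_resource()  # later reruns reuse the cached object
st.write(resource["status"])
```

Without the decorator, every widget interaction (such as clicking "Generate Response") reruns the script top to bottom and would reload the model from disk each time.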