wedo2910 committed on
Commit 33b295a · verified · 1 Parent(s): 930a873

Update app.py

Files changed (1)
  1. app.py +64 -42
app.py CHANGED
@@ -1,72 +1,94 @@
-import os
-os.environ["TRANSFORMERS_NO_BITSANDBYTES"] = "1"  # Disable bitsandbytes integration
-
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM

-# Load the new model and tokenizer
-model_name = "wedo2910/research_ai"
-tokenizer_name = "wedo2910/research_ai_tok"

-tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=False)

-# Define the custom inference function
-def single_inference(question, max_new_tokens, temperature):
-    # Prepare the prompt messages
     messages = [
         {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},
         {"role": "user", "content": question},
     ]

-    # Use the tokenizer's chat template functionality
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-
-    # Define terminator tokens (end-of-sequence markers)
-    terminators = [
-        tokenizer.eos_token_id,
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
-
-    # Generate the output
     outputs = model.generate(
         input_ids,
         max_new_tokens=max_new_tokens,
-        eos_token_id=terminators,
         do_sample=True,
         temperature=temperature,
     )

-    # Decode only the newly generated tokens (i.e. skip the prompt)
-    response = outputs[0][input_ids.shape[-1]:]
-    output = tokenizer.decode(response, skip_special_tokens=True)
-    return output

 # Streamlit UI
 st.title("Arabic AI Research QA")
-st.subheader("Ask a question to get an answer from the research AI model.")

-# Input field for the question
 question = st.text_input("Question", placeholder="Enter your question here...")

-# Settings sliders for generation parameters
 st.subheader("Settings")
 max_new_tokens = st.number_input("Max New Tokens", min_value=1, max_value=1000, value=256)
 temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.4, step=0.1)

-# Generate Answer button
 if st.button("Get Answer"):
     if not question:
-        st.error("The question field is required.")
     else:
-        try:
-            answer = single_inference(question, max_new_tokens, temperature)
-            st.subheader("Result")
-            st.write(f"**Question:** {question}")
-            st.write(f"**Answer:** {answer}")
-        except Exception as e:
-            st.error(f"Error: {e}")

 import streamlit as st
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM

+# Define your repository names.
+# For a fully merged model, you typically use the model repo (and a matching tokenizer repo).
+MODEL_NAME = "wedo2910/research_ai"
+TOKENIZER_NAME = "wedo2910/research_ai_tok"
+
+# Load the tokenizer and model.
+# Note: Use trust_remote_code=True if your model repo uses custom code.
+tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
+
+# Move model to the appropriate device.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)

+# Optionally set model to evaluation mode.
+model.eval()

+def single_inference(question: str, max_new_tokens: int, temperature: float) -> str:
+    """
+    Generates an answer for the given question.
+
+    The prompt is constructed using a system instruction in Arabic, and the question is appended.
+    """
+    # Define the messages that simulate a chat conversation.
     messages = [
         {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},
         {"role": "user", "content": question},
     ]

+    # Some tokenizers provided by custom repos may implement apply_chat_template.
+    # If available, use it; otherwise, build a prompt manually.
+    if hasattr(tokenizer, "apply_chat_template"):
+        input_ids = tokenizer.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        ).to(device)
+    else:
+        # Manually build the prompt
+        system_prompt = "اجب علي الاتي بالعربي فقط.\n"
+        user_prompt = f"السؤال: {question}\n"
+        full_prompt = system_prompt + user_prompt
+        input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
+
+    # Define the terminator tokens.
+    # (For a merged model, usually the eos_token_id is sufficient.)
+    terminators = [tokenizer.eos_token_id]
+
+    # Generate the output.
     outputs = model.generate(
         input_ids,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
+        # Optionally add other generation parameters (top_p, top_k, etc.) if needed.
     )

+    # Remove the prompt part from the output.
+    generated_ids = outputs[0][input_ids.shape[-1]:]
+
+    # Decode the tokens into a string.
+    output_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
+
+    return output_text

 # Streamlit UI
 st.title("Arabic AI Research QA")
+st.subheader("Ask a question and get an answer from the research AI model.")

+# Input field for the question.
 question = st.text_input("Question", placeholder="Enter your question here...")

+# Settings for generation.
 st.subheader("Settings")
 max_new_tokens = st.number_input("Max New Tokens", min_value=1, max_value=1000, value=256)
 temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.4, step=0.1)

+# When the button is pressed, generate the answer.
 if st.button("Get Answer"):
     if not question:
+        st.error("Please enter a question.")
     else:
+        with st.spinner("Generating answer..."):
+            try:
+                answer = single_inference(question, max_new_tokens, temperature)
+                st.subheader("Result")
+                st.markdown(f"**Question:** {question}")
+                st.markdown(f"**Answer:** {answer}")
+            except Exception as e:
+                st.error(f"Error: {e}")
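For context, a minimal sketch (not part of this commit) of how the loading step above could be cached: Streamlit re-executes app.py on every widget interaction, so the unguarded from_pretrained calls run again on each rerun. Wrapping them in st.cache_resource keeps a single tokenizer/model in memory across reruns; the repo names and trust_remote_code flag are taken from the diff above, and the helper name load_model_and_tokenizer is illustrative.

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "wedo2910/research_ai"
TOKENIZER_NAME = "wedo2910/research_ai_tok"

@st.cache_resource  # load once, reuse the same objects on every Streamlit rerun
def load_model_and_tokenizer():
    # Same loading logic as in the committed app.py, just wrapped for caching.
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    model.eval()
    return tokenizer, model, device

tokenizer, model, device = load_model_and_tokenizer()

With or without caching, the app is launched locally with streamlit run app.py.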