import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
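# Dependencies: streamlit, torch, and transformers; loading with `device_map`
# additionally requires the accelerate package to be installed.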
# Model and tokenizer repository names on the Hugging Face Hub.
MODEL_NAME = "wedo2910/research_ai"
TOKENIZER_NAME = "wedo2910/research_ai_tok"


@st.cache_resource
def load_model_and_tokenizer():
    """Load the model and tokenizer once and cache them across Streamlit reruns."""
    if torch.cuda.is_available():
        device = "cuda"
        # On GPU, let Accelerate map the model onto the available devices.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            device_map="auto",
        )
    else:
        device = "cpu"
        # Force CPU loading; this bypasses GPU-specific integrations such as bitsandbytes.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            device_map="cpu",
        )

    # Load the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)

    # Put the model in evaluation mode (disables dropout).
    model.eval()
    return model, tokenizer, device


model, tokenizer, device = load_model_and_tokenizer()
def single_inference(question: str, max_new_tokens: int, temperature: float) -> str:
    """
    Generate an answer for the given question.

    The prompt combines an Arabic system instruction ("Answer the following in
    Arabic only.") with the user's question.
    """
    # Build a simulated chat conversation.
    messages = [
        {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},
        {"role": "user", "content": question},
    ]

    # Use the tokenizer's chat template if it defines one; otherwise build the prompt manually.
    if getattr(tokenizer, "chat_template", None) is not None:
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(device)
    else:
        system_prompt = "اجب علي الاتي بالعربي فقط.\n"
        user_prompt = f"السؤال: {question}\n"
        full_prompt = system_prompt + user_prompt
        input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)

    # Generate the answer.
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )

    # Strip the prompt tokens from the generated sequence.
    generated_ids = outputs[0][input_ids.shape[-1]:]

    # Decode the remaining tokens into a string.
    output_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return output_text
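
# Example call (the sample question below is only illustrative):
#   single_inference("ما هي عاصمة مصر؟", max_new_tokens=256, temperature=0.4)
#   ("What is the capital of Egypt?")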
# Streamlit UI
st.title("Arabic AI Research QA")
st.subheader("Ask a question and get an answer from the research AI model.")
# Input field for the question.
question = st.text_input("Question", placeholder="Enter your question here...")
# Settings for generation.
st.subheader("Settings")
max_new_tokens = st.number_input("Max New Tokens", min_value=1, max_value=1000, value=256)
# Keep the minimum above zero: sampling (do_sample=True) requires a strictly positive temperature.
temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.4, step=0.1)
# When the button is pressed, generate the answer.
if st.button("Get Answer"):
    if not question:
        st.error("Please enter a question.")
    else:
        with st.spinner("Generating answer..."):
            try:
                answer = single_inference(question, max_new_tokens, temperature)
                st.subheader("Result")
                st.markdown(f"**Question:** {question}")
                st.markdown(f"**Answer:** {answer}")
            except Exception as e:
                st.error(f"Error: {e}")