import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
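# Dependencies: streamlit, torch, and transformers; loading with `device_map`
# additionally requires the accelerate package to be installed.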
# Model and tokenizer repository names on the Hugging Face Hub.
MODEL_NAME = "wedo2910/research_ai"
TOKENIZER_NAME = "wedo2910/research_ai_tok"


@st.cache_resource
def load_model_and_tokenizer():
    """Load the model and tokenizer once and cache them across Streamlit reruns."""
    if torch.cuda.is_available():
        device = "cuda"
        # On GPU, let Accelerate map the model onto the available devices.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            device_map="auto",
        )
    else:
        device = "cpu"
        # Force CPU loading; this bypasses GPU-specific integrations such as bitsandbytes.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            device_map="cpu",
        )

    # Load the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)

    # Put the model in evaluation mode (disables dropout).
    model.eval()
    return model, tokenizer, device


model, tokenizer, device = load_model_and_tokenizer()
def single_inference(question: str, max_new_tokens: int, temperature: float) -> str:
    """
    Generate an answer for the given question.

    The prompt combines an Arabic system instruction ("Answer the following in
    Arabic only.") with the user's question.
    """
    # Build a simulated chat conversation.
    messages = [
        {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},
        {"role": "user", "content": question},
    ]

    # Use the tokenizer's chat template if it defines one; otherwise build the prompt manually.
    if getattr(tokenizer, "chat_template", None) is not None:
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(device)
    else:
        system_prompt = "اجب علي الاتي بالعربي فقط.\n"
        user_prompt = f"السؤال: {question}\n"
        full_prompt = system_prompt + user_prompt
        input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)

    # Generate the answer.
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )

    # Strip the prompt tokens from the generated sequence.
    generated_ids = outputs[0][input_ids.shape[-1]:]

    # Decode the remaining tokens into a string.
    output_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return output_text
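
# Example call (the sample question below is only illustrative):
#   single_inference("ما هي عاصمة مصر؟", max_new_tokens=256, temperature=0.4)
#   ("What is the capital of Egypt?")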
# Streamlit UI
st.title("Arabic AI Research QA")
st.subheader("Ask a question and get an answer from the research AI model.")
# Input field for the question.
question = st.text_input("Question", placeholder="Enter your question here...")
# Settings for generation.
st.subheader("Settings")
max_new_tokens = st.number_input("Max New Tokens", min_value=1, max_value=1000, value=256)
# Keep the minimum above zero: sampling (do_sample=True) requires a strictly positive temperature.
temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.4, step=0.1)
# When the button is pressed, generate the answer.
if st.button("Get Answer"):
    if not question:
        st.error("Please enter a question.")
    else:
        with st.spinner("Generating answer..."):
            try:
                answer = single_inference(question, max_new_tokens, temperature)
                st.subheader("Result")
                st.markdown(f"**Question:** {question}")
                st.markdown(f"**Answer:** {answer}")
            except Exception as e:
                st.error(f"Error: {e}")