Update app.py
app.py CHANGED
@@ -3,20 +3,31 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Define your repository names.
-# For a fully merged model, you typically use the model repo (and a matching tokenizer repo).
 MODEL_NAME = "wedo2910/research_ai"
 TOKENIZER_NAME = "wedo2910/research_ai_tok"
 
-#
-
-
-model
+# Check if CUDA is available and choose an appropriate device mapping.
+if torch.cuda.is_available():
+    device = "cuda"
+    # When using GPU, you might let the model auto-map to available GPUs.
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        device_map="auto"
+    )
+else:
+    device = "cpu"
+    # Force CPU loading; this bypasses GPU-specific integrations like bitsandbytes.
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        device_map="cpu"
+    )
 
-#
-
-model = model.to(device)
+# Load the tokenizer.
+tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)
 
-# Optionally set model to evaluation mode.
+# Optionally set the model to evaluation mode.
 model.eval()
 
 def single_inference(question: str, max_new_tokens: int, temperature: float) -> str:
@@ -25,14 +36,13 @@ def single_inference(question: str, max_new_tokens: int, temperature: float) ->
 
     The prompt is constructed using a system instruction in Arabic, and the question is appended.
     """
-    # Define
+    # Define messages for a simulated chat conversation.
     messages = [
         {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},
         {"role": "user", "content": question},
     ]
 
-    #
-    # If available, use it; otherwise, build a prompt manually.
+    # If the tokenizer has an `apply_chat_template` method, use it; otherwise, build the prompt manually.
     if hasattr(tokenizer, "apply_chat_template"):
         input_ids = tokenizer.apply_chat_template(
             messages,
@@ -40,26 +50,21 @@ def single_inference(question: str, max_new_tokens: int, temperature: float) ->
             return_tensors="pt"
         ).to(device)
     else:
-        # Manually build the prompt
         system_prompt = "اجب علي الاتي بالعربي فقط.\n"
         user_prompt = f"السؤال: {question}\n"
         full_prompt = system_prompt + user_prompt
         input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
 
-    # Define the terminator tokens.
-    # (For a merged model, usually the eos_token_id is sufficient.)
-    terminators = [tokenizer.eos_token_id]
-
     # Generate the output.
     outputs = model.generate(
         input_ids,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        #
+        # You can add more generation parameters if needed.
     )
 
-    # Remove the prompt part from the output.
+    # Remove the prompt part from the generated output.
     generated_ids = outputs[0][input_ids.shape[-1]:]
 
     # Decode the tokens into a string.
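For context, here is a minimal sketch of how the updated single_inference helper could be driven after this change. The module name app, the example question, and the generation settings below are assumptions for illustration only; they are not values taken from this commit, and the Space may instead wire the function into its own UI.

# Hypothetical driver script; assumes the file shown above is importable as `app`.
# Importing app runs the model/tokenizer loading added in this diff.
from app import single_inference

# Illustrative Arabic question ("What is the capital of Egypt?") with assumed
# generation settings.
answer = single_inference(
    "ما هي عاصمة مصر؟",
    max_new_tokens=256,
    temperature=0.7,
)
print(answer)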