wedo2910 committed
Commit 30fe006 · verified · 1 Parent(s): 3e00dd2

Update app.py

Files changed (1)
  1. app.py +24 -19
app.py CHANGED
@@ -3,20 +3,31 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Define your repository names.
-# For a fully merged model, you typically use the model repo (and a matching tokenizer repo).
 MODEL_NAME = "wedo2910/research_ai"
 TOKENIZER_NAME = "wedo2910/research_ai_tok"
 
-# Load the tokenizer and model.
-# Note: Use trust_remote_code=True if your model repo uses custom code.
-tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
+# Check if CUDA is available and choose an appropriate device mapping.
+if torch.cuda.is_available():
+    device = "cuda"
+    # When using GPU, you might let the model auto-map to available GPUs.
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        device_map="auto"
+    )
+else:
+    device = "cpu"
+    # Force CPU loading; this bypasses GPU-specific integrations like bitsandbytes.
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        device_map="cpu"
+    )
 
-# Move model to the appropriate device.
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
+# Load the tokenizer.
+tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)
 
-# Optionally set model to evaluation mode.
+# Optionally set the model to evaluation mode.
 model.eval()
 
 def single_inference(question: str, max_new_tokens: int, temperature: float) -> str:
@@ -25,14 +36,13 @@ def single_inference(question: str, max_new_tokens: int, temperature: float) ->
 
     The prompt is constructed using a system instruction in Arabic, and the question is appended.
     """
-    # Define the messages that simulate a chat conversation.
+    # Define messages for a simulated chat conversation.
     messages = [
         {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},
         {"role": "user", "content": question},
     ]
 
-    # Some tokenizers provided by custom repos may implement apply_chat_template.
-    # If available, use it; otherwise, build a prompt manually.
+    # If the tokenizer has an `apply_chat_template` method, use it; otherwise, build the prompt manually.
     if hasattr(tokenizer, "apply_chat_template"):
         input_ids = tokenizer.apply_chat_template(
             messages,
@@ -40,26 +50,21 @@ def single_inference(question: str, max_new_tokens: int, temperature: float) ->
             return_tensors="pt"
         ).to(device)
     else:
-        # Manually build the prompt
        system_prompt = "اجب علي الاتي بالعربي فقط.\n"
         user_prompt = f"السؤال: {question}\n"
         full_prompt = system_prompt + user_prompt
         input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
 
-    # Define the terminator tokens.
-    # (For a merged model, usually the eos_token_id is sufficient.)
-    terminators = [tokenizer.eos_token_id]
-
     # Generate the output.
     outputs = model.generate(
         input_ids,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        # Optionally add other generation parameters (top_p, top_k, etc.) if needed.
+        # You can add more generation parameters if needed.
     )
 
-    # Remove the prompt part from the output.
+    # Remove the prompt part from the generated output.
     generated_ids = outputs[0][input_ids.shape[-1]:]
 
     # Decode the tokens into a string.
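
The diff is truncated right after the decode comment, so the function's final decode/return lines and the rest of app.py are not shown here. For orientation only, below is a minimal, self-contained sketch of the same load → prompt → generate → decode flow this commit introduces; the example question, the add_generation_prompt flag, skip_special_tokens=True, and the generation settings are illustrative assumptions, not the repository's actual code.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "wedo2910/research_ai"
TOKENIZER_NAME = "wedo2910/research_ai_tok"

# Device-aware loading, mirroring the pattern added in this commit.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    device_map="auto" if device == "cuda" else "cpu",
)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, trust_remote_code=True)
model.eval()

question = "ما هي عاصمة مصر؟"  # "What is the capital of Egypt?" (made-up example)
messages = [
    {"role": "system", "content": "اجب علي الاتي بالعربي فقط."},  # "Answer the following in Arabic only."
    {"role": "user", "content": question},
]

# Build the prompt via the chat template when available, otherwise manually.
if hasattr(tokenizer, "apply_chat_template"):
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)
else:
    input_ids = tokenizer(
        "اجب علي الاتي بالعربي فقط.\n" + f"السؤال: {question}\n",
        return_tensors="pt",
    ).input_ids.to(device)

# Sample a completion, then decode only the newly generated tokens.
with torch.no_grad():
    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )
generated_ids = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(generated_ids, skip_special_tokens=True))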