eneSadi committed on
Commit ff9863c
1 Parent(s): 7dea212

cosmos loading

Files changed (2)
  1. app.py +32 -15
  2. app_cosmos.py → app_gemma.py +15 -32
app.py CHANGED
@@ -1,26 +1,26 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-from huggingface_hub import login
-import os
 
-print("Google Gemma 2 Chatbot is starting...")
+print("COSMOS Llama Chatbot is starting...")
 
-# read access token from environment variable
-access_token = os.getenv('HF_TOKEN')
-login(access_token)
-
-model_id = "google/gemma-2-9b-it"
+model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
 
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",
     torch_dtype=torch.bfloat16,
+    device_map="auto",
 )
 print("Model loading completed")
 
+# this message can be changed and used as the chatbot's initial message
+initial_message = [
+    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
+    # Think step by step while carrying out the task and justify your steps.
+]
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
 
@@ -40,18 +40,35 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
 
     print("Device of the model:", model.device)
-    messages = [
-        {"role": "user", "content": f"{prompt}"},
-    ]
+    messages = initial_message.copy()
+    messages.append({"role": "user", "content": f"{prompt}"})
+
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
     print("Tokenizer process completed")
 
     print("Model process started")
-    outputs = model.generate(**input_ids, max_new_tokens=256)
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=256,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    response = outputs[0][input_ids.shape[-1]:]
 
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(outputs[0]).split("<end_of_turn>")[1].strip()
+    answer = tokenizer.decode(response, skip_special_tokens=True)
 
     return {"answer": answer}
app_cosmos.py → app_gemma.py RENAMED
@@ -1,26 +1,26 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from huggingface_hub import login
+import os
 
-print("COSMOS Llama Chatbot is starting...")
+print("Google Gemma 2 Chatbot is starting...")
 
-model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
+# read access token from environment variable
+access_token = os.getenv('HF_TOKEN')
+login(access_token)
+
+model_id = "google/gemma-2-9b-it"
 
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
     device_map="auto",
+    torch_dtype=torch.bfloat16,
 )
 print("Model loading completed")
 
-# this message can be changed and used as the chatbot's initial message
-initial_message = [
-    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
-    # Think step by step while carrying out the task and justify your steps.
-]
-
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
 
@@ -40,35 +40,18 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
 
     print("Device of the model:", model.device)
-    messages = initial_message.copy()
-    messages.append({"role": "user", "content": f"{prompt}"})
-
+    messages = [
+        {"role": "user", "content": f"{prompt}"},
+    ]
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-
-    terminators = [
-        tokenizer.eos_token_id,
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
     print("Tokenizer process completed")
 
     print("Model process started")
-    outputs = model.generate(
-        input_ids,
-        max_new_tokens=256,
-        eos_token_id=terminators,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    response = outputs[0][input_ids.shape[-1]:]
+    outputs = model.generate(**input_ids, max_new_tokens=256)
 
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(response, skip_special_tokens=True)
+    answer = tokenizer.decode(outputs[0]).split("<end_of_turn>")[1].strip()
 
     return {"answer": answer}